From f55982be2317505b0d9bcc1bdb6ba350cb810e55 Mon Sep 17 00:00:00 2001
From: "Raziel K. Crowe" <84860158+CWDSYSTEMS@users.noreply.github.com>
Date: Sat, 2 Apr 2022 18:06:01 +0500
Subject: [PATCH] cleaning up the git

---
 arch/.gitignore | 3 + arch/Kconfig | 1329 +++++++ arch/alpha/Kbuild | 6 + arch/alpha/Kconfig | 678 ++++ arch/alpha/Kconfig.debug | 38 + arch/alpha/Makefile | 66 + arch/alpha/boot/Makefile | 120 + arch/alpha/boot/bootloader.lds | 25 + arch/alpha/boot/bootp.c | 214 ++ arch/alpha/boot/bootpz.c | 475 +++ arch/alpha/boot/head.S | 124 + arch/alpha/boot/main.c | 190 + arch/alpha/boot/misc.c | 174 + arch/alpha/boot/stdio.c | 302 ++ arch/alpha/boot/tools/mkbb.c | 153 + arch/alpha/boot/tools/objstrip.c | 284 ++ arch/alpha/configs/defconfig | 73 + arch/alpha/include/asm/Kbuild | 6 + arch/alpha/include/asm/a.out.h | 16 + arch/alpha/include/asm/agp.h | 19 + arch/alpha/include/asm/agp_backend.h | 43 + arch/alpha/include/asm/asm-offsets.h | 1 + arch/alpha/include/asm/asm-prototypes.h | 19 + arch/alpha/include/asm/atomic.h | 306 ++ arch/alpha/include/asm/barrier.h | 23 + arch/alpha/include/asm/bitops.h | 459 +++ arch/alpha/include/asm/bug.h | 25 + arch/alpha/include/asm/bugs.h | 20 + arch/alpha/include/asm/cache.h | 23 + arch/alpha/include/asm/cacheflush.h | 62 + arch/alpha/include/asm/checksum.h | 74 + arch/alpha/include/asm/cmpxchg.h | 76 + arch/alpha/include/asm/compiler.h | 7 + arch/alpha/include/asm/console.h | 30 + arch/alpha/include/asm/core_apecs.h | 518 +++ arch/alpha/include/asm/core_cia.h | 501 +++ arch/alpha/include/asm/core_irongate.h | 233 ++ arch/alpha/include/asm/core_lca.h | 362 ++ arch/alpha/include/asm/core_marvel.h | 378 ++ arch/alpha/include/asm/core_mcpcia.h | 382 ++ arch/alpha/include/asm/core_polaris.h | 111 + arch/alpha/include/asm/core_t2.h | 615 +++ arch/alpha/include/asm/core_titan.h | 410 ++ arch/alpha/include/asm/core_tsunami.h | 335 ++ arch/alpha/include/asm/core_wildfire.h | 319 ++ arch/alpha/include/asm/delay.h | 11 + arch/alpha/include/asm/device.h | 6 + arch/alpha/include/asm/div64.h | 1 + arch/alpha/include/asm/dma-mapping.h | 16 + arch/alpha/include/asm/dma.h | 377 ++ arch/alpha/include/asm/elf.h | 166 + arch/alpha/include/asm/emergency-restart.h | 6 + arch/alpha/include/asm/err_common.h | 119 + arch/alpha/include/asm/err_ev6.h | 6 + arch/alpha/include/asm/err_ev7.h | 203 + arch/alpha/include/asm/extable.h | 56 + arch/alpha/include/asm/floppy.h | 113 + arch/alpha/include/asm/fpu.h | 76 + arch/alpha/include/asm/ftrace.h | 1 + arch/alpha/include/asm/futex.h | 95 + arch/alpha/include/asm/gct.h | 59 + arch/alpha/include/asm/hardirq.h | 10 + arch/alpha/include/asm/hw_irq.h | 15 + arch/alpha/include/asm/hwrpb.h | 221 ++ arch/alpha/include/asm/io.h | 607 +++ arch/alpha/include/asm/io_trivial.h | 132 + arch/alpha/include/asm/irq.h | 92 + arch/alpha/include/asm/irq_regs.h | 1 + arch/alpha/include/asm/irqflags.h | 68 + arch/alpha/include/asm/jensen.h | 347 ++ arch/alpha/include/asm/kdebug.h | 1 + arch/alpha/include/asm/kmap_types.h | 15 + arch/alpha/include/asm/linkage.h | 9 + arch/alpha/include/asm/local.h | 102 + arch/alpha/include/asm/machvec.h | 137 + arch/alpha/include/asm/mc146818rtc.h | 28 + arch/alpha/include/asm/mce.h | 84 + arch/alpha/include/asm/mmu.h | 8 + arch/alpha/include/asm/mmu_context.h | 259 ++ arch/alpha/include/asm/mmzone.h | 110 + arch/alpha/include/asm/module.h | 18 + arch/alpha/include/asm/page.h | 97 + arch/alpha/include/asm/pal.h | 187 + arch/alpha/include/asm/param.h | 12 + arch/alpha/include/asm/parport.h | 19 + 
arch/alpha/include/asm/pci.h | 100 + arch/alpha/include/asm/percpu.h | 19 + arch/alpha/include/asm/perf_event.h | 4 + arch/alpha/include/asm/pgalloc.h | 36 + arch/alpha/include/asm/pgtable.h | 346 ++ arch/alpha/include/asm/processor.h | 80 + arch/alpha/include/asm/ptrace.h | 28 + arch/alpha/include/asm/rwonce.h | 35 + arch/alpha/include/asm/serial.h | 30 + arch/alpha/include/asm/setup.h | 43 + arch/alpha/include/asm/sfp-machine.h | 82 + arch/alpha/include/asm/shmparam.h | 7 + arch/alpha/include/asm/signal.h | 28 + arch/alpha/include/asm/smp.h | 60 + arch/alpha/include/asm/socket.h | 11 + arch/alpha/include/asm/sparsemem.h | 18 + arch/alpha/include/asm/special_insns.h | 42 + arch/alpha/include/asm/spinlock.h | 163 + arch/alpha/include/asm/spinlock_types.h | 21 + arch/alpha/include/asm/string.h | 80 + arch/alpha/include/asm/switch_to.h | 15 + arch/alpha/include/asm/syscall.h | 18 + arch/alpha/include/asm/termios.h | 87 + arch/alpha/include/asm/thread_info.h | 114 + arch/alpha/include/asm/timex.h | 32 + arch/alpha/include/asm/tlb.h | 10 + arch/alpha/include/asm/tlbflush.h | 152 + arch/alpha/include/asm/topology.h | 12 + arch/alpha/include/asm/types.h | 7 + arch/alpha/include/asm/uaccess.h | 335 ++ arch/alpha/include/asm/ucontext.h | 14 + arch/alpha/include/asm/unaligned.h | 12 + arch/alpha/include/asm/unistd.h | 22 + arch/alpha/include/asm/user.h | 54 + arch/alpha/include/asm/vga.h | 83 + arch/alpha/include/asm/vmalloc.h | 4 + arch/alpha/include/asm/word-at-a-time.h | 58 + arch/alpha/include/asm/wrperfmon.h | 94 + arch/alpha/include/asm/xchg.h | 246 ++ arch/alpha/include/asm/xor.h | 847 +++++ arch/alpha/include/uapi/asm/Kbuild | 2 + arch/alpha/include/uapi/asm/a.out.h | 92 + arch/alpha/include/uapi/asm/auxvec.h | 27 + arch/alpha/include/uapi/asm/bitsperlong.h | 9 + arch/alpha/include/uapi/asm/byteorder.h | 7 + arch/alpha/include/uapi/asm/compiler.h | 118 + arch/alpha/include/uapi/asm/console.h | 51 + arch/alpha/include/uapi/asm/errno.h | 128 + arch/alpha/include/uapi/asm/fcntl.h | 58 + arch/alpha/include/uapi/asm/fpu.h | 124 + arch/alpha/include/uapi/asm/gentrap.h | 38 + arch/alpha/include/uapi/asm/ioctl.h | 67 + arch/alpha/include/uapi/asm/ioctls.h | 128 + arch/alpha/include/uapi/asm/mman.h | 85 + arch/alpha/include/uapi/asm/pal.h | 54 + arch/alpha/include/uapi/asm/param.h | 15 + arch/alpha/include/uapi/asm/posix_types.h | 18 + arch/alpha/include/uapi/asm/ptrace.h | 71 + arch/alpha/include/uapi/asm/reg.h | 53 + arch/alpha/include/uapi/asm/regdef.h | 45 + arch/alpha/include/uapi/asm/resource.h | 23 + arch/alpha/include/uapi/asm/setup.h | 7 + arch/alpha/include/uapi/asm/sigcontext.h | 35 + arch/alpha/include/uapi/asm/siginfo.h | 7 + arch/alpha/include/uapi/asm/signal.h | 116 + arch/alpha/include/uapi/asm/socket.h | 160 + arch/alpha/include/uapi/asm/sockios.h | 17 + arch/alpha/include/uapi/asm/stat.h | 49 + arch/alpha/include/uapi/asm/statfs.h | 13 + arch/alpha/include/uapi/asm/swab.h | 43 + arch/alpha/include/uapi/asm/sysinfo.h | 32 + arch/alpha/include/uapi/asm/termbits.h | 223 ++ arch/alpha/include/uapi/asm/termios.h | 71 + arch/alpha/include/uapi/asm/types.h | 27 + arch/alpha/include/uapi/asm/unistd.h | 17 + arch/alpha/kernel/.gitignore | 2 + arch/alpha/kernel/Makefile | 112 + arch/alpha/kernel/asm-offsets.c | 44 + arch/alpha/kernel/audit.c | 62 + arch/alpha/kernel/binfmt_loader.c | 46 + arch/alpha/kernel/bugs.c | 45 + arch/alpha/kernel/console.c | 95 + arch/alpha/kernel/core_apecs.c | 420 ++ arch/alpha/kernel/core_cia.c | 1216 ++++++ arch/alpha/kernel/core_irongate.c | 417 ++ 
arch/alpha/kernel/core_lca.c | 517 +++ arch/alpha/kernel/core_marvel.c | 1099 ++++++ arch/alpha/kernel/core_mcpcia.c | 616 +++ arch/alpha/kernel/core_polaris.c | 203 + arch/alpha/kernel/core_t2.c | 624 +++ arch/alpha/kernel/core_titan.c | 802 ++++ arch/alpha/kernel/core_tsunami.c | 484 +++ arch/alpha/kernel/core_wildfire.c | 633 ++++ arch/alpha/kernel/entry.S | 852 +++++ arch/alpha/kernel/err_common.c | 321 ++ arch/alpha/kernel/err_ev6.c | 276 ++ arch/alpha/kernel/err_ev7.c | 287 ++ arch/alpha/kernel/err_impl.h | 88 + arch/alpha/kernel/err_marvel.c | 1161 ++++++ arch/alpha/kernel/err_titan.c | 761 ++++ arch/alpha/kernel/es1888.c | 50 + arch/alpha/kernel/gct.c | 48 + arch/alpha/kernel/head.S | 99 + arch/alpha/kernel/io.c | 677 ++++ arch/alpha/kernel/irq.c | 123 + arch/alpha/kernel/irq_alpha.c | 225 ++ arch/alpha/kernel/irq_i8259.c | 163 + arch/alpha/kernel/irq_impl.h | 36 + arch/alpha/kernel/irq_pyxis.c | 112 + arch/alpha/kernel/irq_srm.c | 65 + arch/alpha/kernel/machvec_impl.h | 153 + arch/alpha/kernel/module.c | 273 ++ arch/alpha/kernel/osf_sys.c | 1369 +++++++ arch/alpha/kernel/pc873xx.c | 89 + arch/alpha/kernel/pc873xx.h | 36 + arch/alpha/kernel/pci-noop.c | 113 + arch/alpha/kernel/pci-sysfs.c | 372 ++ arch/alpha/kernel/pci.c | 476 +++ arch/alpha/kernel/pci_impl.h | 195 + arch/alpha/kernel/pci_iommu.c | 933 +++++ arch/alpha/kernel/perf_event.c | 899 +++++ arch/alpha/kernel/process.c | 400 ++ arch/alpha/kernel/proto.h | 213 ++ arch/alpha/kernel/ptrace.c | 338 ++ arch/alpha/kernel/rtc.c | 226 ++ arch/alpha/kernel/setup.c | 1458 +++++++ arch/alpha/kernel/signal.c | 540 +++ arch/alpha/kernel/smc37c669.c | 2537 +++++++++++++ arch/alpha/kernel/smc37c93x.c | 274 ++ arch/alpha/kernel/smp.c | 768 ++++ arch/alpha/kernel/srm_env.c | 218 ++ arch/alpha/kernel/srmcons.c | 289 ++ arch/alpha/kernel/sys_alcor.c | 309 ++ arch/alpha/kernel/sys_cabriolet.c | 444 +++ arch/alpha/kernel/sys_dp264.c | 665 ++++ arch/alpha/kernel/sys_eb64p.c | 238 ++ arch/alpha/kernel/sys_eiger.c | 225 ++ arch/alpha/kernel/sys_jensen.c | 237 ++ arch/alpha/kernel/sys_marvel.c | 465 +++ arch/alpha/kernel/sys_miata.c | 294 ++ arch/alpha/kernel/sys_mikasa.c | 247 ++ arch/alpha/kernel/sys_nautilus.c | 297 ++ arch/alpha/kernel/sys_noritake.c | 336 ++ arch/alpha/kernel/sys_rawhide.c | 271 ++ arch/alpha/kernel/sys_ruffian.c | 240 ++ arch/alpha/kernel/sys_rx164.c | 203 + arch/alpha/kernel/sys_sable.c | 635 ++++ arch/alpha/kernel/sys_sio.c | 484 +++ arch/alpha/kernel/sys_sx164.c | 179 + arch/alpha/kernel/sys_takara.c | 288 ++ arch/alpha/kernel/sys_titan.c | 419 ++ arch/alpha/kernel/sys_wildfire.c | 341 ++ arch/alpha/kernel/syscalls/Makefile | 33 + arch/alpha/kernel/syscalls/syscall.tbl | 492 +++ arch/alpha/kernel/syscalls/syscallhdr.sh | 36 + arch/alpha/kernel/syscalls/syscalltbl.sh | 32 + arch/alpha/kernel/systbls.S | 15 + arch/alpha/kernel/time.c | 458 +++ arch/alpha/kernel/traps.c | 992 +++++ arch/alpha/kernel/vmlinux.lds.S | 78 + arch/alpha/lib/Makefile | 61 + arch/alpha/lib/callback_srm.S | 109 + arch/alpha/lib/checksum.c | 184 + arch/alpha/lib/clear_page.S | 41 + arch/alpha/lib/clear_user.S | 102 + arch/alpha/lib/copy_page.S | 51 + arch/alpha/lib/copy_user.S | 121 + arch/alpha/lib/csum_ipv6_magic.S | 118 + arch/alpha/lib/csum_partial_copy.c | 364 ++ arch/alpha/lib/dbg_current.S | 30 + arch/alpha/lib/dbg_stackcheck.S | 28 + arch/alpha/lib/dbg_stackkill.S | 36 + arch/alpha/lib/divide.S | 199 + arch/alpha/lib/ev6-clear_page.S | 56 + arch/alpha/lib/ev6-clear_user.S | 213 ++ arch/alpha/lib/ev6-copy_page.S | 205 + 
arch/alpha/lib/ev6-copy_user.S | 227 ++ arch/alpha/lib/ev6-csum_ipv6_magic.S | 153 + arch/alpha/lib/ev6-divide.S | 263 ++ arch/alpha/lib/ev6-memchr.S | 193 + arch/alpha/lib/ev6-memcpy.S | 250 ++ arch/alpha/lib/ev6-memset.S | 605 +++ arch/alpha/lib/ev6-stxcpy.S | 322 ++ arch/alpha/lib/ev6-stxncpy.S | 398 ++ arch/alpha/lib/ev67-strcat.S | 56 + arch/alpha/lib/ev67-strchr.S | 90 + arch/alpha/lib/ev67-strlen.S | 51 + arch/alpha/lib/ev67-strncat.S | 96 + arch/alpha/lib/ev67-strrchr.S | 111 + arch/alpha/lib/fls.c | 39 + arch/alpha/lib/fpreg.c | 201 + arch/alpha/lib/memchr.S | 165 + arch/alpha/lib/memcpy.c | 163 + arch/alpha/lib/memmove.S | 183 + arch/alpha/lib/memset.S | 133 + arch/alpha/lib/srm_printk.c | 42 + arch/alpha/lib/srm_puts.c | 24 + arch/alpha/lib/stacktrace.c | 103 + arch/alpha/lib/strcat.S | 55 + arch/alpha/lib/strchr.S | 72 + arch/alpha/lib/strcpy.S | 25 + arch/alpha/lib/strlen.S | 59 + arch/alpha/lib/strncat.S | 86 + arch/alpha/lib/strncpy.S | 83 + arch/alpha/lib/strrchr.S | 89 + arch/alpha/lib/stxcpy.S | 290 ++ arch/alpha/lib/stxncpy.S | 346 ++ arch/alpha/lib/udelay.c | 56 + arch/alpha/lib/udiv-qrnnd.S | 165 + arch/alpha/math-emu/Makefile | 10 + arch/alpha/math-emu/math.c | 405 ++ arch/alpha/math-emu/qrnnd.S | 163 + arch/alpha/math-emu/sfp-util.h | 36 + arch/alpha/mm/Makefile | 8 + arch/alpha/mm/fault.c | 248 ++ arch/alpha/mm/init.c | 282 ++ arch/alpha/mm/numa.c | 223 ++ arch/alpha/oprofile/Makefile | 20 + arch/alpha/oprofile/common.c | 189 + arch/alpha/oprofile/op_impl.h | 55 + arch/alpha/oprofile/op_model_ev4.c | 114 + arch/alpha/oprofile/op_model_ev5.c | 209 + arch/alpha/oprofile/op_model_ev6.c | 101 + arch/alpha/oprofile/op_model_ev67.c | 261 ++ arch/arc/Kbuild | 6 + arch/arc/Kconfig | 561 +++ arch/arc/Kconfig.debug | 10 + arch/arc/Makefile | 114 + arch/arc/boot/.gitignore | 2 + arch/arc/boot/Makefile | 38 + arch/arc/boot/dts/Makefile | 17 + arch/arc/boot/dts/abilis_tb100.dtsi | 336 ++ arch/arc/boot/dts/abilis_tb100_dvk.dts | 116 + arch/arc/boot/dts/abilis_tb101.dtsi | 345 ++ arch/arc/boot/dts/abilis_tb101_dvk.dts | 116 + arch/arc/boot/dts/abilis_tb10x.dtsi | 243 ++ arch/arc/boot/dts/axc001.dtsi | 126 + arch/arc/boot/dts/axc003.dtsi | 161 + arch/arc/boot/dts/axc003_idu.dtsi | 167 + arch/arc/boot/dts/axs101.dts | 19 + arch/arc/boot/dts/axs103.dts | 22 + arch/arc/boot/dts/axs103_idu.dts | 22 + arch/arc/boot/dts/axs10x_mb.dtsi | 330 ++ arch/arc/boot/dts/haps_hs.dts | 99 + arch/arc/boot/dts/haps_hs_idu.dts | 74 + arch/arc/boot/dts/hsdk.dts | 351 ++ arch/arc/boot/dts/nsim_700.dts | 59 + arch/arc/boot/dts/nsimosci.dts | 88 + arch/arc/boot/dts/nsimosci_hs.dts | 90 + arch/arc/boot/dts/nsimosci_hs_idu.dts | 98 + arch/arc/boot/dts/skeleton.dtsi | 48 + arch/arc/boot/dts/skeleton_hs.dtsi | 49 + arch/arc/boot/dts/skeleton_hs_idu.dtsi | 61 + arch/arc/boot/dts/vdk_axc003.dtsi | 65 + arch/arc/boot/dts/vdk_axc003_idu.dtsi | 73 + arch/arc/boot/dts/vdk_axs10x_mb.dtsi | 126 + arch/arc/boot/dts/vdk_hs38.dts | 19 + arch/arc/boot/dts/vdk_hs38_smp.dts | 19 + arch/arc/configs/axs101_defconfig | 109 + arch/arc/configs/axs103_defconfig | 107 + arch/arc/configs/axs103_smp_defconfig | 110 + arch/arc/configs/haps_hs_defconfig | 65 + arch/arc/configs/haps_hs_smp_defconfig | 65 + arch/arc/configs/hsdk_defconfig | 96 + arch/arc/configs/nsim_700_defconfig | 61 + arch/arc/configs/nsimosci_defconfig | 69 + arch/arc/configs/nsimosci_hs_defconfig | 67 + arch/arc/configs/nsimosci_hs_smp_defconfig | 79 + arch/arc/configs/tb10x_defconfig | 106 + arch/arc/configs/vdk_hs38_defconfig | 98 + 
arch/arc/configs/vdk_hs38_smp_defconfig | 102 + arch/arc/include/asm/Kbuild | 6 + arch/arc/include/asm/arcregs.h | 359 ++ arch/arc/include/asm/asm-offsets.h | 6 + arch/arc/include/asm/asserts.h | 34 + arch/arc/include/asm/atomic-llsc.h | 97 + arch/arc/include/asm/atomic-spinlock.h | 102 + arch/arc/include/asm/atomic.h | 60 + arch/arc/include/asm/atomic64-arcv2.h | 250 ++ arch/arc/include/asm/barrier.h | 44 + arch/arc/include/asm/bitops.h | 197 + arch/arc/include/asm/bug.h | 34 + arch/arc/include/asm/cache.h | 128 + arch/arc/include/asm/cacheflush.h | 117 + arch/arc/include/asm/checksum.h | 98 + arch/arc/include/asm/cmpxchg.h | 143 + arch/arc/include/asm/current.h | 25 + arch/arc/include/asm/delay.h | 69 + arch/arc/include/asm/disasm.h | 113 + arch/arc/include/asm/dma.h | 16 + arch/arc/include/asm/dsp-impl.h | 152 + arch/arc/include/asm/dsp.h | 29 + arch/arc/include/asm/dwarf.h | 35 + arch/arc/include/asm/elf.h | 72 + arch/arc/include/asm/entry-arcv2.h | 284 ++ arch/arc/include/asm/entry-compact.h | 304 ++ arch/arc/include/asm/entry.h | 298 ++ arch/arc/include/asm/exec.h | 12 + arch/arc/include/asm/fb.h | 20 + arch/arc/include/asm/fpu.h | 57 + arch/arc/include/asm/futex.h | 169 + arch/arc/include/asm/highmem.h | 53 + arch/arc/include/asm/hugepage.h | 68 + arch/arc/include/asm/io.h | 235 ++ arch/arc/include/asm/irq.h | 29 + arch/arc/include/asm/irqflags-arcv2.h | 175 + arch/arc/include/asm/irqflags-compact.h | 201 + arch/arc/include/asm/irqflags.h | 16 + arch/arc/include/asm/jump_label.h | 72 + arch/arc/include/asm/kdebug.h | 16 + arch/arc/include/asm/kgdb.h | 60 + arch/arc/include/asm/kmap_types.h | 14 + arch/arc/include/asm/kprobes.h | 55 + arch/arc/include/asm/linkage.h | 80 + arch/arc/include/asm/mach_desc.h | 64 + arch/arc/include/asm/mmu-arcv2.h | 103 + arch/arc/include/asm/mmu.h | 21 + arch/arc/include/asm/mmu_context.h | 174 + arch/arc/include/asm/mmzone.h | 40 + arch/arc/include/asm/module.h | 21 + arch/arc/include/asm/page.h | 139 + arch/arc/include/asm/pci.h | 19 + arch/arc/include/asm/perf_event.h | 66 + arch/arc/include/asm/pgalloc.h | 97 + arch/arc/include/asm/pgtable-bits-arcv2.h | 149 + arch/arc/include/asm/pgtable-levels.h | 189 + arch/arc/include/asm/pgtable.h | 34 + arch/arc/include/asm/processor.h | 110 + arch/arc/include/asm/ptrace.h | 159 + arch/arc/include/asm/sections.h | 13 + arch/arc/include/asm/segment.h | 20 + arch/arc/include/asm/serial.h | 19 + arch/arc/include/asm/setup.h | 45 + arch/arc/include/asm/shmparam.h | 15 + arch/arc/include/asm/smp.h | 128 + arch/arc/include/asm/spinlock.h | 382 ++ arch/arc/include/asm/spinlock_types.h | 34 + arch/arc/include/asm/stacktrace.h | 34 + arch/arc/include/asm/string.h | 34 + arch/arc/include/asm/switch_to.h | 27 + arch/arc/include/asm/syscall.h | 78 + arch/arc/include/asm/syscalls.h | 22 + arch/arc/include/asm/thread_info.h | 106 + arch/arc/include/asm/timex.h | 15 + arch/arc/include/asm/tlb-mmu1.h | 101 + arch/arc/include/asm/tlb.h | 12 + arch/arc/include/asm/tlbflush.h | 42 + arch/arc/include/asm/uaccess.h | 673 ++++ arch/arc/include/asm/unaligned.h | 27 + arch/arc/include/asm/unwind.h | 156 + arch/arc/include/asm/vermagic.h | 8 + arch/arc/include/asm/vmalloc.h | 4 + arch/arc/include/uapi/asm/Kbuild | 2 + arch/arc/include/uapi/asm/byteorder.h | 19 + arch/arc/include/uapi/asm/cachectl.h | 29 + arch/arc/include/uapi/asm/elf.h | 35 + arch/arc/include/uapi/asm/page.h | 36 + arch/arc/include/uapi/asm/ptrace.h | 58 + arch/arc/include/uapi/asm/setup.h | 6 + arch/arc/include/uapi/asm/sigcontext.h | 24 + 
arch/arc/include/uapi/asm/signal.h | 28 + arch/arc/include/uapi/asm/swab.h | 99 + arch/arc/include/uapi/asm/unistd.h | 52 + arch/arc/kernel/.gitignore | 2 + arch/arc/kernel/Makefile | 34 + arch/arc/kernel/arc_hostlink.c | 55 + arch/arc/kernel/arcksyms.c | 54 + arch/arc/kernel/asm-offsets.c | 84 + arch/arc/kernel/ctx_sw.c | 112 + arch/arc/kernel/ctx_sw_asm.S | 60 + arch/arc/kernel/devtree.c | 75 + arch/arc/kernel/disasm.c | 535 +++ arch/arc/kernel/entry-arcv2.S | 264 ++ arch/arc/kernel/entry-compact.S | 403 ++ arch/arc/kernel/entry.S | 368 ++ arch/arc/kernel/fpu.c | 82 + arch/arc/kernel/head.S | 173 + arch/arc/kernel/intc-arcv2.c | 191 + arch/arc/kernel/intc-compact.c | 170 + arch/arc/kernel/irq.c | 51 + arch/arc/kernel/jump_label.c | 170 + arch/arc/kernel/kgdb.c | 206 + arch/arc/kernel/kprobes.c | 415 ++ arch/arc/kernel/mcip.c | 421 ++ arch/arc/kernel/module.c | 146 + arch/arc/kernel/perf_event.c | 850 +++++ arch/arc/kernel/process.c | 303 ++ arch/arc/kernel/ptrace.c | 270 ++ arch/arc/kernel/reset.c | 31 + arch/arc/kernel/setup.c | 701 ++++ arch/arc/kernel/signal.c | 442 +++ arch/arc/kernel/smp.c | 421 ++ arch/arc/kernel/stacktrace.c | 274 ++ arch/arc/kernel/sys.c | 18 + arch/arc/kernel/traps.c | 159 + arch/arc/kernel/troubleshoot.c | 227 ++ arch/arc/kernel/unaligned.c | 262 ++ arch/arc/kernel/unwind.c | 1313 +++++++ arch/arc/kernel/vmlinux.lds.S | 156 + arch/arc/lib/Makefile | 15 + arch/arc/lib/memcmp.S | 149 + arch/arc/lib/memcpy-700.S | 63 + arch/arc/lib/memcpy-archs-unaligned.S | 47 + arch/arc/lib/memcpy-archs.S | 219 ++ arch/arc/lib/memset-archs.S | 143 + arch/arc/lib/memset.S | 56 + arch/arc/lib/strchr-700.S | 130 + arch/arc/lib/strcmp-archs.S | 75 + arch/arc/lib/strcmp.S | 93 + arch/arc/lib/strcpy-700.S | 67 + arch/arc/lib/strlen.S | 80 + arch/arc/mm/Makefile | 8 + arch/arc/mm/cache.c | 1248 ++++++ arch/arc/mm/dma.c | 106 + arch/arc/mm/extable.c | 35 + arch/arc/mm/fault.c | 193 + arch/arc/mm/highmem.c | 73 + arch/arc/mm/init.c | 206 + arch/arc/mm/ioremap.c | 105 + arch/arc/mm/mmap.c | 76 + arch/arc/mm/tlb.c | 780 ++++ arch/arc/mm/tlbex.S | 378 ++ arch/arc/oprofile/Makefile | 10 + arch/arc/oprofile/common.c | 23 + arch/arc/plat-axs10x/Kconfig | 46 + arch/arc/plat-axs10x/Makefile | 6 + arch/arc/plat-axs10x/axs10x.c | 385 ++ arch/arc/plat-hsdk/Kconfig | 14 + arch/arc/plat-hsdk/Makefile | 6 + arch/arc/plat-hsdk/platform.c | 326 ++ arch/arc/plat-sim/Makefile | 6 + arch/arc/plat-sim/platform.c | 32 + arch/arc/plat-tb10x/Kconfig | 20 + arch/arc/plat-tb10x/Makefile | 10 + arch/arc/plat-tb10x/tb10x.c | 20 + arch/arm/ARMnn/Android.bp | 8 + arch/arm/ARMnn/Android.mk | 512 +++ arch/arm/ARMnn/BuildGuideAndroidNDK.md | 259 ++ arch/arm/ARMnn/BuildGuideCrossCompilation.md | 366 ++ arch/arm/ARMnn/CMakeLists.txt | 1060 ++++++ arch/arm/ARMnn/ContributorGuide.md | 47 + .../arm/ARMnn/InstallationViaAptRepository.md | 186 + arch/arm/ARMnn/LICENSE | 21 + arch/arm/ARMnn/README.md | 137 + arch/arm/ARMnn/SECURITY.md | 68 + arch/arm/ARMnn/cmake/AddDllCopyCommands.cmake | 39 + arch/arm/ARMnn/cmake/ArmnnConfig.cmake.in | 15 + arch/arm/ARMnn/cmake/ArmnnVersion.cmake | 19 + arch/arm/ARMnn/cmake/DelegateVersion.cmake | 18 + arch/arm/ARMnn/cmake/GlobalConfig.cmake | 410 ++ arch/arm/ARMnn/cmake/ParserVersion.cmake | 35 + arch/arm/ARMnn/cmake/Utils.cmake | 43 + arch/arm/ARMnn/delegate/BuildGuideNative.md | 261 ++ arch/arm/ARMnn/delegate/CMakeLists.txt | 308 ++ .../ARMnn/delegate/DelegateQuickStartGuide.md | 108 + arch/arm/ARMnn/delegate/README.md | 6 + .../Modules/ArmnnDelegateConfig.cmake.in | 23 + 
.../cmake/Modules/FindFlatbuffers.cmake | 32 + .../delegate/cmake/Modules/FindTfLite.cmake | 53 + .../cmake/Modules/FindTfLiteSrc.cmake | 28 + .../delegate/include/DelegateOptions.hpp | 287 ++ arch/arm/ARMnn/delegate/include/Version.hpp | 29 + .../ARMnn/delegate/include/armnn_delegate.hpp | 128 + .../ARMnn/delegate/python/test/conftest.py | 30 + .../arm/ARMnn/delegate/python/test/pytest.ini | 9 + .../python/test/test_data/conv2d.tflite | Bin 0 -> 728 bytes .../test/test_data/fallback_model.tflite | Bin 0 -> 704 bytes .../python/test/test_data/fp32_model.tflite | Bin 0 -> 688 bytes .../python/test/test_data/mock_model.tflite | Bin 0 -> 37944 bytes .../python/test/test_external_delegate.py | 274 ++ arch/arm/ARMnn/delegate/python/test/utils.py | 53 + arch/arm/ARMnn/delegate/src/Activation.hpp | 126 + arch/arm/ARMnn/delegate/src/ArgMinMax.hpp | 123 + arch/arm/ARMnn/delegate/src/BatchSpace.hpp | 198 + arch/arm/ARMnn/delegate/src/Comparison.hpp | 130 + arch/arm/ARMnn/delegate/src/Control.hpp | 311 ++ arch/arm/ARMnn/delegate/src/Convolution.hpp | 760 ++++ .../ARMnn/delegate/src/DelegateOptions.cpp | 238 ++ arch/arm/ARMnn/delegate/src/DelegateUtils.hpp | 653 ++++ .../ARMnn/delegate/src/ElementwiseBinary.hpp | 369 ++ .../ARMnn/delegate/src/ElementwiseUnary.hpp | 83 + arch/arm/ARMnn/delegate/src/Fill.hpp | 111 + .../arm/ARMnn/delegate/src/FullyConnected.hpp | 213 ++ arch/arm/ARMnn/delegate/src/Gather.hpp | 105 + arch/arm/ARMnn/delegate/src/LogicalBinary.hpp | 103 + arch/arm/ARMnn/delegate/src/Lstm.hpp | 265 ++ .../ARMnn/delegate/src/MultiLayerFacade.hpp | 147 + arch/arm/ARMnn/delegate/src/Normalization.hpp | 144 + arch/arm/ARMnn/delegate/src/Pack.hpp | 119 + arch/arm/ARMnn/delegate/src/Pad.hpp | 176 + arch/arm/ARMnn/delegate/src/Pooling.hpp | 116 + arch/arm/ARMnn/delegate/src/Prelu.hpp | 107 + arch/arm/ARMnn/delegate/src/Quantization.hpp | 158 + arch/arm/ARMnn/delegate/src/Redefine.hpp | 271 ++ arch/arm/ARMnn/delegate/src/Reduce.hpp | 137 + arch/arm/ARMnn/delegate/src/Resize.hpp | 198 + arch/arm/ARMnn/delegate/src/Round.hpp | 65 + arch/arm/ARMnn/delegate/src/Shape.hpp | 86 + .../ARMnn/delegate/src/SharedFunctions.cpp | 40 + .../ARMnn/delegate/src/SharedFunctions.hpp | 19 + arch/arm/ARMnn/delegate/src/Slice.hpp | 146 + arch/arm/ARMnn/delegate/src/Softmax.hpp | 147 + arch/arm/ARMnn/delegate/src/SpaceDepth.hpp | 134 + arch/arm/ARMnn/delegate/src/Split.hpp | 335 ++ arch/arm/ARMnn/delegate/src/Transpose.hpp | 102 + .../src/UnidirectionalSequenceLstm.hpp | 266 ++ arch/arm/ARMnn/delegate/src/Unpack.hpp | 208 + .../arm/ARMnn/delegate/src/armnn_delegate.cpp | 957 +++++ .../delegate/src/armnn_external_delegate.cpp | 68 + .../delegate/src/test/ActivationTest.cpp | 299 ++ .../src/test/ActivationTestHelper.hpp | 130 + .../ARMnn/delegate/src/test/ArgMinMaxTest.cpp | 174 + .../delegate/src/test/ArgMinMaxTestHelper.hpp | 198 + .../delegate/src/test/ArmnnDelegateTest.cpp | 93 + .../delegate/src/test/BatchSpaceTest.cpp | 299 ++ .../src/test/BatchSpaceTestHelper.hpp | 216 ++ arch/arm/ARMnn/delegate/src/test/CastTest.cpp | 95 + .../delegate/src/test/CastTestHelper.hpp | 157 + .../delegate/src/test/ComparisonTest.cpp | 844 +++++ .../src/test/ComparisonTestHelper.hpp | 235 ++ .../ARMnn/delegate/src/test/ControlTest.cpp | 420 ++ .../delegate/src/test/ControlTestHelper.hpp | 344 ++ .../delegate/src/test/Convolution2dTest.cpp | 489 +++ .../delegate/src/test/Convolution3dTest.cpp | 318 ++ .../src/test/ConvolutionTestHelper.hpp | 782 ++++ .../delegate/src/test/DelegateOptionsTest.cpp | 313 ++ 
.../src/test/DelegateOptionsTestHelper.hpp | 298 ++ .../src/test/DepthwiseConvolution2dTest.cpp | 282 ++ .../src/test/ElementwiseBinaryTest.cpp | 1136 ++++++ .../src/test/ElementwiseBinaryTestHelper.hpp | 242 ++ .../src/test/ElementwiseUnaryTest.cpp | 303 ++ .../src/test/ElementwiseUnaryTestHelper.hpp | 189 + arch/arm/ARMnn/delegate/src/test/FillTest.cpp | 221 ++ .../delegate/src/test/FillTestHelper.hpp | 160 + .../delegate/src/test/FullyConnectedTest.cpp | 178 + .../src/test/FullyConnectedTestHelper.hpp | 253 ++ .../ARMnn/delegate/src/test/GatherTest.cpp | 117 + .../delegate/src/test/GatherTestHelper.hpp | 181 + .../ARMnn/delegate/src/test/LogicalTest.cpp | 226 ++ .../delegate/src/test/LogicalTestHelper.hpp | 198 + arch/arm/ARMnn/delegate/src/test/LstmTest.cpp | 189 + .../delegate/src/test/LstmTestHelper.hpp | 691 ++++ .../ARMnn/delegate/src/test/MirrorPadTest.cpp | 341 ++ .../src/test/NeonDelegateTests_NDK_Issue.cpp | 63 + .../delegate/src/test/NormalizationTest.cpp | 72 + .../src/test/NormalizationTestHelper.hpp | 262 ++ arch/arm/ARMnn/delegate/src/test/PackTest.cpp | 516 +++ .../delegate/src/test/PackTestHelper.hpp | 185 + arch/arm/ARMnn/delegate/src/test/PadTest.cpp | 606 +++ .../ARMnn/delegate/src/test/PadTestHelper.hpp | 224 ++ .../ARMnn/delegate/src/test/Pooling2dTest.cpp | 1275 +++++++ .../delegate/src/test/Pooling2dTestHelper.hpp | 196 + .../arm/ARMnn/delegate/src/test/PreluTest.cpp | 134 + .../delegate/src/test/PreluTestHelper.hpp | 193 + .../delegate/src/test/QuantizationTest.cpp | 455 +++ .../src/test/QuantizationTestHelper.hpp | 197 + .../delegate/src/test/RedefineTestHelper.hpp | 192 + .../ARMnn/delegate/src/test/ReduceTest.cpp | 423 +++ .../delegate/src/test/ReduceTestHelper.hpp | 186 + .../ARMnn/delegate/src/test/ReshapeTest.cpp | 517 +++ .../ARMnn/delegate/src/test/ResizeTest.cpp | 134 + .../delegate/src/test/ResizeTestHelper.hpp | 192 + .../arm/ARMnn/delegate/src/test/RoundTest.cpp | 72 + .../delegate/src/test/RoundTestHelper.hpp | 161 + .../arm/ARMnn/delegate/src/test/ShapeTest.cpp | 45 + .../delegate/src/test/ShapeTestHelper.hpp | 171 + .../arm/ARMnn/delegate/src/test/SliceTest.cpp | 243 ++ .../delegate/src/test/SliceTestHelper.hpp | 241 ++ .../ARMnn/delegate/src/test/SoftmaxTest.cpp | 77 + .../delegate/src/test/SoftmaxTestHelper.hpp | 192 + .../delegate/src/test/SpaceDepthTest.cpp | 207 + .../src/test/SpaceDepthTestHelper.hpp | 166 + .../arm/ARMnn/delegate/src/test/SplitTest.cpp | 262 ++ .../delegate/src/test/SplitTestHelper.hpp | 368 ++ .../arm/ARMnn/delegate/src/test/TestUtils.cpp | 152 + .../arm/ARMnn/delegate/src/test/TestUtils.hpp | 101 + .../ARMnn/delegate/src/test/TransposeTest.cpp | 46 + .../delegate/src/test/TransposeTestHelper.hpp | 174 + .../test/UnidirectionalSequenceLstmTest.cpp | 1464 +++++++ .../UnidirectionalSequenceLstmTestHelper.hpp | 722 ++++ .../ARMnn/delegate/src/test/UnpackTest.cpp | 179 + .../delegate/src/test/UnpackTestHelper.hpp | 185 + arch/arm/ARMnn/docker/README.md | 34 + .../arm/ARMnn/docker/armnn-android/Dockerfile | 164 + .../docker/armnn-android/docker-entrypoint.sh | 116 + arch/arm/ARMnn/docker/x86_64/Dockerfile | 213 ++ arch/arm/ARMnn/docs/01_00_quick_start.dox | 25 + arch/arm/ARMnn/docs/02_operator_list.dox | 3370 +++++++++++++++++ arch/arm/ARMnn/docs/03_build_guides.dox | 35 + arch/arm/ARMnn/docs/04_contributor.dox | 39 + .../ARMnn/docs/05_00_software_components.dox | 46 + arch/arm/ARMnn/docs/05_01_parsers.dox | 208 + .../docs/05_02_deserializer_serializer.dox | 185 + arch/arm/ARMnn/docs/05_03_delegate.dox | 178 + 
.../arm/ARMnn/docs/Arm_NN_horizontal_blue.png | Bin 0 -> 16540 bytes arch/arm/ARMnn/docs/Doxyfile | 2541 +++++++++++++ arch/arm/ARMnn/docs/FAQ.md | 58 + arch/arm/ARMnn/docs/PerformanceChart.png | Bin 0 -> 63529 bytes arch/arm/ARMnn/docs/header.xhtml | 55 + arch/arm/ARMnn/docs/pyarmnn.png | Bin 0 -> 74951 bytes arch/arm/ARMnn/docs/stylesheet.css | 213 ++ arch/arm/ARMnn/include/armnn/ArmNN.hpp | 20 + .../arm/ARMnn/include/armnn/BackendHelper.hpp | 459 +++ arch/arm/ARMnn/include/armnn/BackendId.hpp | 196 + .../ARMnn/include/armnn/BackendOptions.hpp | 339 ++ .../ARMnn/include/armnn/BackendRegistry.hpp | 82 + arch/arm/ARMnn/include/armnn/Conversion.hpp | 43 + arch/arm/ARMnn/include/armnn/Deprecated.hpp | 55 + arch/arm/ARMnn/include/armnn/Descriptors.hpp | 1528 ++++++++ .../ARMnn/include/armnn/DescriptorsFwd.hpp | 63 + arch/arm/ARMnn/include/armnn/Exceptions.hpp | 209 + .../include/armnn/IAsyncExecutionCallback.hpp | 30 + .../arm/ARMnn/include/armnn/ILayerVisitor.hpp | 514 +++ arch/arm/ARMnn/include/armnn/INetwork.hpp | 845 +++++ arch/arm/ARMnn/include/armnn/IProfiler.hpp | 78 + arch/arm/ARMnn/include/armnn/IRuntime.hpp | 341 ++ arch/arm/ARMnn/include/armnn/IStrategy.hpp | 31 + .../ARMnn/include/armnn/IWorkingMemHandle.hpp | 46 + arch/arm/ARMnn/include/armnn/LayerSupport.hpp | 402 ++ .../ARMnn/include/armnn/LayerVisitorBase.hpp | 263 ++ arch/arm/ARMnn/include/armnn/Logging.hpp | 208 + arch/arm/ARMnn/include/armnn/LstmParams.hpp | 208 + .../arm/ARMnn/include/armnn/MemorySources.hpp | 46 + arch/arm/ARMnn/include/armnn/NetworkFwd.hpp | 21 + arch/arm/ARMnn/include/armnn/Optional.hpp | 310 ++ .../include/armnn/QuantizedLstmParams.hpp | 218 ++ arch/arm/ARMnn/include/armnn/StrategyBase.hpp | 55 + arch/arm/ARMnn/include/armnn/Tensor.hpp | 395 ++ arch/arm/ARMnn/include/armnn/TensorFwd.hpp | 15 + arch/arm/ARMnn/include/armnn/Threadpool.hpp | 78 + arch/arm/ARMnn/include/armnn/Types.hpp | 469 +++ arch/arm/ARMnn/include/armnn/TypesUtils.hpp | 349 ++ arch/arm/ARMnn/include/armnn/Utils.hpp | 44 + arch/arm/ARMnn/include/armnn/Version.hpp | 24 + .../include/armnn/backends/CMakeLists.txt | 23 + .../armnn/backends/CpuTensorHandleFwd.hpp | 20 + .../include/armnn/backends/DynamicBackend.hpp | 54 + .../armnn/backends/IBackendContext.hpp | 42 + .../armnn/backends/IBackendInternal.hpp | 215 ++ .../armnn/backends/ICustomAllocator.hpp | 56 + .../include/armnn/backends/ILayerSupport.hpp | 574 +++ .../include/armnn/backends/IMemoryManager.hpp | 26 + .../backends/IMemoryOptimizerStrategy.hpp | 57 + .../include/armnn/backends/ITensorHandle.hpp | 96 + .../armnn/backends/ITensorHandleFactory.hpp | 108 + .../include/armnn/backends/IWorkload.hpp | 47 + .../armnn/backends/MemCopyWorkload.hpp | 27 + .../armnn/backends/OptimizationViews.hpp | 78 + .../include/armnn/backends/SubgraphView.hpp | 198 + .../include/armnn/backends/TensorHandle.hpp | 267 ++ .../armnn/backends/TensorHandleFwd.hpp | 17 + .../ARMnn/include/armnn/backends/Workload.hpp | 238 ++ .../include/armnn/backends/WorkloadData.hpp | 771 ++++ .../armnn/backends/WorkloadFactory.hpp | 431 +++ .../include/armnn/backends/WorkloadInfo.hpp | 25 + .../backends/profiling/IBackendProfiling.hpp | 104 + .../profiling/IBackendProfilingContext.hpp | 30 + .../armnn/profiling/ILocalPacketHandler.hpp | 78 + .../armnn/profiling/ISendTimelinePacket.hpp | 61 + .../ARMnn/include/armnn/utility/Assert.hpp | 21 + .../include/armnn/utility/IgnoreUnused.hpp | 16 + .../include/armnn/utility/NumericCast.hpp | 250 ++ .../armnn/utility/PolymorphicDowncast.hpp | 100 + 
.../include/armnn/utility/StringUtils.hpp | 164 + .../arm/ARMnn/include/armnn/utility/Timer.hpp | 25 + .../armnn/utility/TransformIterator.hpp | 86 + .../armnnDeserializer/IDeserializer.hpp | 55 + .../include/armnnOnnxParser/IOnnxParser.hpp | 65 + .../ARMnn/include/armnnOnnxParser/Version.hpp | 29 + .../include/armnnSerializer/ISerializer.hpp | 41 + .../armnnTestUtils/DataLayoutUtils.hpp | 60 + .../armnnTestUtils/LayerTestResult.hpp | 63 + .../armnnTestUtils/MemCopyTestImpl.hpp | 115 + .../include/armnnTestUtils/MockBackend.hpp | 115 + .../armnnTestUtils/MockMemoryManager.hpp | 59 + .../armnnTestUtils/MockTensorHandle.hpp | 81 + .../armnnTestUtils/PredicateResult.hpp | 48 + .../armnnTestUtils/TensorCopyUtils.hpp | 15 + .../include/armnnTestUtils/TensorHelpers.hpp | 235 ++ .../armnnTestUtils/WorkloadTestUtils.hpp | 113 + .../armnnTfLiteParser/ITfLiteParser.hpp | 75 + .../include/armnnTfLiteParser/Version.hpp | 29 + .../include/armnnUtils/CompatibleTypes.hpp | 16 + .../include/armnnUtils/DataLayoutIndexed.hpp | 74 + .../ARMnn/include/armnnUtils/Filesystem.hpp | 25 + .../armnnUtils/FloatingPointComparison.hpp | 27 + .../armnnUtils/FloatingPointConverter.hpp | 29 + arch/arm/ARMnn/include/armnnUtils/Permute.hpp | 23 + .../include/armnnUtils/QuantizeHelper.hpp | 132 + .../ARMnn/include/armnnUtils/TContainer.hpp | 20 + .../ARMnn/include/armnnUtils/TensorUtils.hpp | 47 + arch/arm/ARMnn/include/armnnUtils/Threads.hpp | 16 + .../ARMnn/include/armnnUtils/Transpose.hpp | 21 + arch/arm/ARMnn/profiling/CMakeLists.txt | 36 + arch/arm/ARMnn/profiling/buildpipe.sh | 91 + .../ARMnn/profiling/common/include/Assert.hpp | 24 + .../common/include/CommandHandlerFunctor.hpp | 42 + .../common/include/CommandHandlerKey.hpp | 41 + .../common/include/CommandHandlerRegistry.hpp | 49 + .../common/include/CommonProfilingUtils.hpp | 38 + .../profiling/common/include/Constants.hpp | 14 + .../profiling/common/include/Conversion.hpp | 43 + .../profiling/common/include/DllExport.hpp | 19 + .../common/include/EncodeVersion.hpp | 83 + .../include/IProfilingGuidGenerator.hpp | 32 + .../profiling/common/include/IgnoreUnused.hpp | 18 + .../common/include/LabelsAndEventClasses.hpp | 69 + .../profiling/common/include/Logging.hpp | 182 + .../common/include/NetworkSockets.hpp | 79 + .../profiling/common/include/NumericCast.hpp | 128 + .../ARMnn/profiling/common/include/Packet.hpp | 91 + .../common/include/PacketVersionResolver.hpp | 50 + .../common/include/ProfilingException.hpp | 82 + .../common/include/ProfilingGuid.hpp | 113 + .../common/include/ProfilingGuidGenerator.hpp | 62 + .../include/SocketConnectionException.hpp | 54 + .../profiling/common/include/SwTrace.hpp | 139 + .../common/include/WindowsWrapper.hpp | 30 + .../ARMnn/profiling/common/src/CMakeLists.txt | 40 + .../common/src/CommandHandlerFunctor.cpp | 31 + .../common/src/CommandHandlerKey.cpp | 77 + .../common/src/CommandHandlerRegistry.cpp | 61 + .../common/src/CommonProfilingUtils.cpp | 145 + .../common/src/LabelsAndEventClasses.cpp | 79 + .../profiling/common/src/NetworkSockets.cpp | 114 + .../common/src/PacketVersionResolver.cpp | 71 + .../ARMnn/profiling/common/src/SwTrace.cpp | 128 + .../include/basePipeServer/BasePipeServer.hpp | 120 + .../basePipeServer/ConnectionHandler.hpp | 48 + .../timelineDecoder/ITimelineDecoder.hpp | 91 + .../TimelineCaptureCommandHandler.hpp | 63 + .../timelineDecoder/TimelineDecoder.hpp | 79 + ...TimelineDirectoryCaptureCommandHandler.hpp | 50 + .../src/basePipeServer/BasePipeServer.cpp | 302 ++ 
.../server/src/basePipeServer/CMakeLists.txt | 49 + .../src/basePipeServer/ConnectionHandler.cpp | 60 + .../tests/BasePipeServerTests.cpp | 97 + .../server/src/timelineDecoder/CMakeLists.txt | 44 + .../TimelineCaptureCommandHandler.cpp | 166 + .../src/timelineDecoder/TimelineDecoder.cpp | 330 ++ ...TimelineDirectoryCaptureCommandHandler.cpp | 117 + .../timelineDecoder/tests/TimelineTests.cpp | 373 ++ .../toolchain-x86-ubuntu-mingw64.cmake | 28 + arch/arm/ARMnn/python/pyarmnn/.gitignore | 7 + arch/arm/ARMnn/python/pyarmnn/CMakeLists.txt | 54 + arch/arm/ARMnn/python/pyarmnn/LICENSE | 22 + arch/arm/ARMnn/python/pyarmnn/README.md | 256 ++ arch/arm/ARMnn/python/pyarmnn/conftest.py | 52 + .../python/pyarmnn/docs_conf/config.mako | 34 + .../pyarmnn/examples/common/audio_capture.py | 149 + .../pyarmnn/examples/common/cv_utils.py | 192 + .../python/pyarmnn/examples/common/mfcc.py | 238 ++ .../examples/common/network_executor.py | 108 + .../python/pyarmnn/examples/common/utils.py | 108 + .../examples/image_classification/README.md | 46 + .../image_classification/example_utils.py | 358 ++ .../image_classification/onnx_mobilenetv2.py | 92 + .../image_classification/requirements.txt | 4 + .../tflite_mobilenetv1_quantized.py | 54 + .../examples/keyword_spotting/README.MD | 189 + .../examples/keyword_spotting/__init__.py | 0 .../examples/keyword_spotting/audio_utils.py | 31 + .../keyword_spotting/requirements.txt | 5 + .../run_audio_classification.py | 136 + .../examples/object_detection/README.md | 196 + .../object_detection/requirements.txt | 2 + .../object_detection/run_video_file.py | 87 + .../object_detection/run_video_stream.py | 90 + arch/arm/ARMnn/python/pyarmnn/pylintconfig | 486 +++ arch/arm/ARMnn/python/pyarmnn/setup.py | 346 ++ .../arm/ARMnn/python/pyarmnn/swig_generate.py | 115 + arch/arm/ARMnn/python/pyarmnn/tox.ini | 63 + arch/arm/Kbuild | 14 + arch/arm/Kconfig | 2006 ++++++++++ arch/arm/Kconfig-nommu | 66 + arch/arm/Kconfig.assembler | 6 + arch/arm/Kconfig.debug | 1929 ++++++++++ arch/arm/Makefile | 339 ++ 863 files changed, 158774 insertions(+) create mode 100644 arch/.gitignore create mode 100644 arch/Kconfig create mode 100644 arch/alpha/Kbuild create mode 100644 arch/alpha/Kconfig create mode 100644 arch/alpha/Kconfig.debug create mode 100644 arch/alpha/Makefile create mode 100644 arch/alpha/boot/Makefile create mode 100644 arch/alpha/boot/bootloader.lds create mode 100644 arch/alpha/boot/bootp.c create mode 100644 arch/alpha/boot/bootpz.c create mode 100644 arch/alpha/boot/head.S create mode 100644 arch/alpha/boot/main.c create mode 100644 arch/alpha/boot/misc.c create mode 100644 arch/alpha/boot/stdio.c create mode 100644 arch/alpha/boot/tools/mkbb.c create mode 100644 arch/alpha/boot/tools/objstrip.c create mode 100644 arch/alpha/configs/defconfig create mode 100644 arch/alpha/include/asm/Kbuild create mode 100644 arch/alpha/include/asm/a.out.h create mode 100644 arch/alpha/include/asm/agp.h create mode 100644 arch/alpha/include/asm/agp_backend.h create mode 100644 arch/alpha/include/asm/asm-offsets.h create mode 100644 arch/alpha/include/asm/asm-prototypes.h create mode 100644 arch/alpha/include/asm/atomic.h create mode 100644 arch/alpha/include/asm/barrier.h create mode 100644 arch/alpha/include/asm/bitops.h create mode 100644 arch/alpha/include/asm/bug.h create mode 100644 arch/alpha/include/asm/bugs.h create mode 100644 arch/alpha/include/asm/cache.h create mode 100644 arch/alpha/include/asm/cacheflush.h create mode 100644 arch/alpha/include/asm/checksum.h create mode 100644 
arch/alpha/include/asm/cmpxchg.h create mode 100644 arch/alpha/include/asm/compiler.h create mode 100644 arch/alpha/include/asm/console.h create mode 100644 arch/alpha/include/asm/core_apecs.h create mode 100644 arch/alpha/include/asm/core_cia.h create mode 100644 arch/alpha/include/asm/core_irongate.h create mode 100644 arch/alpha/include/asm/core_lca.h create mode 100644 arch/alpha/include/asm/core_marvel.h create mode 100644 arch/alpha/include/asm/core_mcpcia.h create mode 100644 arch/alpha/include/asm/core_polaris.h create mode 100644 arch/alpha/include/asm/core_t2.h create mode 100644 arch/alpha/include/asm/core_titan.h create mode 100644 arch/alpha/include/asm/core_tsunami.h create mode 100644 arch/alpha/include/asm/core_wildfire.h create mode 100644 arch/alpha/include/asm/delay.h create mode 100644 arch/alpha/include/asm/device.h create mode 100644 arch/alpha/include/asm/div64.h create mode 100644 arch/alpha/include/asm/dma-mapping.h create mode 100644 arch/alpha/include/asm/dma.h create mode 100644 arch/alpha/include/asm/elf.h create mode 100644 arch/alpha/include/asm/emergency-restart.h create mode 100644 arch/alpha/include/asm/err_common.h create mode 100644 arch/alpha/include/asm/err_ev6.h create mode 100644 arch/alpha/include/asm/err_ev7.h create mode 100644 arch/alpha/include/asm/extable.h create mode 100644 arch/alpha/include/asm/floppy.h create mode 100644 arch/alpha/include/asm/fpu.h create mode 100644 arch/alpha/include/asm/ftrace.h create mode 100644 arch/alpha/include/asm/futex.h create mode 100644 arch/alpha/include/asm/gct.h create mode 100644 arch/alpha/include/asm/hardirq.h create mode 100644 arch/alpha/include/asm/hw_irq.h create mode 100644 arch/alpha/include/asm/hwrpb.h create mode 100644 arch/alpha/include/asm/io.h create mode 100644 arch/alpha/include/asm/io_trivial.h create mode 100644 arch/alpha/include/asm/irq.h create mode 100644 arch/alpha/include/asm/irq_regs.h create mode 100644 arch/alpha/include/asm/irqflags.h create mode 100644 arch/alpha/include/asm/jensen.h create mode 100644 arch/alpha/include/asm/kdebug.h create mode 100644 arch/alpha/include/asm/kmap_types.h create mode 100644 arch/alpha/include/asm/linkage.h create mode 100644 arch/alpha/include/asm/local.h create mode 100644 arch/alpha/include/asm/machvec.h create mode 100644 arch/alpha/include/asm/mc146818rtc.h create mode 100644 arch/alpha/include/asm/mce.h create mode 100644 arch/alpha/include/asm/mmu.h create mode 100644 arch/alpha/include/asm/mmu_context.h create mode 100644 arch/alpha/include/asm/mmzone.h create mode 100644 arch/alpha/include/asm/module.h create mode 100644 arch/alpha/include/asm/page.h create mode 100644 arch/alpha/include/asm/pal.h create mode 100644 arch/alpha/include/asm/param.h create mode 100644 arch/alpha/include/asm/parport.h create mode 100644 arch/alpha/include/asm/pci.h create mode 100644 arch/alpha/include/asm/percpu.h create mode 100644 arch/alpha/include/asm/perf_event.h create mode 100644 arch/alpha/include/asm/pgalloc.h create mode 100644 arch/alpha/include/asm/pgtable.h create mode 100644 arch/alpha/include/asm/processor.h create mode 100644 arch/alpha/include/asm/ptrace.h create mode 100644 arch/alpha/include/asm/rwonce.h create mode 100644 arch/alpha/include/asm/serial.h create mode 100644 arch/alpha/include/asm/setup.h create mode 100644 arch/alpha/include/asm/sfp-machine.h create mode 100644 arch/alpha/include/asm/shmparam.h create mode 100644 arch/alpha/include/asm/signal.h create mode 100644 arch/alpha/include/asm/smp.h create mode 100644 
arch/alpha/include/asm/socket.h create mode 100644 arch/alpha/include/asm/sparsemem.h create mode 100644 arch/alpha/include/asm/special_insns.h create mode 100644 arch/alpha/include/asm/spinlock.h create mode 100644 arch/alpha/include/asm/spinlock_types.h create mode 100644 arch/alpha/include/asm/string.h create mode 100644 arch/alpha/include/asm/switch_to.h create mode 100644 arch/alpha/include/asm/syscall.h create mode 100644 arch/alpha/include/asm/termios.h create mode 100644 arch/alpha/include/asm/thread_info.h create mode 100644 arch/alpha/include/asm/timex.h create mode 100644 arch/alpha/include/asm/tlb.h create mode 100644 arch/alpha/include/asm/tlbflush.h create mode 100644 arch/alpha/include/asm/topology.h create mode 100644 arch/alpha/include/asm/types.h create mode 100644 arch/alpha/include/asm/uaccess.h create mode 100644 arch/alpha/include/asm/ucontext.h create mode 100644 arch/alpha/include/asm/unaligned.h create mode 100644 arch/alpha/include/asm/unistd.h create mode 100644 arch/alpha/include/asm/user.h create mode 100644 arch/alpha/include/asm/vga.h create mode 100644 arch/alpha/include/asm/vmalloc.h create mode 100644 arch/alpha/include/asm/word-at-a-time.h create mode 100644 arch/alpha/include/asm/wrperfmon.h create mode 100644 arch/alpha/include/asm/xchg.h create mode 100644 arch/alpha/include/asm/xor.h create mode 100644 arch/alpha/include/uapi/asm/Kbuild create mode 100644 arch/alpha/include/uapi/asm/a.out.h create mode 100644 arch/alpha/include/uapi/asm/auxvec.h create mode 100644 arch/alpha/include/uapi/asm/bitsperlong.h create mode 100644 arch/alpha/include/uapi/asm/byteorder.h create mode 100644 arch/alpha/include/uapi/asm/compiler.h create mode 100644 arch/alpha/include/uapi/asm/console.h create mode 100644 arch/alpha/include/uapi/asm/errno.h create mode 100644 arch/alpha/include/uapi/asm/fcntl.h create mode 100644 arch/alpha/include/uapi/asm/fpu.h create mode 100644 arch/alpha/include/uapi/asm/gentrap.h create mode 100644 arch/alpha/include/uapi/asm/ioctl.h create mode 100644 arch/alpha/include/uapi/asm/ioctls.h create mode 100644 arch/alpha/include/uapi/asm/mman.h create mode 100644 arch/alpha/include/uapi/asm/pal.h create mode 100644 arch/alpha/include/uapi/asm/param.h create mode 100644 arch/alpha/include/uapi/asm/posix_types.h create mode 100644 arch/alpha/include/uapi/asm/ptrace.h create mode 100644 arch/alpha/include/uapi/asm/reg.h create mode 100644 arch/alpha/include/uapi/asm/regdef.h create mode 100644 arch/alpha/include/uapi/asm/resource.h create mode 100644 arch/alpha/include/uapi/asm/setup.h create mode 100644 arch/alpha/include/uapi/asm/sigcontext.h create mode 100644 arch/alpha/include/uapi/asm/siginfo.h create mode 100644 arch/alpha/include/uapi/asm/signal.h create mode 100644 arch/alpha/include/uapi/asm/socket.h create mode 100644 arch/alpha/include/uapi/asm/sockios.h create mode 100644 arch/alpha/include/uapi/asm/stat.h create mode 100644 arch/alpha/include/uapi/asm/statfs.h create mode 100644 arch/alpha/include/uapi/asm/swab.h create mode 100644 arch/alpha/include/uapi/asm/sysinfo.h create mode 100644 arch/alpha/include/uapi/asm/termbits.h create mode 100644 arch/alpha/include/uapi/asm/termios.h create mode 100644 arch/alpha/include/uapi/asm/types.h create mode 100644 arch/alpha/include/uapi/asm/unistd.h create mode 100644 arch/alpha/kernel/.gitignore create mode 100644 arch/alpha/kernel/Makefile create mode 100644 arch/alpha/kernel/asm-offsets.c create mode 100644 arch/alpha/kernel/audit.c create mode 100644 arch/alpha/kernel/binfmt_loader.c 
create mode 100644 arch/alpha/kernel/bugs.c create mode 100644 arch/alpha/kernel/console.c create mode 100644 arch/alpha/kernel/core_apecs.c create mode 100644 arch/alpha/kernel/core_cia.c create mode 100644 arch/alpha/kernel/core_irongate.c create mode 100644 arch/alpha/kernel/core_lca.c create mode 100644 arch/alpha/kernel/core_marvel.c create mode 100644 arch/alpha/kernel/core_mcpcia.c create mode 100644 arch/alpha/kernel/core_polaris.c create mode 100644 arch/alpha/kernel/core_t2.c create mode 100644 arch/alpha/kernel/core_titan.c create mode 100644 arch/alpha/kernel/core_tsunami.c create mode 100644 arch/alpha/kernel/core_wildfire.c create mode 100644 arch/alpha/kernel/entry.S create mode 100644 arch/alpha/kernel/err_common.c create mode 100644 arch/alpha/kernel/err_ev6.c create mode 100644 arch/alpha/kernel/err_ev7.c create mode 100644 arch/alpha/kernel/err_impl.h create mode 100644 arch/alpha/kernel/err_marvel.c create mode 100644 arch/alpha/kernel/err_titan.c create mode 100644 arch/alpha/kernel/es1888.c create mode 100644 arch/alpha/kernel/gct.c create mode 100644 arch/alpha/kernel/head.S create mode 100644 arch/alpha/kernel/io.c create mode 100644 arch/alpha/kernel/irq.c create mode 100644 arch/alpha/kernel/irq_alpha.c create mode 100644 arch/alpha/kernel/irq_i8259.c create mode 100644 arch/alpha/kernel/irq_impl.h create mode 100644 arch/alpha/kernel/irq_pyxis.c create mode 100644 arch/alpha/kernel/irq_srm.c create mode 100644 arch/alpha/kernel/machvec_impl.h create mode 100644 arch/alpha/kernel/module.c create mode 100644 arch/alpha/kernel/osf_sys.c create mode 100644 arch/alpha/kernel/pc873xx.c create mode 100644 arch/alpha/kernel/pc873xx.h create mode 100644 arch/alpha/kernel/pci-noop.c create mode 100644 arch/alpha/kernel/pci-sysfs.c create mode 100644 arch/alpha/kernel/pci.c create mode 100644 arch/alpha/kernel/pci_impl.h create mode 100644 arch/alpha/kernel/pci_iommu.c create mode 100644 arch/alpha/kernel/perf_event.c create mode 100644 arch/alpha/kernel/process.c create mode 100644 arch/alpha/kernel/proto.h create mode 100644 arch/alpha/kernel/ptrace.c create mode 100644 arch/alpha/kernel/rtc.c create mode 100644 arch/alpha/kernel/setup.c create mode 100644 arch/alpha/kernel/signal.c create mode 100644 arch/alpha/kernel/smc37c669.c create mode 100644 arch/alpha/kernel/smc37c93x.c create mode 100644 arch/alpha/kernel/smp.c create mode 100644 arch/alpha/kernel/srm_env.c create mode 100644 arch/alpha/kernel/srmcons.c create mode 100644 arch/alpha/kernel/sys_alcor.c create mode 100644 arch/alpha/kernel/sys_cabriolet.c create mode 100644 arch/alpha/kernel/sys_dp264.c create mode 100644 arch/alpha/kernel/sys_eb64p.c create mode 100644 arch/alpha/kernel/sys_eiger.c create mode 100644 arch/alpha/kernel/sys_jensen.c create mode 100644 arch/alpha/kernel/sys_marvel.c create mode 100644 arch/alpha/kernel/sys_miata.c create mode 100644 arch/alpha/kernel/sys_mikasa.c create mode 100644 arch/alpha/kernel/sys_nautilus.c create mode 100644 arch/alpha/kernel/sys_noritake.c create mode 100644 arch/alpha/kernel/sys_rawhide.c create mode 100644 arch/alpha/kernel/sys_ruffian.c create mode 100644 arch/alpha/kernel/sys_rx164.c create mode 100644 arch/alpha/kernel/sys_sable.c create mode 100644 arch/alpha/kernel/sys_sio.c create mode 100644 arch/alpha/kernel/sys_sx164.c create mode 100644 arch/alpha/kernel/sys_takara.c create mode 100644 arch/alpha/kernel/sys_titan.c create mode 100644 arch/alpha/kernel/sys_wildfire.c create mode 100644 arch/alpha/kernel/syscalls/Makefile create mode 100644 
arch/alpha/kernel/syscalls/syscall.tbl create mode 100644 arch/alpha/kernel/syscalls/syscallhdr.sh create mode 100644 arch/alpha/kernel/syscalls/syscalltbl.sh create mode 100644 arch/alpha/kernel/systbls.S create mode 100644 arch/alpha/kernel/time.c create mode 100644 arch/alpha/kernel/traps.c create mode 100644 arch/alpha/kernel/vmlinux.lds.S create mode 100644 arch/alpha/lib/Makefile create mode 100644 arch/alpha/lib/callback_srm.S create mode 100644 arch/alpha/lib/checksum.c create mode 100644 arch/alpha/lib/clear_page.S create mode 100644 arch/alpha/lib/clear_user.S create mode 100644 arch/alpha/lib/copy_page.S create mode 100644 arch/alpha/lib/copy_user.S create mode 100644 arch/alpha/lib/csum_ipv6_magic.S create mode 100644 arch/alpha/lib/csum_partial_copy.c create mode 100644 arch/alpha/lib/dbg_current.S create mode 100644 arch/alpha/lib/dbg_stackcheck.S create mode 100644 arch/alpha/lib/dbg_stackkill.S create mode 100644 arch/alpha/lib/divide.S create mode 100644 arch/alpha/lib/ev6-clear_page.S create mode 100644 arch/alpha/lib/ev6-clear_user.S create mode 100644 arch/alpha/lib/ev6-copy_page.S create mode 100644 arch/alpha/lib/ev6-copy_user.S create mode 100644 arch/alpha/lib/ev6-csum_ipv6_magic.S create mode 100644 arch/alpha/lib/ev6-divide.S create mode 100644 arch/alpha/lib/ev6-memchr.S create mode 100644 arch/alpha/lib/ev6-memcpy.S create mode 100644 arch/alpha/lib/ev6-memset.S create mode 100644 arch/alpha/lib/ev6-stxcpy.S create mode 100644 arch/alpha/lib/ev6-stxncpy.S create mode 100644 arch/alpha/lib/ev67-strcat.S create mode 100644 arch/alpha/lib/ev67-strchr.S create mode 100644 arch/alpha/lib/ev67-strlen.S create mode 100644 arch/alpha/lib/ev67-strncat.S create mode 100644 arch/alpha/lib/ev67-strrchr.S create mode 100644 arch/alpha/lib/fls.c create mode 100644 arch/alpha/lib/fpreg.c create mode 100644 arch/alpha/lib/memchr.S create mode 100644 arch/alpha/lib/memcpy.c create mode 100644 arch/alpha/lib/memmove.S create mode 100644 arch/alpha/lib/memset.S create mode 100644 arch/alpha/lib/srm_printk.c create mode 100644 arch/alpha/lib/srm_puts.c create mode 100644 arch/alpha/lib/stacktrace.c create mode 100644 arch/alpha/lib/strcat.S create mode 100644 arch/alpha/lib/strchr.S create mode 100644 arch/alpha/lib/strcpy.S create mode 100644 arch/alpha/lib/strlen.S create mode 100644 arch/alpha/lib/strncat.S create mode 100644 arch/alpha/lib/strncpy.S create mode 100644 arch/alpha/lib/strrchr.S create mode 100644 arch/alpha/lib/stxcpy.S create mode 100644 arch/alpha/lib/stxncpy.S create mode 100644 arch/alpha/lib/udelay.c create mode 100644 arch/alpha/lib/udiv-qrnnd.S create mode 100644 arch/alpha/math-emu/Makefile create mode 100644 arch/alpha/math-emu/math.c create mode 100644 arch/alpha/math-emu/qrnnd.S create mode 100644 arch/alpha/math-emu/sfp-util.h create mode 100644 arch/alpha/mm/Makefile create mode 100644 arch/alpha/mm/fault.c create mode 100644 arch/alpha/mm/init.c create mode 100644 arch/alpha/mm/numa.c create mode 100644 arch/alpha/oprofile/Makefile create mode 100644 arch/alpha/oprofile/common.c create mode 100644 arch/alpha/oprofile/op_impl.h create mode 100644 arch/alpha/oprofile/op_model_ev4.c create mode 100644 arch/alpha/oprofile/op_model_ev5.c create mode 100644 arch/alpha/oprofile/op_model_ev6.c create mode 100644 arch/alpha/oprofile/op_model_ev67.c create mode 100644 arch/arc/Kbuild create mode 100644 arch/arc/Kconfig create mode 100644 arch/arc/Kconfig.debug create mode 100644 arch/arc/Makefile create mode 100644 arch/arc/boot/.gitignore create mode 100644 
arch/arc/boot/Makefile create mode 100644 arch/arc/boot/dts/Makefile create mode 100644 arch/arc/boot/dts/abilis_tb100.dtsi create mode 100644 arch/arc/boot/dts/abilis_tb100_dvk.dts create mode 100644 arch/arc/boot/dts/abilis_tb101.dtsi create mode 100644 arch/arc/boot/dts/abilis_tb101_dvk.dts create mode 100644 arch/arc/boot/dts/abilis_tb10x.dtsi create mode 100644 arch/arc/boot/dts/axc001.dtsi create mode 100644 arch/arc/boot/dts/axc003.dtsi create mode 100644 arch/arc/boot/dts/axc003_idu.dtsi create mode 100644 arch/arc/boot/dts/axs101.dts create mode 100644 arch/arc/boot/dts/axs103.dts create mode 100644 arch/arc/boot/dts/axs103_idu.dts create mode 100644 arch/arc/boot/dts/axs10x_mb.dtsi create mode 100644 arch/arc/boot/dts/haps_hs.dts create mode 100644 arch/arc/boot/dts/haps_hs_idu.dts create mode 100644 arch/arc/boot/dts/hsdk.dts create mode 100644 arch/arc/boot/dts/nsim_700.dts create mode 100644 arch/arc/boot/dts/nsimosci.dts create mode 100644 arch/arc/boot/dts/nsimosci_hs.dts create mode 100644 arch/arc/boot/dts/nsimosci_hs_idu.dts create mode 100644 arch/arc/boot/dts/skeleton.dtsi create mode 100644 arch/arc/boot/dts/skeleton_hs.dtsi create mode 100644 arch/arc/boot/dts/skeleton_hs_idu.dtsi create mode 100644 arch/arc/boot/dts/vdk_axc003.dtsi create mode 100644 arch/arc/boot/dts/vdk_axc003_idu.dtsi create mode 100644 arch/arc/boot/dts/vdk_axs10x_mb.dtsi create mode 100644 arch/arc/boot/dts/vdk_hs38.dts create mode 100644 arch/arc/boot/dts/vdk_hs38_smp.dts create mode 100644 arch/arc/configs/axs101_defconfig create mode 100644 arch/arc/configs/axs103_defconfig create mode 100644 arch/arc/configs/axs103_smp_defconfig create mode 100644 arch/arc/configs/haps_hs_defconfig create mode 100644 arch/arc/configs/haps_hs_smp_defconfig create mode 100644 arch/arc/configs/hsdk_defconfig create mode 100644 arch/arc/configs/nsim_700_defconfig create mode 100644 arch/arc/configs/nsimosci_defconfig create mode 100644 arch/arc/configs/nsimosci_hs_defconfig create mode 100644 arch/arc/configs/nsimosci_hs_smp_defconfig create mode 100644 arch/arc/configs/tb10x_defconfig create mode 100644 arch/arc/configs/vdk_hs38_defconfig create mode 100644 arch/arc/configs/vdk_hs38_smp_defconfig create mode 100644 arch/arc/include/asm/Kbuild create mode 100644 arch/arc/include/asm/arcregs.h create mode 100644 arch/arc/include/asm/asm-offsets.h create mode 100644 arch/arc/include/asm/asserts.h create mode 100644 arch/arc/include/asm/atomic-llsc.h create mode 100644 arch/arc/include/asm/atomic-spinlock.h create mode 100644 arch/arc/include/asm/atomic.h create mode 100644 arch/arc/include/asm/atomic64-arcv2.h create mode 100644 arch/arc/include/asm/barrier.h create mode 100644 arch/arc/include/asm/bitops.h create mode 100644 arch/arc/include/asm/bug.h create mode 100644 arch/arc/include/asm/cache.h create mode 100644 arch/arc/include/asm/cacheflush.h create mode 100644 arch/arc/include/asm/checksum.h create mode 100644 arch/arc/include/asm/cmpxchg.h create mode 100644 arch/arc/include/asm/current.h create mode 100644 arch/arc/include/asm/delay.h create mode 100644 arch/arc/include/asm/disasm.h create mode 100644 arch/arc/include/asm/dma.h create mode 100644 arch/arc/include/asm/dsp-impl.h create mode 100644 arch/arc/include/asm/dsp.h create mode 100644 arch/arc/include/asm/dwarf.h create mode 100644 arch/arc/include/asm/elf.h create mode 100644 arch/arc/include/asm/entry-arcv2.h create mode 100644 arch/arc/include/asm/entry-compact.h create mode 100644 arch/arc/include/asm/entry.h create mode 100644 
arch/arc/include/asm/exec.h create mode 100644 arch/arc/include/asm/fb.h create mode 100644 arch/arc/include/asm/fpu.h create mode 100644 arch/arc/include/asm/futex.h create mode 100644 arch/arc/include/asm/highmem.h create mode 100644 arch/arc/include/asm/hugepage.h create mode 100644 arch/arc/include/asm/io.h create mode 100644 arch/arc/include/asm/irq.h create mode 100644 arch/arc/include/asm/irqflags-arcv2.h create mode 100644 arch/arc/include/asm/irqflags-compact.h create mode 100644 arch/arc/include/asm/irqflags.h create mode 100644 arch/arc/include/asm/jump_label.h create mode 100644 arch/arc/include/asm/kdebug.h create mode 100644 arch/arc/include/asm/kgdb.h create mode 100644 arch/arc/include/asm/kmap_types.h create mode 100644 arch/arc/include/asm/kprobes.h create mode 100644 arch/arc/include/asm/linkage.h create mode 100644 arch/arc/include/asm/mach_desc.h create mode 100644 arch/arc/include/asm/mmu-arcv2.h create mode 100644 arch/arc/include/asm/mmu.h create mode 100644 arch/arc/include/asm/mmu_context.h create mode 100644 arch/arc/include/asm/mmzone.h create mode 100644 arch/arc/include/asm/module.h create mode 100644 arch/arc/include/asm/page.h create mode 100644 arch/arc/include/asm/pci.h create mode 100644 arch/arc/include/asm/perf_event.h create mode 100644 arch/arc/include/asm/pgalloc.h create mode 100644 arch/arc/include/asm/pgtable-bits-arcv2.h create mode 100644 arch/arc/include/asm/pgtable-levels.h create mode 100644 arch/arc/include/asm/pgtable.h create mode 100644 arch/arc/include/asm/processor.h create mode 100644 arch/arc/include/asm/ptrace.h create mode 100644 arch/arc/include/asm/sections.h create mode 100644 arch/arc/include/asm/segment.h create mode 100644 arch/arc/include/asm/serial.h create mode 100644 arch/arc/include/asm/setup.h create mode 100644 arch/arc/include/asm/shmparam.h create mode 100644 arch/arc/include/asm/smp.h create mode 100644 arch/arc/include/asm/spinlock.h create mode 100644 arch/arc/include/asm/spinlock_types.h create mode 100644 arch/arc/include/asm/stacktrace.h create mode 100644 arch/arc/include/asm/string.h create mode 100644 arch/arc/include/asm/switch_to.h create mode 100644 arch/arc/include/asm/syscall.h create mode 100644 arch/arc/include/asm/syscalls.h create mode 100644 arch/arc/include/asm/thread_info.h create mode 100644 arch/arc/include/asm/timex.h create mode 100644 arch/arc/include/asm/tlb-mmu1.h create mode 100644 arch/arc/include/asm/tlb.h create mode 100644 arch/arc/include/asm/tlbflush.h create mode 100644 arch/arc/include/asm/uaccess.h create mode 100644 arch/arc/include/asm/unaligned.h create mode 100644 arch/arc/include/asm/unwind.h create mode 100644 arch/arc/include/asm/vermagic.h create mode 100644 arch/arc/include/asm/vmalloc.h create mode 100644 arch/arc/include/uapi/asm/Kbuild create mode 100644 arch/arc/include/uapi/asm/byteorder.h create mode 100644 arch/arc/include/uapi/asm/cachectl.h create mode 100644 arch/arc/include/uapi/asm/elf.h create mode 100644 arch/arc/include/uapi/asm/page.h create mode 100644 arch/arc/include/uapi/asm/ptrace.h create mode 100644 arch/arc/include/uapi/asm/setup.h create mode 100644 arch/arc/include/uapi/asm/sigcontext.h create mode 100644 arch/arc/include/uapi/asm/signal.h create mode 100644 arch/arc/include/uapi/asm/swab.h create mode 100644 arch/arc/include/uapi/asm/unistd.h create mode 100644 arch/arc/kernel/.gitignore create mode 100644 arch/arc/kernel/Makefile create mode 100644 arch/arc/kernel/arc_hostlink.c create mode 100644 arch/arc/kernel/arcksyms.c create mode 100644 
arch/arc/kernel/asm-offsets.c create mode 100644 arch/arc/kernel/ctx_sw.c create mode 100644 arch/arc/kernel/ctx_sw_asm.S create mode 100644 arch/arc/kernel/devtree.c create mode 100644 arch/arc/kernel/disasm.c create mode 100644 arch/arc/kernel/entry-arcv2.S create mode 100644 arch/arc/kernel/entry-compact.S create mode 100644 arch/arc/kernel/entry.S create mode 100644 arch/arc/kernel/fpu.c create mode 100644 arch/arc/kernel/head.S create mode 100644 arch/arc/kernel/intc-arcv2.c create mode 100644 arch/arc/kernel/intc-compact.c create mode 100644 arch/arc/kernel/irq.c create mode 100644 arch/arc/kernel/jump_label.c create mode 100644 arch/arc/kernel/kgdb.c create mode 100644 arch/arc/kernel/kprobes.c create mode 100644 arch/arc/kernel/mcip.c create mode 100644 arch/arc/kernel/module.c create mode 100644 arch/arc/kernel/perf_event.c create mode 100644 arch/arc/kernel/process.c create mode 100644 arch/arc/kernel/ptrace.c create mode 100644 arch/arc/kernel/reset.c create mode 100644 arch/arc/kernel/setup.c create mode 100644 arch/arc/kernel/signal.c create mode 100644 arch/arc/kernel/smp.c create mode 100644 arch/arc/kernel/stacktrace.c create mode 100644 arch/arc/kernel/sys.c create mode 100644 arch/arc/kernel/traps.c create mode 100644 arch/arc/kernel/troubleshoot.c create mode 100644 arch/arc/kernel/unaligned.c create mode 100644 arch/arc/kernel/unwind.c create mode 100644 arch/arc/kernel/vmlinux.lds.S create mode 100644 arch/arc/lib/Makefile create mode 100644 arch/arc/lib/memcmp.S create mode 100644 arch/arc/lib/memcpy-700.S create mode 100644 arch/arc/lib/memcpy-archs-unaligned.S create mode 100644 arch/arc/lib/memcpy-archs.S create mode 100644 arch/arc/lib/memset-archs.S create mode 100644 arch/arc/lib/memset.S create mode 100644 arch/arc/lib/strchr-700.S create mode 100644 arch/arc/lib/strcmp-archs.S create mode 100644 arch/arc/lib/strcmp.S create mode 100644 arch/arc/lib/strcpy-700.S create mode 100644 arch/arc/lib/strlen.S create mode 100644 arch/arc/mm/Makefile create mode 100644 arch/arc/mm/cache.c create mode 100644 arch/arc/mm/dma.c create mode 100644 arch/arc/mm/extable.c create mode 100644 arch/arc/mm/fault.c create mode 100644 arch/arc/mm/highmem.c create mode 100644 arch/arc/mm/init.c create mode 100644 arch/arc/mm/ioremap.c create mode 100644 arch/arc/mm/mmap.c create mode 100644 arch/arc/mm/tlb.c create mode 100644 arch/arc/mm/tlbex.S create mode 100644 arch/arc/oprofile/Makefile create mode 100644 arch/arc/oprofile/common.c create mode 100644 arch/arc/plat-axs10x/Kconfig create mode 100644 arch/arc/plat-axs10x/Makefile create mode 100644 arch/arc/plat-axs10x/axs10x.c create mode 100644 arch/arc/plat-hsdk/Kconfig create mode 100644 arch/arc/plat-hsdk/Makefile create mode 100644 arch/arc/plat-hsdk/platform.c create mode 100644 arch/arc/plat-sim/Makefile create mode 100644 arch/arc/plat-sim/platform.c create mode 100644 arch/arc/plat-tb10x/Kconfig create mode 100644 arch/arc/plat-tb10x/Makefile create mode 100644 arch/arc/plat-tb10x/tb10x.c create mode 100644 arch/arm/ARMnn/Android.bp create mode 100644 arch/arm/ARMnn/Android.mk create mode 100644 arch/arm/ARMnn/BuildGuideAndroidNDK.md create mode 100644 arch/arm/ARMnn/BuildGuideCrossCompilation.md create mode 100644 arch/arm/ARMnn/CMakeLists.txt create mode 100644 arch/arm/ARMnn/ContributorGuide.md create mode 100644 arch/arm/ARMnn/InstallationViaAptRepository.md create mode 100644 arch/arm/ARMnn/LICENSE create mode 100644 arch/arm/ARMnn/README.md create mode 100644 arch/arm/ARMnn/SECURITY.md create mode 100644 
arch/arm/ARMnn/cmake/AddDllCopyCommands.cmake create mode 100644 arch/arm/ARMnn/cmake/ArmnnConfig.cmake.in create mode 100644 arch/arm/ARMnn/cmake/ArmnnVersion.cmake create mode 100644 arch/arm/ARMnn/cmake/DelegateVersion.cmake create mode 100644 arch/arm/ARMnn/cmake/GlobalConfig.cmake create mode 100644 arch/arm/ARMnn/cmake/ParserVersion.cmake create mode 100644 arch/arm/ARMnn/cmake/Utils.cmake create mode 100644 arch/arm/ARMnn/delegate/BuildGuideNative.md create mode 100644 arch/arm/ARMnn/delegate/CMakeLists.txt create mode 100644 arch/arm/ARMnn/delegate/DelegateQuickStartGuide.md create mode 100644 arch/arm/ARMnn/delegate/README.md create mode 100644 arch/arm/ARMnn/delegate/cmake/Modules/ArmnnDelegateConfig.cmake.in create mode 100644 arch/arm/ARMnn/delegate/cmake/Modules/FindFlatbuffers.cmake create mode 100644 arch/arm/ARMnn/delegate/cmake/Modules/FindTfLite.cmake create mode 100644 arch/arm/ARMnn/delegate/cmake/Modules/FindTfLiteSrc.cmake create mode 100644 arch/arm/ARMnn/delegate/include/DelegateOptions.hpp create mode 100644 arch/arm/ARMnn/delegate/include/Version.hpp create mode 100644 arch/arm/ARMnn/delegate/include/armnn_delegate.hpp create mode 100644 arch/arm/ARMnn/delegate/python/test/conftest.py create mode 100644 arch/arm/ARMnn/delegate/python/test/pytest.ini create mode 100644 arch/arm/ARMnn/delegate/python/test/test_data/conv2d.tflite create mode 100644 arch/arm/ARMnn/delegate/python/test/test_data/fallback_model.tflite create mode 100644 arch/arm/ARMnn/delegate/python/test/test_data/fp32_model.tflite create mode 100644 arch/arm/ARMnn/delegate/python/test/test_data/mock_model.tflite create mode 100644 arch/arm/ARMnn/delegate/python/test/test_external_delegate.py create mode 100644 arch/arm/ARMnn/delegate/python/test/utils.py create mode 100644 arch/arm/ARMnn/delegate/src/Activation.hpp create mode 100644 arch/arm/ARMnn/delegate/src/ArgMinMax.hpp create mode 100644 arch/arm/ARMnn/delegate/src/BatchSpace.hpp create mode 100644 arch/arm/ARMnn/delegate/src/Comparison.hpp create mode 100644 arch/arm/ARMnn/delegate/src/Control.hpp create mode 100644 arch/arm/ARMnn/delegate/src/Convolution.hpp create mode 100644 arch/arm/ARMnn/delegate/src/DelegateOptions.cpp create mode 100644 arch/arm/ARMnn/delegate/src/DelegateUtils.hpp create mode 100644 arch/arm/ARMnn/delegate/src/ElementwiseBinary.hpp create mode 100644 arch/arm/ARMnn/delegate/src/ElementwiseUnary.hpp create mode 100644 arch/arm/ARMnn/delegate/src/Fill.hpp create mode 100644 arch/arm/ARMnn/delegate/src/FullyConnected.hpp create mode 100644 arch/arm/ARMnn/delegate/src/Gather.hpp create mode 100644 arch/arm/ARMnn/delegate/src/LogicalBinary.hpp create mode 100644 arch/arm/ARMnn/delegate/src/Lstm.hpp create mode 100644 arch/arm/ARMnn/delegate/src/MultiLayerFacade.hpp create mode 100644 arch/arm/ARMnn/delegate/src/Normalization.hpp create mode 100644 arch/arm/ARMnn/delegate/src/Pack.hpp create mode 100644 arch/arm/ARMnn/delegate/src/Pad.hpp create mode 100644 arch/arm/ARMnn/delegate/src/Pooling.hpp create mode 100644 arch/arm/ARMnn/delegate/src/Prelu.hpp create mode 100644 arch/arm/ARMnn/delegate/src/Quantization.hpp create mode 100644 arch/arm/ARMnn/delegate/src/Redefine.hpp create mode 100644 arch/arm/ARMnn/delegate/src/Reduce.hpp create mode 100644 arch/arm/ARMnn/delegate/src/Resize.hpp create mode 100644 arch/arm/ARMnn/delegate/src/Round.hpp create mode 100644 arch/arm/ARMnn/delegate/src/Shape.hpp create mode 100644 arch/arm/ARMnn/delegate/src/SharedFunctions.cpp create mode 100644 
arch/arm/ARMnn/delegate/src/SharedFunctions.hpp create mode 100644 arch/arm/ARMnn/delegate/src/Slice.hpp create mode 100644 arch/arm/ARMnn/delegate/src/Softmax.hpp create mode 100644 arch/arm/ARMnn/delegate/src/SpaceDepth.hpp create mode 100644 arch/arm/ARMnn/delegate/src/Split.hpp create mode 100644 arch/arm/ARMnn/delegate/src/Transpose.hpp create mode 100644 arch/arm/ARMnn/delegate/src/UnidirectionalSequenceLstm.hpp create mode 100644 arch/arm/ARMnn/delegate/src/Unpack.hpp create mode 100644 arch/arm/ARMnn/delegate/src/armnn_delegate.cpp create mode 100644 arch/arm/ARMnn/delegate/src/armnn_external_delegate.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/ActivationTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/ActivationTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/ArgMinMaxTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/ArgMinMaxTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/ArmnnDelegateTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/BatchSpaceTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/BatchSpaceTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/CastTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/CastTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/ComparisonTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/ComparisonTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/ControlTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/ControlTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/Convolution2dTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/Convolution3dTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/ConvolutionTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/DelegateOptionsTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/DelegateOptionsTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/DepthwiseConvolution2dTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/ElementwiseBinaryTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/ElementwiseBinaryTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/ElementwiseUnaryTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/ElementwiseUnaryTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/FillTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/FillTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/FullyConnectedTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/FullyConnectedTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/GatherTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/GatherTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/LogicalTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/LogicalTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/LstmTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/LstmTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/MirrorPadTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/NeonDelegateTests_NDK_Issue.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/NormalizationTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/NormalizationTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/PackTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/PackTestHelper.hpp create mode 100644 
arch/arm/ARMnn/delegate/src/test/PadTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/PadTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/Pooling2dTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/Pooling2dTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/PreluTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/PreluTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/QuantizationTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/QuantizationTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/RedefineTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/ReduceTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/ReduceTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/ReshapeTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/ResizeTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/ResizeTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/RoundTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/RoundTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/ShapeTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/ShapeTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/SliceTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/SliceTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/SoftmaxTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/SoftmaxTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/SpaceDepthTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/SpaceDepthTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/SplitTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/SplitTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/TestUtils.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/TestUtils.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/TransposeTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/TransposeTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/UnidirectionalSequenceLstmTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/UnidirectionalSequenceLstmTestHelper.hpp create mode 100644 arch/arm/ARMnn/delegate/src/test/UnpackTest.cpp create mode 100644 arch/arm/ARMnn/delegate/src/test/UnpackTestHelper.hpp create mode 100644 arch/arm/ARMnn/docker/README.md create mode 100644 arch/arm/ARMnn/docker/armnn-android/Dockerfile create mode 100644 arch/arm/ARMnn/docker/armnn-android/docker-entrypoint.sh create mode 100644 arch/arm/ARMnn/docker/x86_64/Dockerfile create mode 100644 arch/arm/ARMnn/docs/01_00_quick_start.dox create mode 100644 arch/arm/ARMnn/docs/02_operator_list.dox create mode 100644 arch/arm/ARMnn/docs/03_build_guides.dox create mode 100644 arch/arm/ARMnn/docs/04_contributor.dox create mode 100644 arch/arm/ARMnn/docs/05_00_software_components.dox create mode 100644 arch/arm/ARMnn/docs/05_01_parsers.dox create mode 100644 arch/arm/ARMnn/docs/05_02_deserializer_serializer.dox create mode 100644 arch/arm/ARMnn/docs/05_03_delegate.dox create mode 100644 arch/arm/ARMnn/docs/Arm_NN_horizontal_blue.png create mode 100644 arch/arm/ARMnn/docs/Doxyfile create mode 100644 arch/arm/ARMnn/docs/FAQ.md create mode 100644 arch/arm/ARMnn/docs/PerformanceChart.png create mode 100644 arch/arm/ARMnn/docs/header.xhtml create mode 100644 arch/arm/ARMnn/docs/pyarmnn.png create mode 100644 arch/arm/ARMnn/docs/stylesheet.css create mode 
100644 arch/arm/ARMnn/include/armnn/ArmNN.hpp create mode 100644 arch/arm/ARMnn/include/armnn/BackendHelper.hpp create mode 100644 arch/arm/ARMnn/include/armnn/BackendId.hpp create mode 100644 arch/arm/ARMnn/include/armnn/BackendOptions.hpp create mode 100644 arch/arm/ARMnn/include/armnn/BackendRegistry.hpp create mode 100644 arch/arm/ARMnn/include/armnn/Conversion.hpp create mode 100644 arch/arm/ARMnn/include/armnn/Deprecated.hpp create mode 100644 arch/arm/ARMnn/include/armnn/Descriptors.hpp create mode 100644 arch/arm/ARMnn/include/armnn/DescriptorsFwd.hpp create mode 100644 arch/arm/ARMnn/include/armnn/Exceptions.hpp create mode 100644 arch/arm/ARMnn/include/armnn/IAsyncExecutionCallback.hpp create mode 100644 arch/arm/ARMnn/include/armnn/ILayerVisitor.hpp create mode 100644 arch/arm/ARMnn/include/armnn/INetwork.hpp create mode 100644 arch/arm/ARMnn/include/armnn/IProfiler.hpp create mode 100644 arch/arm/ARMnn/include/armnn/IRuntime.hpp create mode 100644 arch/arm/ARMnn/include/armnn/IStrategy.hpp create mode 100644 arch/arm/ARMnn/include/armnn/IWorkingMemHandle.hpp create mode 100644 arch/arm/ARMnn/include/armnn/LayerSupport.hpp create mode 100644 arch/arm/ARMnn/include/armnn/LayerVisitorBase.hpp create mode 100644 arch/arm/ARMnn/include/armnn/Logging.hpp create mode 100644 arch/arm/ARMnn/include/armnn/LstmParams.hpp create mode 100644 arch/arm/ARMnn/include/armnn/MemorySources.hpp create mode 100644 arch/arm/ARMnn/include/armnn/NetworkFwd.hpp create mode 100644 arch/arm/ARMnn/include/armnn/Optional.hpp create mode 100644 arch/arm/ARMnn/include/armnn/QuantizedLstmParams.hpp create mode 100644 arch/arm/ARMnn/include/armnn/StrategyBase.hpp create mode 100644 arch/arm/ARMnn/include/armnn/Tensor.hpp create mode 100644 arch/arm/ARMnn/include/armnn/TensorFwd.hpp create mode 100644 arch/arm/ARMnn/include/armnn/Threadpool.hpp create mode 100644 arch/arm/ARMnn/include/armnn/Types.hpp create mode 100644 arch/arm/ARMnn/include/armnn/TypesUtils.hpp create mode 100644 arch/arm/ARMnn/include/armnn/Utils.hpp create mode 100644 arch/arm/ARMnn/include/armnn/Version.hpp create mode 100644 arch/arm/ARMnn/include/armnn/backends/CMakeLists.txt create mode 100644 arch/arm/ARMnn/include/armnn/backends/CpuTensorHandleFwd.hpp create mode 100644 arch/arm/ARMnn/include/armnn/backends/DynamicBackend.hpp create mode 100644 arch/arm/ARMnn/include/armnn/backends/IBackendContext.hpp create mode 100644 arch/arm/ARMnn/include/armnn/backends/IBackendInternal.hpp create mode 100644 arch/arm/ARMnn/include/armnn/backends/ICustomAllocator.hpp create mode 100644 arch/arm/ARMnn/include/armnn/backends/ILayerSupport.hpp create mode 100644 arch/arm/ARMnn/include/armnn/backends/IMemoryManager.hpp create mode 100644 arch/arm/ARMnn/include/armnn/backends/IMemoryOptimizerStrategy.hpp create mode 100644 arch/arm/ARMnn/include/armnn/backends/ITensorHandle.hpp create mode 100644 arch/arm/ARMnn/include/armnn/backends/ITensorHandleFactory.hpp create mode 100644 arch/arm/ARMnn/include/armnn/backends/IWorkload.hpp create mode 100644 arch/arm/ARMnn/include/armnn/backends/MemCopyWorkload.hpp create mode 100644 arch/arm/ARMnn/include/armnn/backends/OptimizationViews.hpp create mode 100644 arch/arm/ARMnn/include/armnn/backends/SubgraphView.hpp create mode 100644 arch/arm/ARMnn/include/armnn/backends/TensorHandle.hpp create mode 100644 arch/arm/ARMnn/include/armnn/backends/TensorHandleFwd.hpp create mode 100644 arch/arm/ARMnn/include/armnn/backends/Workload.hpp create mode 100644 arch/arm/ARMnn/include/armnn/backends/WorkloadData.hpp create 
mode 100644 arch/arm/ARMnn/include/armnn/backends/WorkloadFactory.hpp create mode 100644 arch/arm/ARMnn/include/armnn/backends/WorkloadInfo.hpp create mode 100644 arch/arm/ARMnn/include/armnn/backends/profiling/IBackendProfiling.hpp create mode 100644 arch/arm/ARMnn/include/armnn/backends/profiling/IBackendProfilingContext.hpp create mode 100644 arch/arm/ARMnn/include/armnn/profiling/ILocalPacketHandler.hpp create mode 100644 arch/arm/ARMnn/include/armnn/profiling/ISendTimelinePacket.hpp create mode 100644 arch/arm/ARMnn/include/armnn/utility/Assert.hpp create mode 100644 arch/arm/ARMnn/include/armnn/utility/IgnoreUnused.hpp create mode 100644 arch/arm/ARMnn/include/armnn/utility/NumericCast.hpp create mode 100644 arch/arm/ARMnn/include/armnn/utility/PolymorphicDowncast.hpp create mode 100644 arch/arm/ARMnn/include/armnn/utility/StringUtils.hpp create mode 100644 arch/arm/ARMnn/include/armnn/utility/Timer.hpp create mode 100644 arch/arm/ARMnn/include/armnn/utility/TransformIterator.hpp create mode 100644 arch/arm/ARMnn/include/armnnDeserializer/IDeserializer.hpp create mode 100644 arch/arm/ARMnn/include/armnnOnnxParser/IOnnxParser.hpp create mode 100644 arch/arm/ARMnn/include/armnnOnnxParser/Version.hpp create mode 100644 arch/arm/ARMnn/include/armnnSerializer/ISerializer.hpp create mode 100644 arch/arm/ARMnn/include/armnnTestUtils/DataLayoutUtils.hpp create mode 100644 arch/arm/ARMnn/include/armnnTestUtils/LayerTestResult.hpp create mode 100644 arch/arm/ARMnn/include/armnnTestUtils/MemCopyTestImpl.hpp create mode 100644 arch/arm/ARMnn/include/armnnTestUtils/MockBackend.hpp create mode 100644 arch/arm/ARMnn/include/armnnTestUtils/MockMemoryManager.hpp create mode 100644 arch/arm/ARMnn/include/armnnTestUtils/MockTensorHandle.hpp create mode 100644 arch/arm/ARMnn/include/armnnTestUtils/PredicateResult.hpp create mode 100644 arch/arm/ARMnn/include/armnnTestUtils/TensorCopyUtils.hpp create mode 100644 arch/arm/ARMnn/include/armnnTestUtils/TensorHelpers.hpp create mode 100644 arch/arm/ARMnn/include/armnnTestUtils/WorkloadTestUtils.hpp create mode 100644 arch/arm/ARMnn/include/armnnTfLiteParser/ITfLiteParser.hpp create mode 100644 arch/arm/ARMnn/include/armnnTfLiteParser/Version.hpp create mode 100644 arch/arm/ARMnn/include/armnnUtils/CompatibleTypes.hpp create mode 100644 arch/arm/ARMnn/include/armnnUtils/DataLayoutIndexed.hpp create mode 100644 arch/arm/ARMnn/include/armnnUtils/Filesystem.hpp create mode 100644 arch/arm/ARMnn/include/armnnUtils/FloatingPointComparison.hpp create mode 100644 arch/arm/ARMnn/include/armnnUtils/FloatingPointConverter.hpp create mode 100644 arch/arm/ARMnn/include/armnnUtils/Permute.hpp create mode 100644 arch/arm/ARMnn/include/armnnUtils/QuantizeHelper.hpp create mode 100644 arch/arm/ARMnn/include/armnnUtils/TContainer.hpp create mode 100644 arch/arm/ARMnn/include/armnnUtils/TensorUtils.hpp create mode 100644 arch/arm/ARMnn/include/armnnUtils/Threads.hpp create mode 100644 arch/arm/ARMnn/include/armnnUtils/Transpose.hpp create mode 100644 arch/arm/ARMnn/profiling/CMakeLists.txt create mode 100644 arch/arm/ARMnn/profiling/buildpipe.sh create mode 100644 arch/arm/ARMnn/profiling/common/include/Assert.hpp create mode 100644 arch/arm/ARMnn/profiling/common/include/CommandHandlerFunctor.hpp create mode 100644 arch/arm/ARMnn/profiling/common/include/CommandHandlerKey.hpp create mode 100644 arch/arm/ARMnn/profiling/common/include/CommandHandlerRegistry.hpp create mode 100644 arch/arm/ARMnn/profiling/common/include/CommonProfilingUtils.hpp create mode 100644 
arch/arm/ARMnn/profiling/common/include/Constants.hpp create mode 100644 arch/arm/ARMnn/profiling/common/include/Conversion.hpp create mode 100644 arch/arm/ARMnn/profiling/common/include/DllExport.hpp create mode 100644 arch/arm/ARMnn/profiling/common/include/EncodeVersion.hpp create mode 100644 arch/arm/ARMnn/profiling/common/include/IProfilingGuidGenerator.hpp create mode 100644 arch/arm/ARMnn/profiling/common/include/IgnoreUnused.hpp create mode 100644 arch/arm/ARMnn/profiling/common/include/LabelsAndEventClasses.hpp create mode 100644 arch/arm/ARMnn/profiling/common/include/Logging.hpp create mode 100644 arch/arm/ARMnn/profiling/common/include/NetworkSockets.hpp create mode 100644 arch/arm/ARMnn/profiling/common/include/NumericCast.hpp create mode 100644 arch/arm/ARMnn/profiling/common/include/Packet.hpp create mode 100644 arch/arm/ARMnn/profiling/common/include/PacketVersionResolver.hpp create mode 100644 arch/arm/ARMnn/profiling/common/include/ProfilingException.hpp create mode 100644 arch/arm/ARMnn/profiling/common/include/ProfilingGuid.hpp create mode 100644 arch/arm/ARMnn/profiling/common/include/ProfilingGuidGenerator.hpp create mode 100644 arch/arm/ARMnn/profiling/common/include/SocketConnectionException.hpp create mode 100644 arch/arm/ARMnn/profiling/common/include/SwTrace.hpp create mode 100644 arch/arm/ARMnn/profiling/common/include/WindowsWrapper.hpp create mode 100644 arch/arm/ARMnn/profiling/common/src/CMakeLists.txt create mode 100644 arch/arm/ARMnn/profiling/common/src/CommandHandlerFunctor.cpp create mode 100644 arch/arm/ARMnn/profiling/common/src/CommandHandlerKey.cpp create mode 100644 arch/arm/ARMnn/profiling/common/src/CommandHandlerRegistry.cpp create mode 100644 arch/arm/ARMnn/profiling/common/src/CommonProfilingUtils.cpp create mode 100644 arch/arm/ARMnn/profiling/common/src/LabelsAndEventClasses.cpp create mode 100644 arch/arm/ARMnn/profiling/common/src/NetworkSockets.cpp create mode 100644 arch/arm/ARMnn/profiling/common/src/PacketVersionResolver.cpp create mode 100644 arch/arm/ARMnn/profiling/common/src/SwTrace.cpp create mode 100644 arch/arm/ARMnn/profiling/server/include/basePipeServer/BasePipeServer.hpp create mode 100644 arch/arm/ARMnn/profiling/server/include/basePipeServer/ConnectionHandler.hpp create mode 100644 arch/arm/ARMnn/profiling/server/include/timelineDecoder/ITimelineDecoder.hpp create mode 100644 arch/arm/ARMnn/profiling/server/include/timelineDecoder/TimelineCaptureCommandHandler.hpp create mode 100644 arch/arm/ARMnn/profiling/server/include/timelineDecoder/TimelineDecoder.hpp create mode 100644 arch/arm/ARMnn/profiling/server/include/timelineDecoder/TimelineDirectoryCaptureCommandHandler.hpp create mode 100644 arch/arm/ARMnn/profiling/server/src/basePipeServer/BasePipeServer.cpp create mode 100644 arch/arm/ARMnn/profiling/server/src/basePipeServer/CMakeLists.txt create mode 100644 arch/arm/ARMnn/profiling/server/src/basePipeServer/ConnectionHandler.cpp create mode 100644 arch/arm/ARMnn/profiling/server/src/basePipeServer/tests/BasePipeServerTests.cpp create mode 100644 arch/arm/ARMnn/profiling/server/src/timelineDecoder/CMakeLists.txt create mode 100644 arch/arm/ARMnn/profiling/server/src/timelineDecoder/TimelineCaptureCommandHandler.cpp create mode 100644 arch/arm/ARMnn/profiling/server/src/timelineDecoder/TimelineDecoder.cpp create mode 100644 arch/arm/ARMnn/profiling/server/src/timelineDecoder/TimelineDirectoryCaptureCommandHandler.cpp create mode 100644 arch/arm/ARMnn/profiling/server/src/timelineDecoder/tests/TimelineTests.cpp create 
mode 100644 arch/arm/ARMnn/profiling/toolchain-x86-ubuntu-mingw64.cmake create mode 100644 arch/arm/ARMnn/python/pyarmnn/.gitignore create mode 100644 arch/arm/ARMnn/python/pyarmnn/CMakeLists.txt create mode 100644 arch/arm/ARMnn/python/pyarmnn/LICENSE create mode 100644 arch/arm/ARMnn/python/pyarmnn/README.md create mode 100644 arch/arm/ARMnn/python/pyarmnn/conftest.py create mode 100644 arch/arm/ARMnn/python/pyarmnn/docs_conf/config.mako create mode 100644 arch/arm/ARMnn/python/pyarmnn/examples/common/audio_capture.py create mode 100644 arch/arm/ARMnn/python/pyarmnn/examples/common/cv_utils.py create mode 100644 arch/arm/ARMnn/python/pyarmnn/examples/common/mfcc.py create mode 100644 arch/arm/ARMnn/python/pyarmnn/examples/common/network_executor.py create mode 100644 arch/arm/ARMnn/python/pyarmnn/examples/common/utils.py create mode 100644 arch/arm/ARMnn/python/pyarmnn/examples/image_classification/README.md create mode 100644 arch/arm/ARMnn/python/pyarmnn/examples/image_classification/example_utils.py create mode 100644 arch/arm/ARMnn/python/pyarmnn/examples/image_classification/onnx_mobilenetv2.py create mode 100644 arch/arm/ARMnn/python/pyarmnn/examples/image_classification/requirements.txt create mode 100644 arch/arm/ARMnn/python/pyarmnn/examples/image_classification/tflite_mobilenetv1_quantized.py create mode 100644 arch/arm/ARMnn/python/pyarmnn/examples/keyword_spotting/README.MD create mode 100644 arch/arm/ARMnn/python/pyarmnn/examples/keyword_spotting/__init__.py create mode 100644 arch/arm/ARMnn/python/pyarmnn/examples/keyword_spotting/audio_utils.py create mode 100644 arch/arm/ARMnn/python/pyarmnn/examples/keyword_spotting/requirements.txt create mode 100644 arch/arm/ARMnn/python/pyarmnn/examples/keyword_spotting/run_audio_classification.py create mode 100644 arch/arm/ARMnn/python/pyarmnn/examples/object_detection/README.md create mode 100644 arch/arm/ARMnn/python/pyarmnn/examples/object_detection/requirements.txt create mode 100644 arch/arm/ARMnn/python/pyarmnn/examples/object_detection/run_video_file.py create mode 100644 arch/arm/ARMnn/python/pyarmnn/examples/object_detection/run_video_stream.py create mode 100644 arch/arm/ARMnn/python/pyarmnn/pylintconfig create mode 100644 arch/arm/ARMnn/python/pyarmnn/setup.py create mode 100644 arch/arm/ARMnn/python/pyarmnn/swig_generate.py create mode 100644 arch/arm/ARMnn/python/pyarmnn/tox.ini create mode 100644 arch/arm/Kbuild create mode 100644 arch/arm/Kconfig create mode 100644 arch/arm/Kconfig-nommu create mode 100644 arch/arm/Kconfig.assembler create mode 100644 arch/arm/Kconfig.debug create mode 100644 arch/arm/Makefile diff --git a/arch/.gitignore b/arch/.gitignore new file mode 100644 index 0000000000..756c19c34f --- /dev/null +++ b/arch/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +/i386/ +/x86_64/ diff --git a/arch/Kconfig b/arch/Kconfig new file mode 100644 index 0000000000..678a80713b --- /dev/null +++ b/arch/Kconfig @@ -0,0 +1,1329 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# General architecture dependent options +# + +# +# Note: arch/$(SRCARCH)/Kconfig needs to be included first so that it can +# override the default values in this file. 
+# +source "arch/$(SRCARCH)/Kconfig" + +menu "General architecture-dependent options" + +config CRASH_CORE + bool + +config KEXEC_CORE + select CRASH_CORE + bool + +config KEXEC_ELF + bool + +config HAVE_IMA_KEXEC + bool + +config SET_FS + bool + +config HOTPLUG_SMT + bool + +config GENERIC_ENTRY + bool + +config KPROBES + bool "Kprobes" + depends on MODULES + depends on HAVE_KPROBES + select KALLSYMS + help + Kprobes allows you to trap at almost any kernel address and + execute a callback function. register_kprobe() establishes + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + +config JUMP_LABEL + bool "Optimize very unlikely/likely branches" + depends on HAVE_ARCH_JUMP_LABEL + depends on CC_HAS_ASM_GOTO + help + This option enables a transparent branch optimization that + makes certain almost-always-true or almost-always-false branch + conditions even cheaper to execute within the kernel. + + Certain performance-sensitive kernel code, such as trace points, + scheduler functionality, networking code and KVM have such + branches and include support for this optimization technique. + + If it is detected that the compiler has support for "asm goto", + the kernel will compile such branches with just a nop + instruction. When the condition flag is toggled to true, the + nop will be converted to a jump instruction to execute the + conditional block of instructions. + + This technique lowers overhead and stress on the branch prediction + of the processor and generally makes the kernel faster. The update + of the condition is slower, but those are always very rare. + + ( On 32-bit x86, the necessary options added to the compiler + flags may increase the size of the kernel slightly. ) + +config STATIC_KEYS_SELFTEST + bool "Static key selftest" + depends on JUMP_LABEL + help + Boot time self-test of the branch patching code. + +config STATIC_CALL_SELFTEST + bool "Static call selftest" + depends on HAVE_STATIC_CALL + help + Boot time self-test of the call patching code. + +config OPTPROBES + def_bool y + depends on KPROBES && HAVE_OPTPROBES + select TASKS_RCU if PREEMPTION + +config KPROBES_ON_FTRACE + def_bool y + depends on KPROBES && HAVE_KPROBES_ON_FTRACE + depends on DYNAMIC_FTRACE_WITH_REGS + help + If function tracer is enabled and the arch supports full + passing of pt_regs to function tracing, then kprobes can + optimize on top of function tracing. + +config UPROBES + def_bool n + depends on ARCH_SUPPORTS_UPROBES + help + Uprobes is the user-space counterpart to kprobes: they + enable instrumentation applications (such as 'perf probe') + to establish unintrusive probes in user-space binaries and + libraries, by executing handler functions when the probes + are hit by user-space applications. + + ( These probes come in the form of single-byte breakpoints, + managed by the kernel and kept transparent to the probed + application. ) + +config HAVE_64BIT_ALIGNED_ACCESS + def_bool 64BIT && !HAVE_EFFICIENT_UNALIGNED_ACCESS + help + Some architectures require 64 bit accesses to be 64 bit + aligned, which also requires structs containing 64 bit values + to be 64 bit aligned too. This includes some 32 bit + architectures which can do 64 bit accesses, as well as 64 bit + architectures without unaligned access. + + This symbol should be selected by an architecture if 64 bit + accesses are required to be 64 bit aligned in this way even + though it is not a 64 bit architecture. 
+ + See Documentation/core-api/unaligned-memory-access.rst for + more information on the topic of unaligned memory accesses. + +config HAVE_EFFICIENT_UNALIGNED_ACCESS + bool + help + Some architectures are unable to perform unaligned accesses + without the use of get_unaligned/put_unaligned. Others are + unable to perform such accesses efficiently (e.g. trap on + unaligned access and require fixing it up in the exception + handler.) + + This symbol should be selected by an architecture if it can + perform unaligned accesses efficiently to allow different + code paths to be selected for these cases. Some network + drivers, for example, could opt to not fix up alignment + problems with received packets if doing so would not help + much. + + See Documentation/core-api/unaligned-memory-access.rst for more + information on the topic of unaligned memory accesses. + +config ARCH_USE_BUILTIN_BSWAP + bool + help + Modern versions of GCC (since 4.4) have builtin functions + for handling byte-swapping. Using these, instead of the old + inline assembler that the architecture code provides in the + __arch_bswapXX() macros, allows the compiler to see what's + happening and offers more opportunity for optimisation. In + particular, the compiler will be able to combine the byteswap + with a nearby load or store and use load-and-swap or + store-and-swap instructions if the architecture has them. It + should almost *never* result in code which is worse than the + hand-coded assembler in . But just in case it + does, the use of the builtins is optional. + + Any architecture with load-and-swap or store-and-swap + instructions should set this. And it shouldn't hurt to set it + on architectures that don't have such instructions. + +config KRETPROBES + def_bool y + depends on KPROBES && HAVE_KRETPROBES + +config USER_RETURN_NOTIFIER + bool + depends on HAVE_USER_RETURN_NOTIFIER + help + Provide a kernel-internal notification when a cpu is about to + switch to user mode. + +config HAVE_IOREMAP_PROT + bool + +config HAVE_KPROBES + bool + +config HAVE_KRETPROBES + bool + +config HAVE_OPTPROBES + bool + +config HAVE_KPROBES_ON_FTRACE + bool + +config ARCH_CORRECT_STACKTRACE_ON_KRETPROBE + bool + help + Since kretprobes modifies return address on the stack, the + stacktrace may see the kretprobe trampoline address instead + of correct one. If the architecture stacktrace code and + unwinder can adjust such entries, select this configuration. + +config HAVE_FUNCTION_ERROR_INJECTION + bool + +config HAVE_NMI + bool + +config TRACE_IRQFLAGS_SUPPORT + bool + +# +# An arch should select this if it provides all these things: +# +# task_pt_regs() in asm/processor.h or asm/ptrace.h +# arch_has_single_step() if there is hardware single-step support +# arch_has_block_step() if there is hardware block-step support +# asm/syscall.h supplying asm-generic/syscall.h interface +# linux/regset.h user_regset interfaces +# CORE_DUMP_USE_REGSET #define'd in linux/elf.h +# TIF_SYSCALL_TRACE calls tracehook_report_syscall_{entry,exit} +# TIF_NOTIFY_RESUME calls tracehook_notify_resume() +# signal delivery calls tracehook_signal_handler() +# +config HAVE_ARCH_TRACEHOOK + bool + +config HAVE_DMA_CONTIGUOUS + bool + +config GENERIC_SMP_IDLE_THREAD + bool + +config GENERIC_IDLE_POLL_SETUP + bool + +config ARCH_HAS_FORTIFY_SOURCE + bool + help + An architecture should select this when it can successfully + build and run with CONFIG_FORTIFY_SOURCE. 
+ +# +# Select if the arch provides a historic keepinit alias for the retain_initrd +# command line option +# +config ARCH_HAS_KEEPINITRD + bool + +# Select if arch has all set_memory_ro/rw/x/nx() functions in asm/cacheflush.h +config ARCH_HAS_SET_MEMORY + bool + +# Select if arch has all set_direct_map_invalid/default() functions +config ARCH_HAS_SET_DIRECT_MAP + bool + +# +# Select if the architecture provides the arch_dma_set_uncached symbol to +# either provide an uncached segment alias for a DMA allocation, or +# to remap the page tables in place. +# +config ARCH_HAS_DMA_SET_UNCACHED + bool + +# +# Select if the architectures provides the arch_dma_clear_uncached symbol +# to undo an in-place page table remap for uncached access. +# +config ARCH_HAS_DMA_CLEAR_UNCACHED + bool + +# Select if arch init_task must go in the __init_task_data section +config ARCH_TASK_STRUCT_ON_STACK + bool + +# Select if arch has its private alloc_task_struct() function +config ARCH_TASK_STRUCT_ALLOCATOR + bool + +config HAVE_ARCH_THREAD_STRUCT_WHITELIST + bool + depends on !ARCH_TASK_STRUCT_ALLOCATOR + help + An architecture should select this to provide hardened usercopy + knowledge about what region of the thread_struct should be + whitelisted for copying to userspace. Normally this is only the + FPU registers. Specifically, arch_thread_struct_whitelist() + should be implemented. Without this, the entire thread_struct + field in task_struct will be left whitelisted. + +# Select if arch has its private alloc_thread_stack() function +config ARCH_THREAD_STACK_ALLOCATOR + bool + +# Select if arch wants to size task_struct dynamically via arch_task_struct_size: +config ARCH_WANTS_DYNAMIC_TASK_STRUCT + bool + +config ARCH_WANTS_NO_INSTR + bool + help + An architecture should select this if the noinstr macro is being used on + functions to denote that the toolchain should avoid instrumenting such + functions and is required for correctness. + +config ARCH_32BIT_OFF_T + bool + depends on !64BIT + help + All new 32-bit architectures should have 64-bit off_t type on + userspace side which corresponds to the loff_t kernel type. This + is the requirement for modern ABIs. Some existing architectures + still support 32-bit off_t. This option is enabled for all such + architectures explicitly. + +# Selected by 64 bit architectures which have a 32 bit f_tinode in struct ustat +config ARCH_32BIT_USTAT_F_TINODE + bool + +config HAVE_ASM_MODVERSIONS + bool + help + This symbol should be selected by an architecture if it provides + to support the module versioning for symbols + exported from assembly code. + +config HAVE_REGS_AND_STACK_ACCESS_API + bool + help + This symbol should be selected by an architecture if it supports + the API needed to access registers and stack entries from pt_regs, + declared in asm/ptrace.h + For example the kprobes-based event tracer needs this API. + +config HAVE_RSEQ + bool + depends on HAVE_REGS_AND_STACK_ACCESS_API + help + This symbol should be selected by an architecture if it + supports an implementation of restartable sequences. 
+ +config HAVE_FUNCTION_ARG_ACCESS_API + bool + help + This symbol should be selected by an architecture if it supports + the API needed to access function arguments from pt_regs, + declared in asm/ptrace.h + +config HAVE_HW_BREAKPOINT + bool + depends on PERF_EVENTS + +config HAVE_MIXED_BREAKPOINTS_REGS + bool + depends on HAVE_HW_BREAKPOINT + help + Depending on the arch implementation of hardware breakpoints, + some of them have separate registers for data and instruction + breakpoints addresses, others have mixed registers to store + them but define the access type in a control register. + Select this option if your arch implements breakpoints under the + latter fashion. + +config HAVE_USER_RETURN_NOTIFIER + bool + +config HAVE_PERF_EVENTS_NMI + bool + help + System hardware can generate an NMI using the perf event + subsystem. Also has support for calculating CPU cycle events + to determine how many clock cycles in a given period. + +config HAVE_HARDLOCKUP_DETECTOR_PERF + bool + depends on HAVE_PERF_EVENTS_NMI + help + The arch chooses to use the generic perf-NMI-based hardlockup + detector. Must define HAVE_PERF_EVENTS_NMI. + +config HAVE_NMI_WATCHDOG + depends on HAVE_NMI + bool + help + The arch provides a low level NMI watchdog. It provides + asm/nmi.h, and defines its own arch_touch_nmi_watchdog(). + +config HAVE_HARDLOCKUP_DETECTOR_ARCH + bool + select HAVE_NMI_WATCHDOG + help + The arch chooses to provide its own hardlockup detector, which is + a superset of the HAVE_NMI_WATCHDOG. It also conforms to config + interfaces and parameters provided by hardlockup detector subsystem. + +config HAVE_PERF_REGS + bool + help + Support selective register dumps for perf events. This includes + bit-mapping of each registers and a unique architecture id. + +config HAVE_PERF_USER_STACK_DUMP + bool + help + Support user stack dumps for perf event samples. This needs + access to the user stack pointer which is not unified across + architectures. + +config HAVE_ARCH_JUMP_LABEL + bool + +config HAVE_ARCH_JUMP_LABEL_RELATIVE + bool + +config MMU_GATHER_TABLE_FREE + bool + +config MMU_GATHER_RCU_TABLE_FREE + bool + select MMU_GATHER_TABLE_FREE + +config MMU_GATHER_PAGE_SIZE + bool + +config MMU_GATHER_NO_RANGE + bool + +config MMU_GATHER_NO_GATHER + bool + depends on MMU_GATHER_TABLE_FREE + +config ARCH_WANT_IRQS_OFF_ACTIVATE_MM + bool + help + Temporary select until all architectures can be converted to have + irqs disabled over activate_mm. Architectures that do IPI based TLB + shootdowns should enable this. + +config ARCH_HAVE_NMI_SAFE_CMPXCHG + bool + +config HAVE_ALIGNED_STRUCT_PAGE + bool + help + This makes sure that struct pages are double word aligned and that + e.g. the SLUB allocator can perform double word atomic operations + on a struct page for better performance. However selecting this + might increase the size of a struct page by a word. 
+ +config HAVE_CMPXCHG_LOCAL + bool + +config HAVE_CMPXCHG_DOUBLE + bool + +config ARCH_WEAK_RELEASE_ACQUIRE + bool + +config ARCH_WANT_IPC_PARSE_VERSION + bool + +config ARCH_WANT_COMPAT_IPC_PARSE_VERSION + bool + +config ARCH_WANT_OLD_COMPAT_IPC + select ARCH_WANT_COMPAT_IPC_PARSE_VERSION + bool + +config HAVE_ARCH_SECCOMP + bool + help + An arch should select this symbol to support seccomp mode 1 (the fixed + syscall policy), and must provide an overrides for __NR_seccomp_sigreturn, + and compat syscalls if the asm-generic/seccomp.h defaults need adjustment: + - __NR_seccomp_read_32 + - __NR_seccomp_write_32 + - __NR_seccomp_exit_32 + - __NR_seccomp_sigreturn_32 + +config HAVE_ARCH_SECCOMP_FILTER + bool + select HAVE_ARCH_SECCOMP + help + An arch should select this symbol if it provides all of these things: + - all the requirements for HAVE_ARCH_SECCOMP + - syscall_get_arch() + - syscall_get_arguments() + - syscall_rollback() + - syscall_set_return_value() + - SIGSYS siginfo_t support + - secure_computing is called from a ptrace_event()-safe context + - secure_computing return value is checked and a return value of -1 + results in the system call being skipped immediately. + - seccomp syscall wired up + - if !HAVE_SPARSE_SYSCALL_NR, have SECCOMP_ARCH_NATIVE, + SECCOMP_ARCH_NATIVE_NR, SECCOMP_ARCH_NATIVE_NAME defined. If + COMPAT is supported, have the SECCOMP_ARCH_COMPAT* defines too. + +config SECCOMP + prompt "Enable seccomp to safely execute untrusted bytecode" + def_bool y + depends on HAVE_ARCH_SECCOMP + help + This kernel feature is useful for number crunching applications + that may need to handle untrusted bytecode during their + execution. By using pipes or other transports made available + to the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in their + own address space using seccomp. Once seccomp is enabled via + prctl(PR_SET_SECCOMP) or the seccomp() syscall, it cannot be + disabled and the task is only allowed to execute a few safe + syscalls defined by each seccomp mode. + + If unsure, say Y. + +config SECCOMP_FILTER + def_bool y + depends on HAVE_ARCH_SECCOMP_FILTER && SECCOMP && NET + help + Enable tasks to build secure computing environments defined + in terms of Berkeley Packet Filter programs which implement + task-defined system call filtering polices. + + See Documentation/userspace-api/seccomp_filter.rst for details. + +config SECCOMP_CACHE_DEBUG + bool "Show seccomp filter cache status in /proc/pid/seccomp_cache" + depends on SECCOMP_FILTER && !HAVE_SPARSE_SYSCALL_NR + depends on PROC_FS + help + This enables the /proc/pid/seccomp_cache interface to monitor + seccomp cache data. The file format is subject to change. Reading + the file requires CAP_SYS_ADMIN. + + This option is for debugging only. Enabling presents the risk that + an adversary may be able to infer the seccomp filter logic. + + If unsure, say N. + +config HAVE_ARCH_STACKLEAK + bool + help + An architecture should select this if it has the code which + fills the used part of the kernel stack with the STACKLEAK_POISON + value before returning from system calls. + +config HAVE_STACKPROTECTOR + bool + help + An arch should select this symbol if: + - it has implemented a stack canary (e.g. __stack_chk_guard) + +config STACKPROTECTOR + bool "Stack Protector buffer overflow detection" + depends on HAVE_STACKPROTECTOR + depends on $(cc-option,-fstack-protector) + default y + help + This option turns on the "stack-protector" GCC feature. 
This + feature puts, at the beginning of functions, a canary value on + the stack just before the return address, and validates + the value just before actually returning. Stack based buffer + overflows (that need to overwrite this return address) now also + overwrite the canary, which gets detected and the attack is then + neutralized via a kernel panic. + + Functions will have the stack-protector canary logic added if they + have an 8-byte or larger character array on the stack. + + This feature requires gcc version 4.2 or above, or a distribution + gcc with the feature backported ("-fstack-protector"). + + On an x86 "defconfig" build, this feature adds canary checks to + about 3% of all kernel functions, which increases kernel code size + by about 0.3%. + +config STACKPROTECTOR_STRONG + bool "Strong Stack Protector" + depends on STACKPROTECTOR + depends on $(cc-option,-fstack-protector-strong) + default y + help + Functions will have the stack-protector canary logic added in any + of the following conditions: + + - local variable's address used as part of the right hand side of an + assignment or function argument + - local variable is an array (or union containing an array), + regardless of array type or length + - uses register local variables + + This feature requires gcc version 4.9 or above, or a distribution + gcc with the feature backported ("-fstack-protector-strong"). + + On an x86 "defconfig" build, this feature adds canary checks to + about 20% of all kernel functions, which increases the kernel code + size by about 2%. + +config ARCH_SUPPORTS_SHADOW_CALL_STACK + bool + help + An architecture should select this if it supports Clang's Shadow + Call Stack and implements runtime support for shadow stack + switching. + +config SHADOW_CALL_STACK + bool "Clang Shadow Call Stack" + depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK + depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER + help + This option enables Clang's Shadow Call Stack, which uses a + shadow stack to protect function return addresses from being + overwritten by an attacker. More information can be found in + Clang's documentation: + + https://clang.llvm.org/docs/ShadowCallStack.html + + Note that security guarantees in the kernel differ from the + ones documented for user space. The kernel must store addresses + of shadow stacks in memory, which means an attacker capable of + reading and writing arbitrary memory may be able to locate them + and hijack control flow by modifying the stacks. + +config LTO + bool + help + Selected if the kernel will be built using the compiler's LTO feature. + +config LTO_CLANG + bool + select LTO + help + Selected if the kernel will be built using Clang's LTO feature. + +config ARCH_SUPPORTS_LTO_CLANG + bool + help + An architecture should select this option if it supports: + - compiling with Clang, + - compiling inline assembly with Clang's integrated assembler, + - and linking with LLD. + +config ARCH_SUPPORTS_LTO_CLANG_THIN + bool + help + An architecture should select this option if it can support Clang's + ThinLTO mode. + +config HAS_LTO_CLANG + def_bool y + depends on CC_IS_CLANG && LD_IS_LLD && AS_IS_LLVM + depends on $(success,$(NM) --help | head -n 1 | grep -qi llvm) + depends on $(success,$(AR) --help | head -n 1 | grep -qi llvm) + depends on ARCH_SUPPORTS_LTO_CLANG + depends on !FTRACE_MCOUNT_USE_RECORDMCOUNT + depends on !KASAN || KASAN_HW_TAGS + depends on !GCOV_KERNEL + help + The compiler and Kconfig options support building with Clang's + LTO. 
+ +choice + prompt "Link Time Optimization (LTO)" + default LTO_NONE + help + This option enables Link Time Optimization (LTO), which allows the + compiler to optimize binaries globally. + + If unsure, select LTO_NONE. Note that LTO is very resource-intensive + so it's disabled by default. + +config LTO_NONE + bool "None" + help + Build the kernel normally, without Link Time Optimization (LTO). + +config LTO_CLANG_FULL + bool "Clang Full LTO (EXPERIMENTAL)" + depends on HAS_LTO_CLANG + depends on !COMPILE_TEST + select LTO_CLANG + help + This option enables Clang's full Link Time Optimization (LTO), which + allows the compiler to optimize the kernel globally. If you enable + this option, the compiler generates LLVM bitcode instead of ELF + object files, and the actual compilation from bitcode happens at + the LTO link step, which may take several minutes depending on the + kernel configuration. More information can be found from LLVM's + documentation: + + https://llvm.org/docs/LinkTimeOptimization.html + + During link time, this option can use a large amount of RAM, and + may take much longer than the ThinLTO option. + +config LTO_CLANG_THIN + bool "Clang ThinLTO (EXPERIMENTAL)" + depends on HAS_LTO_CLANG && ARCH_SUPPORTS_LTO_CLANG_THIN + select LTO_CLANG + help + This option enables Clang's ThinLTO, which allows for parallel + optimization and faster incremental compiles compared to the + CONFIG_LTO_CLANG_FULL option. More information can be found + from Clang's documentation: + + https://clang.llvm.org/docs/ThinLTO.html + + If unsure, say Y. +endchoice + +config ARCH_SUPPORTS_CFI_CLANG + bool + help + An architecture should select this option if it can support Clang's + Control-Flow Integrity (CFI) checking. + +config CFI_CLANG + bool "Use Clang's Control Flow Integrity (CFI)" + depends on LTO_CLANG && ARCH_SUPPORTS_CFI_CLANG + # Clang >= 12: + # - https://bugs.llvm.org/show_bug.cgi?id=46258 + # - https://bugs.llvm.org/show_bug.cgi?id=47479 + depends on CLANG_VERSION >= 120000 + select KALLSYMS + help + This option enables Clang’s forward-edge Control Flow Integrity + (CFI) checking, where the compiler injects a runtime check to each + indirect function call to ensure the target is a valid function with + the correct static type. This restricts possible call targets and + makes it more difficult for an attacker to exploit bugs that allow + the modification of stored function pointers. More information can be + found from Clang's documentation: + + https://clang.llvm.org/docs/ControlFlowIntegrity.html + +config CFI_CLANG_SHADOW + bool "Use CFI shadow to speed up cross-module checks" + default y + depends on CFI_CLANG && MODULES + help + If you select this option, the kernel builds a fast look-up table of + CFI check functions in loaded modules to reduce performance overhead. + + If unsure, say Y. + +config CFI_PERMISSIVE + bool "Use CFI in permissive mode" + depends on CFI_CLANG + help + When selected, Control Flow Integrity (CFI) violations result in a + warning instead of a kernel panic. This option should only be used + for finding indirect call type mismatches during development. + + If unsure, say N. + +config HAVE_ARCH_WITHIN_STACK_FRAMES + bool + help + An architecture should select this if it can walk the kernel stack + frames to determine if an object is part of either the arguments + or local variables (i.e. that it excludes saved return addresses, + and similar) by implementing an inline arch_within_stack_frames(), + which is used by CONFIG_HARDENED_USERCOPY. 
+ +config HAVE_CONTEXT_TRACKING + bool + help + Provide kernel/user boundaries probes necessary for subsystems + that need it, such as userspace RCU extended quiescent state. + Syscalls need to be wrapped inside user_exit()-user_enter(), either + optimized behind static key or through the slow path using TIF_NOHZ + flag. Exceptions handlers must be wrapped as well. Irqs are already + protected inside rcu_irq_enter/rcu_irq_exit() but preemption or signal + handling on irq exit still need to be protected. + +config HAVE_CONTEXT_TRACKING_OFFSTACK + bool + help + Architecture neither relies on exception_enter()/exception_exit() + nor on schedule_user(). Also preempt_schedule_notrace() and + preempt_schedule_irq() can't be called in a preemptible section + while context tracking is CONTEXT_USER. This feature reflects a sane + entry implementation where the following requirements are met on + critical entry code, ie: before user_exit() or after user_enter(): + + - Critical entry code isn't preemptible (or better yet: + not interruptible). + - No use of RCU read side critical sections, unless rcu_nmi_enter() + got called. + - No use of instrumentation, unless instrumentation_begin() got + called. + +config HAVE_TIF_NOHZ + bool + help + Arch relies on TIF_NOHZ and syscall slow path to implement context + tracking calls to user_enter()/user_exit(). + +config HAVE_VIRT_CPU_ACCOUNTING + bool + +config HAVE_VIRT_CPU_ACCOUNTING_IDLE + bool + help + Architecture has its own way to account idle CPU time and therefore + doesn't implement vtime_account_idle(). + +config ARCH_HAS_SCALED_CPUTIME + bool + +config HAVE_VIRT_CPU_ACCOUNTING_GEN + bool + default y if 64BIT + help + With VIRT_CPU_ACCOUNTING_GEN, cputime_t becomes 64-bit. + Before enabling this option, arch code must be audited + to ensure there are no races in concurrent read/write of + cputime_t. For example, reading/writing 64-bit cputime_t on + some 32-bit arches may require multiple accesses, so proper + locking is needed to protect against concurrent accesses. + +config HAVE_IRQ_TIME_ACCOUNTING + bool + help + Archs need to ensure they use a high enough resolution clock to + support irq time accounting and then call enable_sched_clock_irqtime(). + +config HAVE_MOVE_PUD + bool + help + Architectures that select this are able to move page tables at the + PUD level. If there are only 3 page table levels, the move effectively + happens at the PGD level. + +config HAVE_MOVE_PMD + bool + help + Archs that select this are able to move page tables at the PMD level. + +config HAVE_ARCH_TRANSPARENT_HUGEPAGE + bool + +config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD + bool + +config HAVE_ARCH_HUGE_VMAP + bool + +# +# Archs that select this would be capable of PMD-sized vmaps (i.e., +# arch_vmap_pmd_supported() returns true), and they must make no assumptions +# that vmalloc memory is mapped with PAGE_SIZE ptes. The VM_NO_HUGE_VMAP flag +# can be used to prohibit arch-specific allocations from using hugepages to +# help with this (e.g., modules may require it). +# +config HAVE_ARCH_HUGE_VMALLOC + depends on HAVE_ARCH_HUGE_VMAP + bool + +config ARCH_WANT_HUGE_PMD_SHARE + bool + +config HAVE_ARCH_SOFT_DIRTY + bool + +config HAVE_MOD_ARCH_SPECIFIC + bool + help + The arch uses struct mod_arch_specific to store data. Many arches + just need a simple module loader without arch specific data - those + should not enable this. + +config MODULES_USE_ELF_RELA + bool + help + Modules only use ELF RELA relocations. Modules with ELF REL + relocations will give an error. 
+ +config MODULES_USE_ELF_REL + bool + help + Modules only use ELF REL relocations. Modules with ELF RELA + relocations will give an error. + +config HAVE_IRQ_EXIT_ON_IRQ_STACK + bool + help + Architecture doesn't only execute the irq handler on the irq stack + but also irq_exit(). This way we can process softirqs on this irq + stack instead of switching to a new one when we call __do_softirq() + in the end of an hardirq. + This spares a stack switch and improves cache usage on softirq + processing. + +config HAVE_SOFTIRQ_ON_OWN_STACK + bool + help + Architecture provides a function to run __do_softirq() on a + separate stack. + +config PGTABLE_LEVELS + int + default 2 + +config ARCH_HAS_ELF_RANDOMIZE + bool + help + An architecture supports choosing randomized locations for + stack, mmap, brk, and ET_DYN. Defined functions: + - arch_mmap_rnd() + - arch_randomize_brk() + +config HAVE_ARCH_MMAP_RND_BITS + bool + help + An arch should select this symbol if it supports setting a variable + number of bits for use in establishing the base address for mmap + allocations, has MMU enabled and provides values for both: + - ARCH_MMAP_RND_BITS_MIN + - ARCH_MMAP_RND_BITS_MAX + +config HAVE_EXIT_THREAD + bool + help + An architecture implements exit_thread. + +config ARCH_MMAP_RND_BITS_MIN + int + +config ARCH_MMAP_RND_BITS_MAX + int + +config ARCH_MMAP_RND_BITS_DEFAULT + int + +config ARCH_MMAP_RND_BITS + int "Number of bits to use for ASLR of mmap base address" if EXPERT + range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX + default ARCH_MMAP_RND_BITS_DEFAULT if ARCH_MMAP_RND_BITS_DEFAULT + default ARCH_MMAP_RND_BITS_MIN + depends on HAVE_ARCH_MMAP_RND_BITS + help + This value can be used to select the number of bits to use to + determine the random offset to the base address of vma regions + resulting from mmap allocations. This value will be bounded + by the architecture's minimum and maximum supported values. + + This value can be changed after boot using the + /proc/sys/vm/mmap_rnd_bits tunable + +config HAVE_ARCH_MMAP_RND_COMPAT_BITS + bool + help + An arch should select this symbol if it supports running applications + in compatibility mode, supports setting a variable number of bits for + use in establishing the base address for mmap allocations, has MMU + enabled and provides values for both: + - ARCH_MMAP_RND_COMPAT_BITS_MIN + - ARCH_MMAP_RND_COMPAT_BITS_MAX + +config ARCH_MMAP_RND_COMPAT_BITS_MIN + int + +config ARCH_MMAP_RND_COMPAT_BITS_MAX + int + +config ARCH_MMAP_RND_COMPAT_BITS_DEFAULT + int + +config ARCH_MMAP_RND_COMPAT_BITS + int "Number of bits to use for ASLR of mmap base address for compatible applications" if EXPERT + range ARCH_MMAP_RND_COMPAT_BITS_MIN ARCH_MMAP_RND_COMPAT_BITS_MAX + default ARCH_MMAP_RND_COMPAT_BITS_DEFAULT if ARCH_MMAP_RND_COMPAT_BITS_DEFAULT + default ARCH_MMAP_RND_COMPAT_BITS_MIN + depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS + help + This value can be used to select the number of bits to use to + determine the random offset to the base address of vma regions + resulting from mmap allocations for compatible applications This + value will be bounded by the architecture's minimum and maximum + supported values. + + This value can be changed after boot using the + /proc/sys/vm/mmap_rnd_compat_bits tunable + +config HAVE_ARCH_COMPAT_MMAP_BASES + bool + help + This allows 64bit applications to invoke 32-bit mmap() syscall + and vice-versa 32-bit applications to call 64-bit mmap(). + Required for applications doing different bitness syscalls. 
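[Editor's note] The ARCH_MMAP_RND_BITS entries above control how many random bits go into the mmap base, tunable after boot via /proc/sys/vm/mmap_rnd_bits. The small userspace sketch below, which is not part of this patch, makes the effect visible: run it several times and the printed base address varies within the configured number of bits.

    /* Observe mmap base randomization controlled by ARCH_MMAP_RND_BITS. */
    #include <stdio.h>
    #include <sys/mman.h>

    int main(void)
    {
            void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (p == MAP_FAILED) {
                    perror("mmap");
                    return 1;
            }
            printf("mmap base: %p\n", p);
            munmap(p, 4096);
            return 0;
    }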
+ +config PAGE_SIZE_LESS_THAN_64KB + def_bool y + depends on !ARM64_64K_PAGES + depends on !IA64_PAGE_SIZE_64KB + depends on !PAGE_SIZE_64KB + depends on !PARISC_PAGE_SIZE_64KB + depends on !PPC_64K_PAGES + depends on PAGE_SIZE_LESS_THAN_256KB + +config PAGE_SIZE_LESS_THAN_256KB + def_bool y + depends on !PPC_256K_PAGES + depends on !PAGE_SIZE_256KB + +# This allows to use a set of generic functions to determine mmap base +# address by giving priority to top-down scheme only if the process +# is not in legacy mode (compat task, unlimited stack size or +# sysctl_legacy_va_layout). +# Architecture that selects this option can provide its own version of: +# - STACK_RND_MASK +config ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT + bool + depends on MMU + select ARCH_HAS_ELF_RANDOMIZE + +config HAVE_STACK_VALIDATION + bool + help + Architecture supports the 'objtool check' host tool command, which + performs compile-time stack metadata validation. + +config HAVE_RELIABLE_STACKTRACE + bool + help + Architecture has either save_stack_trace_tsk_reliable() or + arch_stack_walk_reliable() function which only returns a stack trace + if it can guarantee the trace is reliable. + +config HAVE_ARCH_HASH + bool + default n + help + If this is set, the architecture provides an + file which provides platform-specific implementations of some + functions in or fs/namei.c. + +config HAVE_ARCH_NVRAM_OPS + bool + +config ISA_BUS_API + def_bool ISA + +# +# ABI hall of shame +# +config CLONE_BACKWARDS + bool + help + Architecture has tls passed as the 4th argument of clone(2), + not the 5th one. + +config CLONE_BACKWARDS2 + bool + help + Architecture has the first two arguments of clone(2) swapped. + +config CLONE_BACKWARDS3 + bool + help + Architecture has tls passed as the 3rd argument of clone(2), + not the 5th one. + +config ODD_RT_SIGACTION + bool + help + Architecture has unusual rt_sigaction(2) arguments + +config OLD_SIGSUSPEND + bool + help + Architecture has old sigsuspend(2) syscall, of one-argument variety + +config OLD_SIGSUSPEND3 + bool + help + Even weirder antique ABI - three-argument sigsuspend(2) + +config OLD_SIGACTION + bool + help + Architecture has old sigaction(2) syscall. Nope, not the same + as OLD_SIGSUSPEND | OLD_SIGSUSPEND3 - alpha has sigsuspend(2), + but fairly different variant of sigaction(2), thanks to OSF/1 + compatibility... + +config COMPAT_OLD_SIGACTION + bool + +config COMPAT_32BIT_TIME + bool "Provide system calls for 32-bit time_t" + default !64BIT || COMPAT + help + This enables 32 bit time_t support in addition to 64 bit time_t support. + This is relevant on all 32-bit architectures, and 64-bit architectures + as part of compat syscall handling. + +config ARCH_NO_PREEMPT + bool + +config ARCH_EPHEMERAL_INODES + def_bool n + help + An arch should select this symbol if it doesn't keep track of inode + instances on its own, but instead relies on something else (e.g. the + host kernel for an UML kernel). + +config ARCH_SUPPORTS_RT + bool + +config CPU_NO_EFFICIENT_FFS + def_bool n + +config HAVE_ARCH_VMAP_STACK + def_bool n + help + An arch should select this symbol if it can support kernel stacks + in vmalloc space. This means: + + - vmalloc space must be large enough to hold many kernel stacks. + This may rule out many 32-bit architectures. + + - Stacks in vmalloc space need to work reliably. 
For example, if + vmap page tables are created on demand, either this mechanism + needs to work while the stack points to a virtual address with + unpopulated page tables or arch code (switch_to() and switch_mm(), + most likely) needs to ensure that the stack's page table entries + are populated before running on a possibly unpopulated stack. + + - If the stack overflows into a guard page, something reasonable + should happen. The definition of "reasonable" is flexible, but + instantly rebooting without logging anything would be unfriendly. + +config VMAP_STACK + default y + bool "Use a virtually-mapped stack" + depends on HAVE_ARCH_VMAP_STACK + depends on !KASAN || KASAN_HW_TAGS || KASAN_VMALLOC + help + Enable this if you want the use virtually-mapped kernel stacks + with guard pages. This causes kernel stack overflows to be + caught immediately rather than causing difficult-to-diagnose + corruption. + + To use this with software KASAN modes, the architecture must support + backing virtual mappings with real shadow memory, and KASAN_VMALLOC + must be enabled. + +config HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET + def_bool n + help + An arch should select this symbol if it can support kernel stack + offset randomization with calls to add_random_kstack_offset() + during syscall entry and choose_random_kstack_offset() during + syscall exit. Careful removal of -fstack-protector-strong and + -fstack-protector should also be applied to the entry code and + closely examined, as the artificial stack bump looks like an array + to the compiler, so it will attempt to add canary checks regardless + of the static branch state. + +config RANDOMIZE_KSTACK_OFFSET_DEFAULT + bool "Randomize kernel stack offset on syscall entry" + depends on HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET + help + The kernel stack offset can be randomized (after pt_regs) by + roughly 5 bits of entropy, frustrating memory corruption + attacks that depend on stack address determinism or + cross-syscall address exposures. This feature is controlled + by kernel boot param "randomize_kstack_offset=on/off", and this + config chooses the default boot state. + +config ARCH_OPTIONAL_KERNEL_RWX + def_bool n + +config ARCH_OPTIONAL_KERNEL_RWX_DEFAULT + def_bool n + +config ARCH_HAS_STRICT_KERNEL_RWX + def_bool n + +config STRICT_KERNEL_RWX + bool "Make kernel text and rodata read-only" if ARCH_OPTIONAL_KERNEL_RWX + depends on ARCH_HAS_STRICT_KERNEL_RWX + default !ARCH_OPTIONAL_KERNEL_RWX || ARCH_OPTIONAL_KERNEL_RWX_DEFAULT + help + If this is set, kernel text and rodata memory will be made read-only, + and non-text memory will be made non-executable. This provides + protection against certain security exploits (e.g. executing the heap + or modifying text) + + These features are considered standard security practice these days. + You should say Y here in almost all cases. + +config ARCH_HAS_STRICT_MODULE_RWX + def_bool n + +config STRICT_MODULE_RWX + bool "Set loadable kernel module data as NX and text as RO" if ARCH_OPTIONAL_KERNEL_RWX + depends on ARCH_HAS_STRICT_MODULE_RWX && MODULES + default !ARCH_OPTIONAL_KERNEL_RWX || ARCH_OPTIONAL_KERNEL_RWX_DEFAULT + help + If this is set, module text and rodata memory will be made read-only, + and non-text memory will be made non-executable. This provides + protection against certain security exploits (e.g. 
writing to text) + +# select if the architecture provides an asm/dma-direct.h header +config ARCH_HAS_PHYS_TO_DMA + bool + +config HAVE_ARCH_COMPILER_H + bool + help + An architecture can select this if it provides an + asm/compiler.h header that should be included after + linux/compiler-*.h in order to override macro definitions that those + headers generally provide. + +config HAVE_ARCH_PREL32_RELOCATIONS + bool + help + May be selected by an architecture if it supports place-relative + 32-bit relocations, both in the toolchain and in the module loader, + in which case relative references can be used in special sections + for PCI fixup, initcalls etc which are only half the size on 64 bit + architectures, and don't require runtime relocation on relocatable + kernels. + +config ARCH_USE_MEMREMAP_PROT + bool + +config LOCK_EVENT_COUNTS + bool "Locking event counts collection" + depends on DEBUG_FS + help + Enable light-weight counting of various locking related events + in the system with minimal performance impact. This reduces + the chance of application behavior change because of timing + differences. The counts are reported via debugfs. + +# Select if the architecture has support for applying RELR relocations. +config ARCH_HAS_RELR + bool + +config RELR + bool "Use RELR relocation packing" + depends on ARCH_HAS_RELR && TOOLS_SUPPORT_RELR + default y + help + Store the kernel's dynamic relocations in the RELR relocation packing + format. Requires a compatible linker (LLD supports this feature), as + well as compatible NM and OBJCOPY utilities (llvm-nm and llvm-objcopy + are compatible). + +config ARCH_HAS_MEM_ENCRYPT + bool + +config ARCH_HAS_CC_PLATFORM + bool + +config HAVE_SPARSE_SYSCALL_NR + bool + help + An architecture should select this if its syscall numbering is sparse + to save space. For example, MIPS architecture has a syscall array with + entries at 4000, 5000 and 6000 locations. This option turns on syscall + related optimizations for a given architecture. + +config ARCH_HAS_VDSO_DATA + bool + +config HAVE_STATIC_CALL + bool + +config HAVE_STATIC_CALL_INLINE + bool + depends on HAVE_STATIC_CALL + +config HAVE_PREEMPT_DYNAMIC + bool + depends on HAVE_STATIC_CALL + depends on GENERIC_ENTRY + help + Select this if the architecture support boot time preempt setting + on top of static calls. It is strongly advised to support inline + static call to avoid any overhead. + +config ARCH_WANT_LD_ORPHAN_WARN + bool + help + An arch should select this symbol once all linker sections are explicitly + included, size-asserted, or discarded in the linker scripts. This is + important because we never want expected sections to be placed heuristically + by the linker, since the locations of such sections can change between linker + versions. + +config HAVE_ARCH_PFN_VALID + bool + +config ARCH_SUPPORTS_DEBUG_PAGEALLOC + bool + +config ARCH_SUPPORTS_PAGE_TABLE_CHECK + bool + +config ARCH_SPLIT_ARG64 + bool + help + If a 32-bit architecture requires 64-bit arguments to be split into + pairs of 32-bit arguments, select this option. + +config ARCH_HAS_ELFCORE_COMPAT + bool + +config ARCH_HAS_PARANOID_L1D_FLUSH + bool + +config DYNAMIC_SIGFRAME + bool + +# Select, if arch has a named attribute group bound to NUMA device nodes. 
+config HAVE_ARCH_NODE_DEV_GROUP + bool + +source "kernel/gcov/Kconfig" + +source "scripts/gcc-plugins/Kconfig" + +endmenu diff --git a/arch/alpha/Kbuild b/arch/alpha/Kbuild new file mode 100644 index 0000000000..345d79df24 --- /dev/null +++ b/arch/alpha/Kbuild @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-y += kernel/ mm/ +obj-$(CONFIG_MATHEMU) += math-emu/ + +# for cleaning +subdir- += boot diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig new file mode 100644 index 0000000000..4e87783c90 --- /dev/null +++ b/arch/alpha/Kconfig @@ -0,0 +1,678 @@ +# SPDX-License-Identifier: GPL-2.0 +config ALPHA + bool + default y + select ARCH_32BIT_USTAT_F_TINODE + select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_MIGHT_HAVE_PC_SERIO + select ARCH_NO_PREEMPT + select ARCH_NO_SG_CHAIN + select ARCH_USE_CMPXCHG_LOCKREF + select DMA_OPS if PCI + select FORCE_PCI if !ALPHA_JENSEN + select PCI_DOMAINS if PCI + select PCI_SYSCALL if PCI + select HAVE_AOUT + select HAVE_ASM_MODVERSIONS + select HAVE_PCSPKR_PLATFORM + select HAVE_PERF_EVENTS + select NEED_DMA_MAP_STATE + select NEED_SG_DMA_LENGTH + select VIRT_TO_BUS + select GENERIC_IRQ_PROBE + select GENERIC_PCI_IOMAP + select AUTO_IRQ_AFFINITY if SMP + select GENERIC_IRQ_SHOW + select ARCH_WANT_IPC_PARSE_VERSION + select ARCH_HAVE_NMI_SAFE_CMPXCHG + select AUDIT_ARCH + select GENERIC_CPU_VULNERABILITIES + select GENERIC_SMP_IDLE_THREAD + select HAVE_ARCH_AUDITSYSCALL + select HAVE_MOD_ARCH_SPECIFIC + select MODULES_USE_ELF_RELA + select ODD_RT_SIGACTION + select OLD_SIGSUSPEND + select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67 + select MMU_GATHER_NO_RANGE + select SET_FS + select SPARSEMEM_EXTREME if SPARSEMEM + select ZONE_DMA + help + The Alpha is a 64-bit general-purpose processor designed and + marketed by the Digital Equipment Corporation of blessed memory, + now Hewlett-Packard. The Alpha Linux project has a home page at + . + +config 64BIT + def_bool y + +config MMU + bool + default y + +config ARCH_HAS_ILOG2_U32 + bool + default n + +config ARCH_HAS_ILOG2_U64 + bool + default n + +config GENERIC_CALIBRATE_DELAY + bool + default y + +config GENERIC_ISA_DMA + bool + default y + +config PGTABLE_LEVELS + int + default 3 + +config AUDIT_ARCH + bool + +menu "System setup" + +choice + prompt "Alpha system type" + default ALPHA_GENERIC + help + This is the system type of your hardware. A "generic" kernel will + run on any supported Alpha system. However, if you configure a + kernel for your specific system, it will be faster and smaller. + + To find out what type of Alpha system you have, you may want to + check out the Linux/Alpha FAQ, accessible on the WWW from + . 
In summary: + + Alcor/Alpha-XLT AS 600, AS 500, XL-300, XL-366 + Alpha-XL XL-233, XL-266 + AlphaBook1 Alpha laptop + Avanti AS 200, AS 205, AS 250, AS 255, AS 300, AS 400 + Cabriolet AlphaPC64, AlphaPCI64 + DP264 DP264 / DS20 / ES40 / DS10 / DS10L + EB164 EB164 21164 evaluation board + EB64+ EB64+ 21064 evaluation board + EB66 EB66 21066 evaluation board + EB66+ EB66+ 21066 evaluation board + Jensen DECpc 150, DEC 2000 models 300, 500 + LX164 AlphaPC164-LX + Lynx AS 2100A + Miata Personal Workstation 433/500/600 a/au + Marvel AlphaServer ES47 / ES80 / GS1280 + Mikasa AS 1000 + Noname AXPpci33, UDB (Multia) + Noritake AS 1000A, AS 600A, AS 800 + PC164 AlphaPC164 + Rawhide AS 1200, AS 4000, AS 4100 + Ruffian RPX164-2, AlphaPC164-UX, AlphaPC164-BX + SX164 AlphaPC164-SX + Sable AS 2000, AS 2100 + Shark DS 20L + Takara Takara (OEM) + Titan AlphaServer ES45 / DS25 / DS15 + Wildfire AlphaServer GS 40/80/160/320 + + If you don't know what to do, choose "generic". + +config ALPHA_GENERIC + bool "Generic" + depends on TTY + select HAVE_EISA + help + A generic kernel will run on all supported Alpha hardware. + +config ALPHA_ALCOR + bool "Alcor/Alpha-XLT" + select HAVE_EISA + help + For systems using the Digital ALCOR chipset: 5 chips (4, 64-bit data + slices (Data Switch, DSW) - 208-pin PQFP and 1 control (Control, I/O + Address, CIA) - a 383 pin plastic PGA). It provides a DRAM + controller (256-bit memory bus) and a PCI interface. It also does + all the work required to support an external Bcache and to maintain + memory coherence when a PCI device DMAs into (or out of) memory. + +config ALPHA_XL + bool "Alpha-XL" + help + XL-233 and XL-266-based Alpha systems. + +config ALPHA_BOOK1 + bool "AlphaBook1" + help + Dec AlphaBook1/Burns Alpha-based laptops. + +config ALPHA_AVANTI_CH + bool "Avanti" + +config ALPHA_CABRIOLET + bool "Cabriolet" + help + Cabriolet AlphaPC64, AlphaPCI64 systems. Derived from EB64+ but now + baby-AT with Flash boot ROM, no on-board SCSI or Ethernet. 3 ISA + slots, 4 PCI slots (one pair are on a shared slot), uses plug-in + Bcache SIMMs. Requires power supply with 3.3V output. + +config ALPHA_DP264 + bool "DP264" + help + Various 21264 systems with the tsunami core logic chipset. + API Networks: 264DP, UP2000(+), CS20; + Compaq: DS10(E,L), XP900, XP1000, DS20(E), ES40. + +config ALPHA_EB164 + bool "EB164" + help + EB164 21164 evaluation board from DEC. Uses 21164 and ALCOR. Has + ISA and PCI expansion (3 ISA slots, 2 64-bit PCI slots (one is + shared with an ISA slot) and 2 32-bit PCI slots. Uses plus-in + Bcache SIMMs. I/O sub-system provides SuperI/O (2S, 1P, FD), KBD, + MOUSE (PS2 style), RTC/NVRAM. Boot ROM is Flash. PC-AT-sized + motherboard. Requires power supply with 3.3V output. + +config ALPHA_EB64P_CH + bool "EB64+" + +config ALPHA_EB66 + bool "EB66" + help + A Digital DS group board. Uses 21066 or 21066A. I/O sub-system is + identical to EB64+. Baby PC-AT size. Runs from standard PC power + supply. The EB66 schematic was published as a marketing poster + advertising the 21066 as "the first microprocessor in the world with + embedded PCI". + +config ALPHA_EB66P + bool "EB66+" + help + Later variant of the EB66 board. + +config ALPHA_EIGER + bool "Eiger" + help + Apparently an obscure OEM single-board computer based on the + Typhoon/Tsunami chipset family. Information on it is scanty. + +config ALPHA_JENSEN + bool "Jensen" + select HAVE_EISA + help + DEC PC 150 AXP (aka Jensen): This is a very old Digital system - one + of the first-generation Alpha systems. 
A number of these systems + seem to be available on the second- hand market. The Jensen is a + floor-standing tower system which originally used a 150MHz 21064 It + used programmable logic to interface a 486 EISA I/O bridge to the + CPU. + +config ALPHA_LX164 + bool "LX164" + help + A technical overview of this board is available at + . + +config ALPHA_LYNX + bool "Lynx" + select HAVE_EISA + help + AlphaServer 2100A-based systems. + +config ALPHA_MARVEL + bool "Marvel" + help + AlphaServer ES47 / ES80 / GS1280 based on EV7. + +config ALPHA_MIATA + bool "Miata" + select HAVE_EISA + help + The Digital PersonalWorkStation (PWS 433a, 433au, 500a, 500au, 600a, + or 600au). + +config ALPHA_MIKASA + bool "Mikasa" + help + AlphaServer 1000-based Alpha systems. + +config ALPHA_NAUTILUS + bool "Nautilus" + help + Alpha systems based on the AMD 751 & ALI 1543C chipsets. + +config ALPHA_NONAME_CH + bool "Noname" + +config ALPHA_NORITAKE + bool "Noritake" + select HAVE_EISA + help + AlphaServer 1000A, AlphaServer 600A, and AlphaServer 800-based + systems. + +config ALPHA_PC164 + bool "PC164" + +config ALPHA_P2K + bool "Platform2000" + +config ALPHA_RAWHIDE + bool "Rawhide" + select HAVE_EISA + help + AlphaServer 1200, AlphaServer 4000 and AlphaServer 4100 machines. + See HOWTO at + . + +config ALPHA_RUFFIAN + bool "Ruffian" + help + Samsung APC164UX. There is a page on known problems and workarounds + at . + +config ALPHA_RX164 + bool "RX164" + +config ALPHA_SX164 + bool "SX164" + +config ALPHA_SABLE + bool "Sable" + select HAVE_EISA + help + Digital AlphaServer 2000 and 2100-based systems. + +config ALPHA_SHARK + bool "Shark" + +config ALPHA_TAKARA + bool "Takara" + help + Alpha 11164-based OEM single-board computer. + +config ALPHA_TITAN + bool "Titan" + help + AlphaServer ES45/DS25 SMP based on EV68 and Titan chipset. + +config ALPHA_WILDFIRE + bool "Wildfire" + help + AlphaServer GS 40/80/160/320 SMP based on the EV67 core. + +endchoice + +# clear all implied options (don't want default values for those): +# Most of these machines have ISA slots; not exactly sure which don't, +# and this doesn't activate hordes of code, so do it always. +config ISA + bool + default y + help + Find out whether you have ISA slots on your motherboard. ISA is the + name of a bus system, i.e. the way the CPU talks to the other stuff + inside your box. Other bus systems are PCI, EISA, MicroChannel + (MCA) or VESA. ISA is an older system, now being displaced by PCI; + newer boards don't support it. If you have ISA, say Y, otherwise N. + +config ISA_DMA_API + bool + default y + +config ALPHA_NONAME + bool + depends on ALPHA_BOOK1 || ALPHA_NONAME_CH + default y + help + The AXPpci33 (aka NoName), is based on the EB66 (includes the Multia + UDB). This design was produced by Digital's Technical OEM (TOEM) + group. It uses the 21066 processor running at 166MHz or 233MHz. It + is a baby-AT size, and runs from a standard PC power supply. It has + 5 ISA slots and 3 PCI slots (one pair are a shared slot). There are + 2 versions, with either PS/2 or large DIN connectors for the + keyboard. 
+ +config ALPHA_EV4 + bool + depends on ALPHA_JENSEN || (ALPHA_SABLE && !ALPHA_GAMMA) || ALPHA_LYNX || ALPHA_NORITAKE && !ALPHA_PRIMO || ALPHA_MIKASA && !ALPHA_PRIMO || ALPHA_CABRIOLET || ALPHA_AVANTI_CH || ALPHA_EB64P_CH || ALPHA_XL || ALPHA_NONAME || ALPHA_EB66 || ALPHA_EB66P || ALPHA_P2K + default y if !ALPHA_LYNX + +config ALPHA_LCA + bool + depends on ALPHA_NONAME || ALPHA_EB66 || ALPHA_EB66P || ALPHA_P2K + default y + +config ALPHA_APECS + bool + depends on !ALPHA_PRIMO && (ALPHA_NORITAKE || ALPHA_MIKASA) || ALPHA_CABRIOLET || ALPHA_AVANTI_CH || ALPHA_EB64P_CH || ALPHA_XL + default y + +config ALPHA_EB64P + bool + depends on ALPHA_CABRIOLET || ALPHA_EB64P_CH + default y + help + Uses 21064 or 21064A and APECs. Has ISA and PCI expansion (3 ISA, + 2 PCI, one pair are on a shared slot). Supports 36-bit DRAM SIMs. + ISA bus generated by Intel SaturnI/O PCI-ISA bridge. On-board SCSI + (NCR 810 on PCI) Ethernet (Digital 21040), KBD, MOUSE (PS2 style), + SuperI/O (2S, 1P, FD), RTC/NVRAM. Boot ROM is EPROM. PC-AT size. + Runs from standard PC power supply. + +config ALPHA_EV5 + bool "EV5 CPU(s) (model 5/xxx)?" if ALPHA_LYNX + default y if ALPHA_RX164 || ALPHA_RAWHIDE || ALPHA_MIATA || ALPHA_LX164 || ALPHA_SX164 || ALPHA_RUFFIAN || ALPHA_SABLE && ALPHA_GAMMA || ALPHA_NORITAKE && ALPHA_PRIMO || ALPHA_MIKASA && ALPHA_PRIMO || ALPHA_PC164 || ALPHA_TAKARA || ALPHA_EB164 || ALPHA_ALCOR + +config ALPHA_EV4 + bool + default y if ALPHA_LYNX && !ALPHA_EV5 + +config ALPHA_CIA + bool + depends on ALPHA_MIATA || ALPHA_LX164 || ALPHA_SX164 || ALPHA_RUFFIAN || ALPHA_NORITAKE && ALPHA_PRIMO || ALPHA_MIKASA && ALPHA_PRIMO || ALPHA_PC164 || ALPHA_TAKARA || ALPHA_EB164 || ALPHA_ALCOR + default y + +config ALPHA_EV56 + bool "EV56 CPU (speed >= 366MHz)?" if ALPHA_ALCOR + default y if ALPHA_RX164 || ALPHA_MIATA || ALPHA_LX164 || ALPHA_SX164 || ALPHA_RUFFIAN || ALPHA_PC164 || ALPHA_TAKARA + +config ALPHA_EV56 + prompt "EV56 CPU (speed >= 333MHz)?" + depends on ALPHA_NORITAKE || ALPHA_PRIMO + +config ALPHA_EV56 + prompt "EV56 CPU (speed >= 400MHz)?" + depends on ALPHA_RAWHIDE + +config ALPHA_PRIMO + bool "EV5 CPU daughtercard (model 5/xxx)?" + depends on ALPHA_NORITAKE || ALPHA_MIKASA + help + Say Y if you have an AS 1000 5/xxx or an AS 1000A 5/xxx. + +config ALPHA_GAMMA + bool "EV5 CPU(s) (model 5/xxx)?" + depends on ALPHA_SABLE + help + Say Y if you have an AS 2000 5/xxx or an AS 2100 5/xxx. + +config ALPHA_GAMMA + bool + depends on ALPHA_LYNX + default y + +config ALPHA_T2 + bool + depends on ALPHA_SABLE || ALPHA_LYNX + default y + +config ALPHA_PYXIS + bool + depends on ALPHA_MIATA || ALPHA_LX164 || ALPHA_SX164 || ALPHA_RUFFIAN + default y + +config ALPHA_EV6 + bool + depends on ALPHA_NAUTILUS || ALPHA_WILDFIRE || ALPHA_TITAN || ALPHA_SHARK || ALPHA_DP264 || ALPHA_EIGER || ALPHA_MARVEL + default y + +config ALPHA_TSUNAMI + bool + depends on ALPHA_SHARK || ALPHA_DP264 || ALPHA_EIGER + default y + +config ALPHA_EV67 + bool "EV67 (or later) CPU (speed > 600MHz)?" if ALPHA_DP264 || ALPHA_EIGER + default y if ALPHA_NAUTILUS || ALPHA_WILDFIRE || ALPHA_TITAN || ALPHA_SHARK || ALPHA_MARVEL + help + Is this a machine based on the EV67 core? If in doubt, select N here + and the machine will be treated as an EV6. 
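The blocks above derive non-interactive symbols (ALPHA_EV5, ALPHA_CIA, ALPHA_PYXIS, and so on) purely from the system-type choice. As a brief sketch of how such derived symbols are usually consumed: kbuild exposes every enabled option to C as a CONFIG_* preprocessor define, so machine-support code can be trimmed at compile time. The helper below is hypothetical; only the CONFIG_ALPHA_* names come from the options defined above.

/* Hypothetical helper; the CONFIG_ALPHA_* macros correspond to the Kconfig
 * symbols above, which kbuild emits as preprocessor defines. EV67 is tested
 * first because it can be set together with the older-generation symbols. */
static const char *alpha_cpu_family(void)
{
#if defined(CONFIG_ALPHA_EV67)
        return "EV67 or newer";
#elif defined(CONFIG_ALPHA_EV6)
        return "EV6";
#elif defined(CONFIG_ALPHA_EV56)
        return "EV56";
#elif defined(CONFIG_ALPHA_EV5)
        return "EV5";
#else
        return "EV4";
#endif
}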
+ +config ALPHA_MCPCIA + bool + depends on ALPHA_RAWHIDE + default y + +config ALPHA_POLARIS + bool + depends on ALPHA_RX164 + default y + +config ALPHA_IRONGATE + bool + depends on ALPHA_NAUTILUS + default y + +config GENERIC_HWEIGHT + bool + default y if !ALPHA_EV67 + +config ALPHA_AVANTI + bool + depends on ALPHA_XL || ALPHA_AVANTI_CH + default y + help + Avanti AS 200, AS 205, AS 250, AS 255, AS 300, and AS 400-based + Alphas. Info at + . + +config ALPHA_BROKEN_IRQ_MASK + bool + depends on ALPHA_GENERIC || ALPHA_PC164 + default y + +config VGA_HOSE + bool + depends on VGA_CONSOLE && (ALPHA_GENERIC || ALPHA_TITAN || ALPHA_MARVEL || ALPHA_TSUNAMI) + default y + help + Support VGA on an arbitrary hose; needed for several platforms + which always have multiple hoses, and whose consoles support it. + + +config ALPHA_QEMU + bool "Run under QEMU emulation" + depends on !ALPHA_GENERIC + help + Assume the presence of special features supported by QEMU PALcode + that reduce the overhead of system emulation. + + Generic kernels will auto-detect QEMU. But when building a + system-specific kernel, the assumption is that we want to + eliminate as many runtime tests as possible. + + If unsure, say N. + + +config ALPHA_SRM + bool "Use SRM as bootloader" if ALPHA_CABRIOLET || ALPHA_AVANTI_CH || ALPHA_EB64P || ALPHA_PC164 || ALPHA_TAKARA || ALPHA_EB164 || ALPHA_ALCOR || ALPHA_MIATA || ALPHA_LX164 || ALPHA_SX164 || ALPHA_NAUTILUS || ALPHA_NONAME + depends on TTY + default y if ALPHA_JENSEN || ALPHA_MIKASA || ALPHA_SABLE || ALPHA_LYNX || ALPHA_NORITAKE || ALPHA_DP264 || ALPHA_RAWHIDE || ALPHA_EIGER || ALPHA_WILDFIRE || ALPHA_TITAN || ALPHA_SHARK || ALPHA_MARVEL + help + There are two different types of booting firmware on Alphas: SRM, + which is command line driven, and ARC, which uses menus and arrow + keys. Details about the Linux/Alpha booting process are contained in + the Linux/Alpha FAQ, accessible on the WWW from + . + + The usual way to load Linux on an Alpha machine is to use MILO + (a bootloader that lets you pass command line parameters to the + kernel just like lilo does for the x86 architecture) which can be + loaded either from ARC or can be installed directly as a permanent + firmware replacement from floppy (which requires changing a certain + jumper on the motherboard). If you want to do either of these, say N + here. If MILO doesn't work on your system (true for Jensen + motherboards), you can bypass it altogether and boot Linux directly + from an SRM console; say Y here in order to do that. Note that you + won't be able to boot from an IDE disk using SRM. + + If unsure, say N. + +config ARCH_MAY_HAVE_PC_FDC + def_bool y + +config SMP + bool "Symmetric multi-processing support" + depends on ALPHA_SABLE || ALPHA_LYNX || ALPHA_RAWHIDE || ALPHA_DP264 || ALPHA_WILDFIRE || ALPHA_TITAN || ALPHA_GENERIC || ALPHA_SHARK || ALPHA_MARVEL + help + This enables support for systems with more than one CPU. If you have + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. + + If you say N here, the kernel will run on uni- and multiprocessor + machines, but will use only one CPU of a multiprocessor machine. If + you say Y here, the kernel will run on many, but not all, + uniprocessor machines. On a uniprocessor machine, the kernel + will run faster if you say N here. + + See also the SMP-HOWTO available at + . + + If you don't know what to do here, say N. 
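As a concrete illustration of the help text's point that a kernel built without SMP support still runs on multiprocessor hardware but only ever uses one CPU: kernel code conventionally compiles per-CPU queries down to a constant zero when CONFIG_SMP is off. This is a sketch of that pattern under assumed names, not Alpha's actual smp.h.

/* Sketch of the usual CONFIG_SMP compile-out pattern (names are illustrative). */
#ifdef CONFIG_SMP
unsigned int current_cpu_id(void);      /* would query the per-CPU id at runtime */
#else
static inline unsigned int current_cpu_id(void)
{
        return 0;                       /* UP build: there is only CPU 0 */
}
#endif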
+ +config NR_CPUS + int "Maximum number of CPUs (2-32)" + range 2 32 + depends on SMP + default "32" if ALPHA_GENERIC || ALPHA_MARVEL + default "4" if !ALPHA_GENERIC && !ALPHA_MARVEL + help + MARVEL support can handle a maximum of 32 CPUs, all the others + with working support have a maximum of 4 CPUs. + +config ARCH_SPARSEMEM_ENABLE + bool "Sparse Memory Support" + help + Say Y to support efficient handling of discontiguous physical memory, + for systems that have huge holes in the physical address space. + +config ALPHA_WTINT + bool "Use WTINT" if ALPHA_SRM || ALPHA_GENERIC + default y if ALPHA_QEMU + default n if ALPHA_EV5 || ALPHA_EV56 || (ALPHA_EV4 && !ALPHA_LCA) + default n if !ALPHA_SRM && !ALPHA_GENERIC + default y if SMP + help + The Wait for Interrupt (WTINT) PALcall attempts to place the CPU + to sleep until the next interrupt. This may reduce the power + consumed, and the heat produced by the computer. However, it has + the side effect of making the cycle counter unreliable as a timing + device across the sleep. + + For emulation under QEMU, definitely say Y here, as we have other + mechanisms for measuring time than the cycle counter. + + For EV4 (but not LCA), EV5 and EV56 systems, or for systems running + MILO, sleep mode is not supported so you might as well say N here. + + For SMP systems we cannot use the cycle counter for timing anyway, + so you might as well say Y here. + + If unsure, say N. + +# LARGE_VMALLOC is racy, if you *really* need it then fix it first +config ALPHA_LARGE_VMALLOC + bool + help + Process creation and other aspects of virtual memory management can + be streamlined if we restrict the kernel to one PGD for all vmalloc + allocations. This equates to about 8GB. + + Under normal circumstances, this is so far and above what is needed + as to be laughable. However, there are certain applications (such + as benchmark-grade in-kernel web serving) that can make use of as + much vmalloc space as is available. + + Say N unless you know you need gobs and gobs of vmalloc space. + +config VERBOSE_MCHECK + bool "Verbose Machine Checks" + +config VERBOSE_MCHECK_ON + int "Verbose Printing Mode (0=off, 1=on, 2=all)" + depends on VERBOSE_MCHECK + default 1 + help + This option allows the default printing mode to be set, and then + possibly overridden by a boot command argument. + + For example, if one wanted the option of printing verbose + machine checks, but wanted the default to be as if verbose + machine check printing was turned off, then one would choose + the printing mode to be 0. Then, upon reboot, one could add + the boot command line "verbose_mcheck=1" to get the normal + verbose machine check printing, or "verbose_mcheck=2" to get + the maximum information available. + + Take the default (1) unless you want more control or more info. + +choice + prompt "Timer interrupt frequency (HZ)?" + default HZ_128 if ALPHA_QEMU + default HZ_1200 if ALPHA_RAWHIDE + default HZ_1024 + help + The frequency at which timer interrupts occur. A high frequency + minimizes latency, whereas a low frequency minimizes overhead of + process accounting. The later effect is especially significant + when being run under QEMU. + + Note that some Alpha hardware cannot change the interrupt frequency + of the timer. If unsure, say 1024 (or 1200 for Rawhide). 
+ + config HZ_32 + bool "32 Hz" + config HZ_64 + bool "64 Hz" + config HZ_128 + bool "128 Hz" + config HZ_256 + bool "256 Hz" + config HZ_1024 + bool "1024 Hz" + config HZ_1200 + bool "1200 Hz" +endchoice + +config HZ + int + default 32 if HZ_32 + default 64 if HZ_64 + default 128 if HZ_128 + default 256 if HZ_256 + default 1200 if HZ_1200 + default 1024 + +config SRM_ENV + tristate "SRM environment through procfs" + depends on PROC_FS + help + If you enable this option, a subdirectory inside /proc called + /proc/srm_environment will give you access to the all important + SRM environment variables (those which have a name) and also + to all others (by their internal number). + + SRM is something like a BIOS for Alpha machines. There are some + other such BIOSes, like AlphaBIOS, which this driver cannot + support (hey, that's not SRM!). + + Despite the fact that this driver doesn't work on all Alphas (but + only on those which have SRM as their firmware), it's save to + build it even if your particular machine doesn't know about SRM + (or if you intend to compile a generic kernel). It will simply + not create those subdirectory in /proc (and give you some warning, + of course). + + This driver is also available as a module and will be called + srm_env then. + +endmenu + +# DUMMY_CONSOLE may be defined in drivers/video/console/Kconfig +# but we also need it if VGA_HOSE is set +config DUMMY_CONSOLE + bool + depends on VGA_HOSE + default y diff --git a/arch/alpha/Kconfig.debug b/arch/alpha/Kconfig.debug new file mode 100644 index 0000000000..f85f4281cc --- /dev/null +++ b/arch/alpha/Kconfig.debug @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: GPL-2.0 + +config EARLY_PRINTK + bool + depends on ALPHA_GENERIC || ALPHA_SRM + default y + +config ALPHA_LEGACY_START_ADDRESS + bool "Legacy kernel start address" + depends on ALPHA_GENERIC + default n + help + The 2.4 kernel changed the kernel start address from 0x310000 + to 0x810000 to make room for the Wildfire's larger SRM console. + Recent consoles on Titan and Marvel machines also require the + extra room. + + If you're using aboot 0.7 or later, the bootloader will examine the + ELF headers to determine where to transfer control. Unfortunately, + most older bootloaders -- APB or MILO -- hardcoded the kernel start + address rather than examining the ELF headers, and the result is a + hard lockup. + + Say Y if you have a broken bootloader. Say N if you do not, or if + you wish to run on Wildfire, Titan, or Marvel. + +config ALPHA_LEGACY_START_ADDRESS + bool + depends on !ALPHA_GENERIC && !ALPHA_TITAN && !ALPHA_MARVEL && !ALPHA_WILDFIRE + default y + +config MATHEMU + tristate "Kernel FP software completion" if DEBUG_KERNEL && !SMP + default y if !DEBUG_KERNEL || SMP + help + This option is required for IEEE compliant floating point arithmetic + on the Alpha. The only time you would ever not say Y is to say M in + order to debug the code. Say Y unless you know what you are doing. diff --git a/arch/alpha/Makefile b/arch/alpha/Makefile new file mode 100644 index 0000000000..881cb913e2 --- /dev/null +++ b/arch/alpha/Makefile @@ -0,0 +1,66 @@ +# +# alpha/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. 
+# +# Copyright (C) 1994 by Linus Torvalds +# + +NM := $(NM) -B + +LDFLAGS_vmlinux := -static -N #-relax +CHECKFLAGS += -D__alpha__ +cflags-y := -pipe -mno-fp-regs -ffixed-8 +cflags-y += $(call cc-option, -fno-jump-tables) + +cpuflags-$(CONFIG_ALPHA_EV4) := -mcpu=ev4 +cpuflags-$(CONFIG_ALPHA_EV5) := -mcpu=ev5 +cpuflags-$(CONFIG_ALPHA_EV56) := -mcpu=ev56 +cpuflags-$(CONFIG_ALPHA_POLARIS) := -mcpu=pca56 +cpuflags-$(CONFIG_ALPHA_SX164) := -mcpu=pca56 +cpuflags-$(CONFIG_ALPHA_EV6) := -mcpu=ev6 +cpuflags-$(CONFIG_ALPHA_EV67) := -mcpu=ev67 +# If GENERIC, make sure to turn off any instruction set extensions that +# the host compiler might have on by default. Given that EV4 and EV5 +# have the same instruction set, prefer EV5 because an EV5 schedule is +# more likely to keep an EV4 processor busy than vice-versa. +cpuflags-$(CONFIG_ALPHA_GENERIC) := -mcpu=ev5 + +cflags-y += $(cpuflags-y) + + +# For TSUNAMI, we must have the assembler not emulate our instructions. +# The same is true for IRONGATE, POLARIS, PYXIS. +# BWX is most important, but we don't really want any emulation ever. +KBUILD_CFLAGS += $(cflags-y) -Wa,-mev6 + +head-y := arch/alpha/kernel/head.o + +libs-y += arch/alpha/lib/ + +# export what is needed by arch/alpha/boot/Makefile +LIBS_Y := $(patsubst %/, %/lib.a, $(libs-y)) +export LIBS_Y + +boot := arch/alpha/boot + +#Default target when executing make with no arguments +all boot: $(boot)/vmlinux.gz + +$(boot)/vmlinux.gz: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $@ + +bootimage bootpfile bootpzfile: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + +archheaders: + $(Q)$(MAKE) $(build)=arch/alpha/kernel/syscalls all + +define archhelp + echo '* boot - Compressed kernel image (arch/alpha/boot/vmlinux.gz)' + echo ' bootimage - SRM bootable image (arch/alpha/boot/bootimage)' + echo ' bootpfile - BOOTP bootable image (arch/alpha/boot/bootpfile)' + echo ' bootpzfile - compressed kernel BOOTP image (arch/alpha/boot/bootpzfile)' +endef diff --git a/arch/alpha/boot/Makefile b/arch/alpha/boot/Makefile new file mode 100644 index 0000000000..d8dba85e60 --- /dev/null +++ b/arch/alpha/boot/Makefile @@ -0,0 +1,120 @@ +# +# arch/alpha/boot/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1994 by Linus Torvalds +# + +hostprogs := tools/mkbb tools/objstrip +targets := vmlinux.gz vmlinux \ + vmlinux.nh tools/lxboot tools/bootlx tools/bootph \ + tools/bootpzh bootloader bootpheader bootpzheader +OBJSTRIP := $(obj)/tools/objstrip + +KBUILD_HOSTCFLAGS := -Wall -I$(objtree)/usr/include +BOOTCFLAGS += -I$(objtree)/$(obj) -I$(srctree)/$(obj) + +# SRM bootable image. Copy to offset 512 of a partition. +$(obj)/bootimage: $(addprefix $(obj)/tools/,mkbb lxboot bootlx) $(obj)/vmlinux.nh + ( cat $(obj)/tools/lxboot $(obj)/tools/bootlx $(obj)/vmlinux.nh ) > $@ + $(obj)/tools/mkbb $@ $(obj)/tools/lxboot + @echo ' Bootimage $@ is ready' + +# BOOTP bootable image. Define INITRD during make to append initrd image. +$(obj)/bootpfile: $(obj)/tools/bootph $(obj)/vmlinux.nh + cat $(obj)/tools/bootph $(obj)/vmlinux.nh > $@ +ifdef INITRD + cat $(INITRD) >> $@ +endif + +# Compressed kernel BOOTP bootable image. +# Define INITRD during make to append initrd image. 
+$(obj)/bootpzfile: $(obj)/tools/bootpzh $(obj)/vmlinux.nh.gz + cat $(obj)/tools/bootpzh $(obj)/vmlinux.nh.gz > $@ +ifdef INITRD + cat $(INITRD) >> $@ +endif + +# Compressed kernel image +$(obj)/vmlinux.gz: $(obj)/vmlinux FORCE + $(call if_changed,gzip) + @echo ' Kernel $@ is ready' + +$(obj)/main.o: $(obj)/ksize.h +$(obj)/bootp.o: $(obj)/ksize.h +$(obj)/bootpz.o: $(obj)/kzsize.h + +$(obj)/ksize.h: $(obj)/vmlinux.nh FORCE + echo "#define KERNEL_SIZE `ls -l $(obj)/vmlinux.nh | awk '{print $$5}'`" > $@T +ifdef INITRD + [ -f $(INITRD) ] || exit 1 + echo "#define INITRD_IMAGE_SIZE `ls -l $(INITRD) | awk '{print $$5}'`" >> $@T +endif + cmp -s $@T $@ || mv -f $@T $@ + rm -f $@T + +$(obj)/kzsize.h: $(obj)/vmlinux.nh.gz FORCE + echo "#define KERNEL_SIZE `ls -l $(obj)/vmlinux.nh | awk '{print $$5}'`" > $@T + echo "#define KERNEL_Z_SIZE `ls -l $(obj)/vmlinux.nh.gz | awk '{print $$5}'`" >> $@T +ifdef INITRD + [ -f $(INITRD) ] || exit 1 + echo "#define INITRD_IMAGE_SIZE `ls -l $(INITRD) | awk '{print $$5}'`" >> $@T +endif + cmp -s $@T $@ || mv -f $@T $@ + rm -f $@T + +quiet_cmd_strip = STRIP $@ + cmd_strip = $(STRIP) -o $@ $< + +$(obj)/vmlinux: vmlinux FORCE + $(call if_changed,strip) + +quiet_cmd_objstrip = OBJSTRIP $@ + cmd_objstrip = $(OBJSTRIP) $(OSFLAGS_$(@F)) $< $@ + +OSFLAGS_vmlinux.nh := -v +OSFLAGS_lxboot := -p +OSFLAGS_bootlx := -vb +OSFLAGS_bootph := -vb +OSFLAGS_bootpzh := -vb + +$(obj)/vmlinux.nh: vmlinux $(OBJSTRIP) FORCE + $(call if_changed,objstrip) + +$(obj)/vmlinux.nh.gz: $(obj)/vmlinux.nh FORCE + $(call if_changed,gzip) + +$(obj)/tools/lxboot: $(obj)/bootloader $(OBJSTRIP) FORCE + $(call if_changed,objstrip) + +$(obj)/tools/bootlx: $(obj)/bootloader $(OBJSTRIP) FORCE + $(call if_changed,objstrip) + +$(obj)/tools/bootph: $(obj)/bootpheader $(OBJSTRIP) FORCE + $(call if_changed,objstrip) + +$(obj)/tools/bootpzh: $(obj)/bootpzheader $(OBJSTRIP) FORCE + $(call if_changed,objstrip) + +LDFLAGS_bootloader := -static -T # -N -relax +LDFLAGS_bootloader := -static -T # -N -relax +LDFLAGS_bootpheader := -static -T # -N -relax +LDFLAGS_bootpzheader := -static -T # -N -relax + +OBJ_bootlx := $(obj)/head.o $(obj)/stdio.o $(obj)/main.o +OBJ_bootph := $(obj)/head.o $(obj)/stdio.o $(obj)/bootp.o +OBJ_bootpzh := $(obj)/head.o $(obj)/stdio.o $(obj)/bootpz.o $(obj)/misc.o + +$(obj)/bootloader: $(obj)/bootloader.lds $(OBJ_bootlx) $(LIBS_Y) FORCE + $(call if_changed,ld) + +$(obj)/bootpheader: $(obj)/bootloader.lds $(OBJ_bootph) $(LIBS_Y) FORCE + $(call if_changed,ld) + +$(obj)/bootpzheader: $(obj)/bootloader.lds $(OBJ_bootpzh) $(LIBS_Y) FORCE + $(call if_changed,ld) + +$(obj)/misc.o: lib/inflate.c diff --git a/arch/alpha/boot/bootloader.lds b/arch/alpha/boot/bootloader.lds new file mode 100644 index 0000000000..8cdff54c6e --- /dev/null +++ b/arch/alpha/boot/bootloader.lds @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +OUTPUT_FORMAT("elf64-alpha") +ENTRY(__start) +printk = srm_printk; +SECTIONS +{ + . = 0x20000000; + .text : { *(.text) } + _etext = .; + PROVIDE (etext = .); + .rodata : { *(.rodata) *(.rodata.*) } + .data : { *(.data) CONSTRUCTORS } + .got : { *(.got) } + .sdata : { *(.sdata) } + _edata = .; + PROVIDE (edata = .); + .sbss : { *(.sbss) *(.scommon) } + .bss : { *(.bss) *(COMMON) } + _end = . 
; + PROVIDE (end = .); + + .mdebug 0 : { *(.mdebug) } + .note 0 : { *(.note) } + .comment 0 : { *(.comment) } +} diff --git a/arch/alpha/boot/bootp.c b/arch/alpha/boot/bootp.c new file mode 100644 index 0000000000..b4faba2432 --- /dev/null +++ b/arch/alpha/boot/bootp.c @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/alpha/boot/bootp.c + * + * Copyright (C) 1997 Jay Estabrook + * + * This file is used for creating a bootp file for the Linux/AXP kernel + * + * based significantly on the arch/alpha/boot/main.c of Linus Torvalds + */ +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include "ksize.h" + +extern unsigned long switch_to_osf_pal(unsigned long nr, + struct pcb_struct *pcb_va, struct pcb_struct *pcb_pa, + unsigned long *vptb); + +extern void move_stack(unsigned long new_stack); + +struct hwrpb_struct *hwrpb = INIT_HWRPB; +static struct pcb_struct pcb_va[1]; + +/* + * Find a physical address of a virtual object.. + * + * This is easy using the virtual page table address. + */ + +static inline void * +find_pa(unsigned long *vptb, void *ptr) +{ + unsigned long address = (unsigned long) ptr; + unsigned long result; + + result = vptb[address >> 13]; + result >>= 32; + result <<= 13; + result |= address & 0x1fff; + return (void *) result; +} + +/* + * This function moves into OSF/1 pal-code, and has a temporary + * PCB for that. The kernel proper should replace this PCB with + * the real one as soon as possible. + * + * The page table muckery in here depends on the fact that the boot + * code has the L1 page table identity-map itself in the second PTE + * in the L1 page table. Thus the L1-page is virtually addressable + * itself (through three levels) at virtual address 0x200802000. + */ + +#define VPTB ((unsigned long *) 0x200000000) +#define L1 ((unsigned long *) 0x200802000) + +void +pal_init(void) +{ + unsigned long i, rev; + struct percpu_struct * percpu; + struct pcb_struct * pcb_pa; + + /* Create the dummy PCB. */ + pcb_va->ksp = 0; + pcb_va->usp = 0; + pcb_va->ptbr = L1[1] >> 32; + pcb_va->asn = 0; + pcb_va->pcc = 0; + pcb_va->unique = 0; + pcb_va->flags = 1; + pcb_va->res1 = 0; + pcb_va->res2 = 0; + pcb_pa = find_pa(VPTB, pcb_va); + + /* + * a0 = 2 (OSF) + * a1 = return address, but we give the asm the vaddr of the PCB + * a2 = physical addr of PCB + * a3 = new virtual page table pointer + * a4 = KSP (but the asm sets it) + */ + srm_printk("Switching to OSF PAL-code .. "); + + i = switch_to_osf_pal(2, pcb_va, pcb_pa, VPTB); + if (i) { + srm_printk("failed, code %ld\n", i); + __halt(); + } + + percpu = (struct percpu_struct *) + (INIT_HWRPB->processor_offset + (unsigned long) INIT_HWRPB); + rev = percpu->pal_revision = percpu->palcode_avail[2]; + + srm_printk("Ok (rev %lx)\n", rev); + + tbia(); /* do it directly in case we are SMP */ +} + +static inline void +load(unsigned long dst, unsigned long src, unsigned long count) +{ + memcpy((void *)dst, (void *)src, count); +} + +/* + * Start the kernel. + */ +static inline void +runkernel(void) +{ + __asm__ __volatile__( + "bis %0,%0,$27\n\t" + "jmp ($27)" + : /* no outputs: it doesn't even return */ + : "r" (START_ADDR)); +} + +extern char _end; +#define KERNEL_ORIGIN \ + ((((unsigned long)&_end) + 511) & ~511) + +void +start_kernel(void) +{ + /* + * Note that this crufty stuff with static and envval + * and envbuf is because: + * + * 1. Frequently, the stack is short, and we don't want to overrun; + * 2. 
Frequently the stack is where we are going to copy the kernel to; + * 3. A certain SRM console required the GET_ENV output to stack. + * ??? A comment in the aboot sources indicates that the GET_ENV + * destination must be quadword aligned. Might this explain the + * behaviour, rather than requiring output to the stack, which + * seems rather far-fetched. + */ + static long nbytes; + static char envval[256] __attribute__((aligned(8))); + static unsigned long initrd_start; + + srm_printk("Linux/AXP bootp loader for Linux " UTS_RELEASE "\n"); + if (INIT_HWRPB->pagesize != 8192) { + srm_printk("Expected 8kB pages, got %ldkB\n", + INIT_HWRPB->pagesize >> 10); + return; + } + if (INIT_HWRPB->vptb != (unsigned long) VPTB) { + srm_printk("Expected vptb at %p, got %p\n", + VPTB, (void *)INIT_HWRPB->vptb); + return; + } + pal_init(); + + /* The initrd must be page-aligned. See below for the + cause of the magic number 5. */ + initrd_start = ((START_ADDR + 5*KERNEL_SIZE + PAGE_SIZE) | + (PAGE_SIZE-1)) + 1; +#ifdef INITRD_IMAGE_SIZE + srm_printk("Initrd positioned at %#lx\n", initrd_start); +#endif + + /* + * Move the stack to a safe place to ensure it won't be + * overwritten by kernel image. + */ + move_stack(initrd_start - PAGE_SIZE); + + nbytes = callback_getenv(ENV_BOOTED_OSFLAGS, envval, sizeof(envval)); + if (nbytes < 0 || nbytes >= sizeof(envval)) { + nbytes = 0; + } + envval[nbytes] = '\0'; + srm_printk("Loading the kernel...'%s'\n", envval); + + /* NOTE: *no* callbacks or printouts from here on out!!! */ + + /* This is a hack, as some consoles seem to get virtual 20000000 (ie + * where the SRM console puts the kernel bootp image) memory + * overlapping physical memory where the kernel wants to be put, + * which causes real problems when attempting to copy the former to + * the latter... :-( + * + * So, we first move the kernel virtual-to-physical way above where + * we physically want the kernel to end up, then copy it from there + * to its final resting place... ;-} + * + * Sigh... */ + +#ifdef INITRD_IMAGE_SIZE + load(initrd_start, KERNEL_ORIGIN+KERNEL_SIZE, INITRD_IMAGE_SIZE); +#endif + load(START_ADDR+(4*KERNEL_SIZE), KERNEL_ORIGIN, KERNEL_SIZE); + load(START_ADDR, START_ADDR+(4*KERNEL_SIZE), KERNEL_SIZE); + + memset((char*)ZERO_PGE, 0, PAGE_SIZE); + strcpy((char*)ZERO_PGE, envval); +#ifdef INITRD_IMAGE_SIZE + ((long *)(ZERO_PGE+256))[0] = initrd_start; + ((long *)(ZERO_PGE+256))[1] = INITRD_IMAGE_SIZE; +#endif + + runkernel(); +} diff --git a/arch/alpha/boot/bootpz.c b/arch/alpha/boot/bootpz.c new file mode 100644 index 0000000000..90a2b341e9 --- /dev/null +++ b/arch/alpha/boot/bootpz.c @@ -0,0 +1,475 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/alpha/boot/bootpz.c + * + * Copyright (C) 1997 Jay Estabrook + * + * This file is used for creating a compressed BOOTP file for the + * Linux/AXP kernel + * + * based significantly on the arch/alpha/boot/main.c of Linus Torvalds + * and the decompression code from MILO. + */ +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include "kzsize.h" + +/* FIXME FIXME FIXME */ +#define MALLOC_AREA_SIZE 0x200000 /* 2MB for now */ +/* FIXME FIXME FIXME */ + + +/* + WARNING NOTE + + It is very possible that turning on additional messages may cause + kernel image corruption due to stack usage to do the printing. 
+ +*/ + +#undef DEBUG_CHECK_RANGE +#undef DEBUG_ADDRESSES +#undef DEBUG_LAST_STEPS + +extern unsigned long switch_to_osf_pal(unsigned long nr, + struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa, + unsigned long *vptb); + +extern int decompress_kernel(void* destination, void *source, + size_t ksize, size_t kzsize); + +extern void move_stack(unsigned long new_stack); + +struct hwrpb_struct *hwrpb = INIT_HWRPB; +static struct pcb_struct pcb_va[1]; + +/* + * Find a physical address of a virtual object.. + * + * This is easy using the virtual page table address. + */ +#define VPTB ((unsigned long *) 0x200000000) + +static inline unsigned long +find_pa(unsigned long address) +{ + unsigned long result; + + result = VPTB[address >> 13]; + result >>= 32; + result <<= 13; + result |= address & 0x1fff; + return result; +} + +int +check_range(unsigned long vstart, unsigned long vend, + unsigned long kstart, unsigned long kend) +{ + unsigned long vaddr, kaddr; + +#ifdef DEBUG_CHECK_RANGE + srm_printk("check_range: V[0x%lx:0x%lx] K[0x%lx:0x%lx]\n", + vstart, vend, kstart, kend); +#endif + /* do some range checking for detecting an overlap... */ + for (vaddr = vstart; vaddr <= vend; vaddr += PAGE_SIZE) + { + kaddr = (find_pa(vaddr) | PAGE_OFFSET); + if (kaddr >= kstart && kaddr <= kend) + { +#ifdef DEBUG_CHECK_RANGE + srm_printk("OVERLAP: vaddr 0x%lx kaddr 0x%lx" + " [0x%lx:0x%lx]\n", + vaddr, kaddr, kstart, kend); +#endif + return 1; + } + } + return 0; +} + +/* + * This function moves into OSF/1 pal-code, and has a temporary + * PCB for that. The kernel proper should replace this PCB with + * the real one as soon as possible. + * + * The page table muckery in here depends on the fact that the boot + * code has the L1 page table identity-map itself in the second PTE + * in the L1 page table. Thus the L1-page is virtually addressable + * itself (through three levels) at virtual address 0x200802000. + */ + +#define L1 ((unsigned long *) 0x200802000) + +void +pal_init(void) +{ + unsigned long i, rev; + struct percpu_struct * percpu; + struct pcb_struct * pcb_pa; + + /* Create the dummy PCB. */ + pcb_va->ksp = 0; + pcb_va->usp = 0; + pcb_va->ptbr = L1[1] >> 32; + pcb_va->asn = 0; + pcb_va->pcc = 0; + pcb_va->unique = 0; + pcb_va->flags = 1; + pcb_va->res1 = 0; + pcb_va->res2 = 0; + pcb_pa = (struct pcb_struct *)find_pa((unsigned long)pcb_va); + + /* + * a0 = 2 (OSF) + * a1 = return address, but we give the asm the vaddr of the PCB + * a2 = physical addr of PCB + * a3 = new virtual page table pointer + * a4 = KSP (but the asm sets it) + */ + srm_printk("Switching to OSF PAL-code... "); + + i = switch_to_osf_pal(2, pcb_va, pcb_pa, VPTB); + if (i) { + srm_printk("failed, code %ld\n", i); + __halt(); + } + + percpu = (struct percpu_struct *) + (INIT_HWRPB->processor_offset + (unsigned long) INIT_HWRPB); + rev = percpu->pal_revision = percpu->palcode_avail[2]; + + srm_printk("OK (rev %lx)\n", rev); + + tbia(); /* do it directly in case we are SMP */ +} + +/* + * Start the kernel. + */ +static inline void +runkernel(void) +{ + __asm__ __volatile__( + "bis %0,%0,$27\n\t" + "jmp ($27)" + : /* no outputs: it doesn't even return */ + : "r" (START_ADDR)); +} + +/* Must record the SP (it is virtual) on entry, so we can make sure + not to overwrite it during movement or decompression. */ +unsigned long SP_on_entry; + +/* Calculate the kernel image address based on the end of the BOOTP + bootstrapper (ie this program). 
+*/ +extern char _end; +#define KERNEL_ORIGIN \ + ((((unsigned long)&_end) + 511) & ~511) + +/* Round address to next higher page boundary. */ +#define NEXT_PAGE(a) (((a) | (PAGE_SIZE - 1)) + 1) + +#ifdef INITRD_IMAGE_SIZE +# define REAL_INITRD_SIZE INITRD_IMAGE_SIZE +#else +# define REAL_INITRD_SIZE 0 +#endif + +/* Defines from include/asm-alpha/system.h + + BOOT_ADDR Virtual address at which the consoles loads + the BOOTP image. + + KERNEL_START KSEG address at which the kernel is built to run, + which includes some initial data pages before the + code. + + START_ADDR KSEG address of the entry point of kernel code. + + ZERO_PGE KSEG address of page full of zeroes, but + upon entry to kernel, it can be expected + to hold the parameter list and possible + INTRD information. + + These are used in the local defines below. +*/ + + +/* Virtual addresses for the BOOTP image. Note that this includes the + bootstrapper code as well as the compressed kernel image, and + possibly the INITRD image. + + Oh, and do NOT forget the STACK, which appears to be placed virtually + beyond the end of the loaded image. +*/ +#define V_BOOT_IMAGE_START BOOT_ADDR +#define V_BOOT_IMAGE_END SP_on_entry + +/* Virtual addresses for just the bootstrapper part of the BOOTP image. */ +#define V_BOOTSTRAPPER_START BOOT_ADDR +#define V_BOOTSTRAPPER_END KERNEL_ORIGIN + +/* Virtual addresses for just the data part of the BOOTP + image. This may also include the INITRD image, but always + includes the STACK. +*/ +#define V_DATA_START KERNEL_ORIGIN +#define V_INITRD_START (KERNEL_ORIGIN + KERNEL_Z_SIZE) +#define V_INTRD_END (V_INITRD_START + REAL_INITRD_SIZE) +#define V_DATA_END V_BOOT_IMAGE_END + +/* KSEG addresses for the uncompressed kernel. + + Note that the end address includes workspace for the decompression. + Note also that the DATA_START address is ZERO_PGE, to which we write + just before jumping to the kernel image at START_ADDR. + */ +#define K_KERNEL_DATA_START ZERO_PGE +#define K_KERNEL_IMAGE_START START_ADDR +#define K_KERNEL_IMAGE_END (START_ADDR + KERNEL_SIZE) + +/* Define to where we may have to decompress the kernel image, before + we move it to the final position, in case of overlap. This will be + above the final position of the kernel. + + Regardless of overlap, we move the INITRD image to the end of this + copy area, because there needs to be a buffer area after the kernel + for "bootmem" anyway. +*/ +#define K_COPY_IMAGE_START NEXT_PAGE(K_KERNEL_IMAGE_END) +/* Reserve one page below INITRD for the new stack. */ +#define K_INITRD_START \ + NEXT_PAGE(K_COPY_IMAGE_START + KERNEL_SIZE + PAGE_SIZE) +#define K_COPY_IMAGE_END \ + (K_INITRD_START + REAL_INITRD_SIZE + MALLOC_AREA_SIZE) +#define K_COPY_IMAGE_SIZE \ + NEXT_PAGE(K_COPY_IMAGE_END - K_COPY_IMAGE_START) + +void +start_kernel(void) +{ + int must_move = 0; + + /* Initialize these for the decompression-in-place situation, + which is the smallest amount of work and most likely to + occur when using the normal START_ADDR of the kernel + (currently set to 16MB, to clear all console code. + */ + unsigned long uncompressed_image_start = K_KERNEL_IMAGE_START; + unsigned long uncompressed_image_end = K_KERNEL_IMAGE_END; + + unsigned long initrd_image_start = K_INITRD_START; + + /* + * Note that this crufty stuff with static and envval + * and envbuf is because: + * + * 1. Frequently, the stack is short, and we don't want to overrun; + * 2. Frequently the stack is where we are going to copy the kernel to; + * 3. 
A certain SRM console required the GET_ENV output to stack. + * ??? A comment in the aboot sources indicates that the GET_ENV + * destination must be quadword aligned. Might this explain the + * behaviour, rather than requiring output to the stack, which + * seems rather far-fetched. + */ + static long nbytes; + static char envval[256] __attribute__((aligned(8))); + register unsigned long asm_sp asm("30"); + + SP_on_entry = asm_sp; + + srm_printk("Linux/Alpha BOOTPZ Loader for Linux " UTS_RELEASE "\n"); + + /* Validity check the HWRPB. */ + if (INIT_HWRPB->pagesize != 8192) { + srm_printk("Expected 8kB pages, got %ldkB\n", + INIT_HWRPB->pagesize >> 10); + return; + } + if (INIT_HWRPB->vptb != (unsigned long) VPTB) { + srm_printk("Expected vptb at %p, got %p\n", + VPTB, (void *)INIT_HWRPB->vptb); + return; + } + + /* PALcode (re)initialization. */ + pal_init(); + + /* Get the parameter list from the console environment variable. */ + nbytes = callback_getenv(ENV_BOOTED_OSFLAGS, envval, sizeof(envval)); + if (nbytes < 0 || nbytes >= sizeof(envval)) { + nbytes = 0; + } + envval[nbytes] = '\0'; + +#ifdef DEBUG_ADDRESSES + srm_printk("START_ADDR 0x%lx\n", START_ADDR); + srm_printk("KERNEL_ORIGIN 0x%lx\n", KERNEL_ORIGIN); + srm_printk("KERNEL_SIZE 0x%x\n", KERNEL_SIZE); + srm_printk("KERNEL_Z_SIZE 0x%x\n", KERNEL_Z_SIZE); +#endif + + /* Since all the SRM consoles load the BOOTP image at virtual + * 0x20000000, we have to ensure that the physical memory + * pages occupied by that image do NOT overlap the physical + * address range where the kernel wants to be run. This + * causes real problems when attempting to cdecompress the + * former into the latter... :-( + * + * So, we may have to decompress/move the kernel/INITRD image + * virtual-to-physical someplace else first before moving + * kernel /INITRD to their final resting places... ;-} + * + * Sigh... + */ + + /* First, check to see if the range of addresses occupied by + the bootstrapper part of the BOOTP image include any of the + physical pages into which the kernel will be placed for + execution. + + We only need check on the final kernel image range, since we + will put the INITRD someplace that we can be sure is not + in conflict. + */ + if (check_range(V_BOOTSTRAPPER_START, V_BOOTSTRAPPER_END, + K_KERNEL_DATA_START, K_KERNEL_IMAGE_END)) + { + srm_printk("FATAL ERROR: overlap of bootstrapper code\n"); + __halt(); + } + + /* Next, check to see if the range of addresses occupied by + the compressed kernel/INITRD/stack portion of the BOOTP + image include any of the physical pages into which the + decompressed kernel or the INITRD will be placed for + execution. + */ + if (check_range(V_DATA_START, V_DATA_END, + K_KERNEL_IMAGE_START, K_COPY_IMAGE_END)) + { +#ifdef DEBUG_ADDRESSES + srm_printk("OVERLAP: cannot decompress in place\n"); +#endif + uncompressed_image_start = K_COPY_IMAGE_START; + uncompressed_image_end = K_COPY_IMAGE_END; + must_move = 1; + + /* Finally, check to see if the range of addresses + occupied by the compressed kernel/INITRD part of + the BOOTP image include any of the physical pages + into which that part is to be copied for + decompression. + */ + while (check_range(V_DATA_START, V_DATA_END, + uncompressed_image_start, + uncompressed_image_end)) + { +#if 0 + uncompressed_image_start += K_COPY_IMAGE_SIZE; + uncompressed_image_end += K_COPY_IMAGE_SIZE; + initrd_image_start += K_COPY_IMAGE_SIZE; +#else + /* Keep as close as possible to end of BOOTP image. 
*/ + uncompressed_image_start += PAGE_SIZE; + uncompressed_image_end += PAGE_SIZE; + initrd_image_start += PAGE_SIZE; +#endif + } + } + + srm_printk("Starting to load the kernel with args '%s'\n", envval); + +#ifdef DEBUG_ADDRESSES + srm_printk("Decompressing the kernel...\n" + "...from 0x%lx to 0x%lx size 0x%x\n", + V_DATA_START, + uncompressed_image_start, + KERNEL_SIZE); +#endif + decompress_kernel((void *)uncompressed_image_start, + (void *)V_DATA_START, + KERNEL_SIZE, KERNEL_Z_SIZE); + + /* + * Now, move things to their final positions, if/as required. + */ + +#ifdef INITRD_IMAGE_SIZE + + /* First, we always move the INITRD image, if present. */ +#ifdef DEBUG_ADDRESSES + srm_printk("Moving the INITRD image...\n" + " from 0x%lx to 0x%lx size 0x%x\n", + V_INITRD_START, + initrd_image_start, + INITRD_IMAGE_SIZE); +#endif + memcpy((void *)initrd_image_start, (void *)V_INITRD_START, + INITRD_IMAGE_SIZE); + +#endif /* INITRD_IMAGE_SIZE */ + + /* Next, we may have to move the uncompressed kernel to the + final destination. + */ + if (must_move) { +#ifdef DEBUG_ADDRESSES + srm_printk("Moving the uncompressed kernel...\n" + "...from 0x%lx to 0x%lx size 0x%x\n", + uncompressed_image_start, + K_KERNEL_IMAGE_START, + (unsigned)KERNEL_SIZE); +#endif + /* + * Move the stack to a safe place to ensure it won't be + * overwritten by kernel image. + */ + move_stack(initrd_image_start - PAGE_SIZE); + + memcpy((void *)K_KERNEL_IMAGE_START, + (void *)uncompressed_image_start, KERNEL_SIZE); + } + + /* Clear the zero page, then move the argument list in. */ +#ifdef DEBUG_LAST_STEPS + srm_printk("Preparing ZERO_PGE...\n"); +#endif + memset((char*)ZERO_PGE, 0, PAGE_SIZE); + strcpy((char*)ZERO_PGE, envval); + +#ifdef INITRD_IMAGE_SIZE + +#ifdef DEBUG_LAST_STEPS + srm_printk("Preparing INITRD info...\n"); +#endif + /* Finally, set the INITRD paramenters for the kernel. */ + ((long *)(ZERO_PGE+256))[0] = initrd_image_start; + ((long *)(ZERO_PGE+256))[1] = INITRD_IMAGE_SIZE; + +#endif /* INITRD_IMAGE_SIZE */ + +#ifdef DEBUG_LAST_STEPS + srm_printk("Doing 'runkernel()'...\n"); +#endif + runkernel(); +} + + /* dummy function, should never be called. */ +void *__kmalloc(size_t size, gfp_t flags) +{ + return (void *)NULL; +} diff --git a/arch/alpha/boot/head.S b/arch/alpha/boot/head.S new file mode 100644 index 0000000000..06a7c95fe9 --- /dev/null +++ b/arch/alpha/boot/head.S @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/boot/head.S + * + * initial bootloader stuff.. 
+ */ + +#include + + .set noreorder + .globl __start + .ent __start +__start: + br $29,2f +2: ldgp $29,0($29) + jsr $26,start_kernel + call_pal PAL_halt + .end __start + + .align 5 + .globl wrent + .ent wrent +wrent: + .prologue 0 + call_pal PAL_wrent + ret ($26) + .end wrent + + .align 5 + .globl wrkgp + .ent wrkgp +wrkgp: + .prologue 0 + call_pal PAL_wrkgp + ret ($26) + .end wrkgp + + .align 5 + .globl switch_to_osf_pal + .ent switch_to_osf_pal +switch_to_osf_pal: + subq $30,128,$30 + .frame $30,128,$26 + stq $26,0($30) + stq $1,8($30) + stq $2,16($30) + stq $3,24($30) + stq $4,32($30) + stq $5,40($30) + stq $6,48($30) + stq $7,56($30) + stq $8,64($30) + stq $9,72($30) + stq $10,80($30) + stq $11,88($30) + stq $12,96($30) + stq $13,104($30) + stq $14,112($30) + stq $15,120($30) + .prologue 0 + + stq $30,0($17) /* save KSP in PCB */ + + bis $30,$30,$20 /* a4 = KSP */ + br $17,1f + + ldq $26,0($30) + ldq $1,8($30) + ldq $2,16($30) + ldq $3,24($30) + ldq $4,32($30) + ldq $5,40($30) + ldq $6,48($30) + ldq $7,56($30) + ldq $8,64($30) + ldq $9,72($30) + ldq $10,80($30) + ldq $11,88($30) + ldq $12,96($30) + ldq $13,104($30) + ldq $14,112($30) + ldq $15,120($30) + addq $30,128,$30 + ret ($26) +1: call_pal PAL_swppal + .end switch_to_osf_pal + + .align 3 + .globl tbi + .ent tbi +tbi: + .prologue 0 + call_pal PAL_tbi + ret ($26) + .end tbi + + .align 3 + .globl halt + .ent halt +halt: + .prologue 0 + call_pal PAL_halt + .end halt + +/* $16 - new stack page */ + .align 3 + .globl move_stack + .ent move_stack +move_stack: + .prologue 0 + lda $0, 0x1fff($31) + and $0, $30, $1 /* Stack offset */ + or $1, $16, $16 /* New stack pointer */ + mov $30, $1 + mov $16, $2 +1: ldq $3, 0($1) /* Move the stack */ + addq $1, 8, $1 + stq $3, 0($2) + and $0, $1, $4 + addq $2, 8, $2 + bne $4, 1b + mov $16, $30 + ret ($26) + .end move_stack diff --git a/arch/alpha/boot/main.c b/arch/alpha/boot/main.c new file mode 100644 index 0000000000..e5347a0800 --- /dev/null +++ b/arch/alpha/boot/main.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/alpha/boot/main.c + * + * Copyright (C) 1994, 1995 Linus Torvalds + * + * This file is the bootloader for the Linux/AXP kernel + */ +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include "ksize.h" + +extern unsigned long switch_to_osf_pal(unsigned long nr, + struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa, + unsigned long *vptb); +struct hwrpb_struct *hwrpb = INIT_HWRPB; +static struct pcb_struct pcb_va[1]; + +/* + * Find a physical address of a virtual object.. + * + * This is easy using the virtual page table address. + */ + +static inline void * +find_pa(unsigned long *vptb, void *ptr) +{ + unsigned long address = (unsigned long) ptr; + unsigned long result; + + result = vptb[address >> 13]; + result >>= 32; + result <<= 13; + result |= address & 0x1fff; + return (void *) result; +} + +/* + * This function moves into OSF/1 pal-code, and has a temporary + * PCB for that. The kernel proper should replace this PCB with + * the real one as soon as possible. + * + * The page table muckery in here depends on the fact that the boot + * code has the L1 page table identity-map itself in the second PTE + * in the L1 page table. Thus the L1-page is virtually addressable + * itself (through three levels) at virtual address 0x200802000. 
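+ * find_pa() above walks this virtual page table: the PTE for an address sits
+ * at VPTB[va >> 13] (0x20000000 >> 13 == 0x10000, for example) and carries
+ * the PFN in its upper 32 bits; pcb_va->ptbr below is likewise taken from
+ * the upper 32 bits of L1[1].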
+ */ + +#define VPTB ((unsigned long *) 0x200000000) +#define L1 ((unsigned long *) 0x200802000) + +void +pal_init(void) +{ + unsigned long i, rev; + struct percpu_struct * percpu; + struct pcb_struct * pcb_pa; + + /* Create the dummy PCB. */ + pcb_va->ksp = 0; + pcb_va->usp = 0; + pcb_va->ptbr = L1[1] >> 32; + pcb_va->asn = 0; + pcb_va->pcc = 0; + pcb_va->unique = 0; + pcb_va->flags = 1; + pcb_va->res1 = 0; + pcb_va->res2 = 0; + pcb_pa = find_pa(VPTB, pcb_va); + + /* + * a0 = 2 (OSF) + * a1 = return address, but we give the asm the vaddr of the PCB + * a2 = physical addr of PCB + * a3 = new virtual page table pointer + * a4 = KSP (but the asm sets it) + */ + srm_printk("Switching to OSF PAL-code .. "); + + i = switch_to_osf_pal(2, pcb_va, pcb_pa, VPTB); + if (i) { + srm_printk("failed, code %ld\n", i); + __halt(); + } + + percpu = (struct percpu_struct *) + (INIT_HWRPB->processor_offset + (unsigned long) INIT_HWRPB); + rev = percpu->pal_revision = percpu->palcode_avail[2]; + + srm_printk("Ok (rev %lx)\n", rev); + + tbia(); /* do it directly in case we are SMP */ +} + +static inline long openboot(void) +{ + char bootdev[256]; + long result; + + result = callback_getenv(ENV_BOOTED_DEV, bootdev, 255); + if (result < 0) + return result; + return callback_open(bootdev, result & 255); +} + +static inline long close(long dev) +{ + return callback_close(dev); +} + +static inline long load(long dev, unsigned long addr, unsigned long count) +{ + char bootfile[256]; + extern char _end; + long result, boot_size = &_end - (char *) BOOT_ADDR; + + result = callback_getenv(ENV_BOOTED_FILE, bootfile, 255); + if (result < 0) + return result; + result &= 255; + bootfile[result] = '\0'; + if (result) + srm_printk("Boot file specification (%s) not implemented\n", + bootfile); + return callback_read(dev, count, (void *)addr, boot_size/512 + 1); +} + +/* + * Start the kernel. + */ +static void runkernel(void) +{ + __asm__ __volatile__( + "bis %1,%1,$30\n\t" + "bis %0,%0,$26\n\t" + "ret ($26)" + : /* no outputs: it doesn't even return */ + : "r" (START_ADDR), + "r" (PAGE_SIZE + INIT_STACK)); +} + +void start_kernel(void) +{ + long i; + long dev; + int nbytes; + char envval[256]; + + srm_printk("Linux/AXP bootloader for Linux " UTS_RELEASE "\n"); + if (INIT_HWRPB->pagesize != 8192) { + srm_printk("Expected 8kB pages, got %ldkB\n", INIT_HWRPB->pagesize >> 10); + return; + } + pal_init(); + dev = openboot(); + if (dev < 0) { + srm_printk("Unable to open boot device: %016lx\n", dev); + return; + } + dev &= 0xffffffff; + srm_printk("Loading vmlinux ..."); + i = load(dev, START_ADDR, KERNEL_SIZE); + close(dev); + if (i != KERNEL_SIZE) { + srm_printk("Failed (%lx)\n", i); + return; + } + + nbytes = callback_getenv(ENV_BOOTED_OSFLAGS, envval, sizeof(envval)); + if (nbytes < 0) { + nbytes = 0; + } + envval[nbytes] = '\0'; + strcpy((char*)ZERO_PGE, envval); + + srm_printk(" Ok\nNow booting the kernel\n"); + runkernel(); + for (i = 0 ; i < 0x100000000 ; i++) + /* nothing */; + __halt(); +} diff --git a/arch/alpha/boot/misc.c b/arch/alpha/boot/misc.c new file mode 100644 index 0000000000..325d4dd4f9 --- /dev/null +++ b/arch/alpha/boot/misc.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * misc.c + * + * This is a collection of several routines from gzip-1.0.3 + * adapted for Linux. 
+ * + * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994 + * + * Modified for ARM Linux by Russell King + * + * Nicolas Pitre 1999/04/14 : + * For this code to run directly from Flash, all constant variables must + * be marked with 'const' and all other variables initialized at run-time + * only. This way all non constant variables will end up in the bss segment, + * which should point to addresses in RAM and cleared to 0 on start. + * This allows for a much quicker boot time. + * + * Modified for Alpha, from the ARM version, by Jay Estabrook 2003. + */ + +#include +#include + +#include + +#define memzero(s,n) memset ((s),0,(n)) +#define puts srm_printk +extern long srm_printk(const char *, ...) + __attribute__ ((format (printf, 1, 2))); + +/* + * gzip declarations + */ +#define OF(args) args +#define STATIC static + +typedef unsigned char uch; +typedef unsigned short ush; +typedef unsigned long ulg; + +#define WSIZE 0x8000 /* Window size must be at least 32k, */ + /* and a power of two */ + +static uch *inbuf; /* input buffer */ +static uch *window; /* Sliding window buffer */ + +static unsigned insize; /* valid bytes in inbuf */ +static unsigned inptr; /* index of next byte to be processed in inbuf */ +static unsigned outcnt; /* bytes in output buffer */ + +/* gzip flag byte */ +#define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */ +#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */ +#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ +#define ORIG_NAME 0x08 /* bit 3 set: original file name present */ +#define COMMENT 0x10 /* bit 4 set: file comment present */ +#define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */ +#define RESERVED 0xC0 /* bit 6,7: reserved */ + +#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf()) + +/* Diagnostic functions */ +#ifdef DEBUG +# define Assert(cond,msg) {if(!(cond)) error(msg);} +# define Trace(x) fprintf x +# define Tracev(x) {if (verbose) fprintf x ;} +# define Tracevv(x) {if (verbose>1) fprintf x ;} +# define Tracec(c,x) {if (verbose && (c)) fprintf x ;} +# define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;} +#else +# define Assert(cond,msg) +# define Trace(x) +# define Tracev(x) +# define Tracevv(x) +# define Tracec(c,x) +# define Tracecv(c,x) +#endif + +static int fill_inbuf(void); +static void flush_window(void); +static void error(char *m); + +static char *input_data; +static int input_data_size; + +static uch *output_data; +static ulg output_ptr; +static ulg bytes_out; + +static void error(char *m); +static void gzip_mark(void **); +static void gzip_release(void **); + +extern int end; +static ulg free_mem_ptr; +static ulg free_mem_end_ptr; + +#define HEAP_SIZE 0x3000 + +#include "../../../lib/inflate.c" + +/* =========================================================================== + * Fill the input buffer. This is called only when the buffer is empty + * and at least one byte is really needed. + */ +int fill_inbuf(void) +{ + if (insize != 0) + error("ran out of input data"); + + inbuf = input_data; + insize = input_data_size; + + inptr = 1; + return inbuf[0]; +} + +/* =========================================================================== + * Write the output window window[0..outcnt-1] and update crc and bytes_out. + * (Used for the decompressed data only.) 
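+ * The inflate code pulled in from lib/inflate.c calls this whenever the
+ * 32 kB sliding window fills; the bytes are appended at output_ptr and the
+ * running CRC is updated.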
+ */ +void flush_window(void) +{ + ulg c = crc; + unsigned n; + uch *in, *out, ch; + + in = window; + out = &output_data[output_ptr]; + for (n = 0; n < outcnt; n++) { + ch = *out++ = *in++; + c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); + } + crc = c; + bytes_out += (ulg)outcnt; + output_ptr += (ulg)outcnt; + outcnt = 0; +/* puts("."); */ +} + +static void error(char *x) +{ + puts("\n\n"); + puts(x); + puts("\n\n -- System halted"); + + while(1); /* Halt */ +} + +unsigned int +decompress_kernel(void *output_start, + void *input_start, + size_t ksize, + size_t kzsize) +{ + output_data = (uch *)output_start; + input_data = (uch *)input_start; + input_data_size = kzsize; /* use compressed size */ + + /* FIXME FIXME FIXME */ + free_mem_ptr = (ulg)output_start + ksize; + free_mem_end_ptr = (ulg)output_start + ksize + 0x200000; + /* FIXME FIXME FIXME */ + + /* put in temp area to reduce initial footprint */ + window = malloc(WSIZE); + + makecrc(); +/* puts("Uncompressing Linux..."); */ + gunzip(); +/* puts(" done, booting the kernel.\n"); */ + return output_ptr; +} diff --git a/arch/alpha/boot/stdio.c b/arch/alpha/boot/stdio.c new file mode 100644 index 0000000000..60f73ccd2e --- /dev/null +++ b/arch/alpha/boot/stdio.c @@ -0,0 +1,302 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) Paul Mackerras 1997. + */ +#include +#include + +size_t strnlen(const char * s, size_t count) +{ + const char *sc; + + for (sc = s; count-- && *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +# define do_div(n, base) ({ \ + unsigned int __base = (base); \ + unsigned int __rem; \ + __rem = ((unsigned long long)(n)) % __base; \ + (n) = ((unsigned long long)(n)) / __base; \ + __rem; \ +}) + + +static int skip_atoi(const char **s) +{ + int i, c; + + for (i = 0; '0' <= (c = **s) && c <= '9'; ++*s) + i = i*10 + c - '0'; + return i; +} + +#define ZEROPAD 1 /* pad with zero */ +#define SIGN 2 /* unsigned/signed long */ +#define PLUS 4 /* show plus */ +#define SPACE 8 /* space if plus */ +#define LEFT 16 /* left justified */ +#define SPECIAL 32 /* 0x */ +#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ + +static char * number(char * str, unsigned long long num, int base, int size, int precision, int type) +{ + char c,sign,tmp[66]; + const char *digits="0123456789abcdefghijklmnopqrstuvwxyz"; + int i; + + if (type & LARGE) + digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + if (type & LEFT) + type &= ~ZEROPAD; + if (base < 2 || base > 36) + return 0; + c = (type & ZEROPAD) ? 
'0' : ' '; + sign = 0; + if (type & SIGN) { + if ((signed long long)num < 0) { + sign = '-'; + num = - (signed long long)num; + size--; + } else if (type & PLUS) { + sign = '+'; + size--; + } else if (type & SPACE) { + sign = ' '; + size--; + } + } + if (type & SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++]='0'; + else while (num != 0) { + tmp[i++] = digits[do_div(num, base)]; + } + if (i > precision) + precision = i; + size -= precision; + if (!(type&(ZEROPAD+LEFT))) + while(size-->0) + *str++ = ' '; + if (sign) + *str++ = sign; + if (type & SPECIAL) { + if (base==8) + *str++ = '0'; + else if (base==16) { + *str++ = '0'; + *str++ = digits[33]; + } + } + if (!(type & LEFT)) + while (size-- > 0) + *str++ = c; + while (i < precision--) + *str++ = '0'; + while (i-- > 0) + *str++ = tmp[i]; + while (size-- > 0) + *str++ = ' '; + return str; +} + +int vsprintf(char *buf, const char *fmt, va_list args) +{ + int len; + unsigned long long num; + int i, base; + char * str; + const char *s; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* min. # of digits for integers; max + number of chars for from string */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + /* 'z' support added 23/7/1999 S.H. */ + /* 'z' changed to 'Z' --davidm 1/25/99 */ + + + for (str=buf ; *fmt ; ++fmt) { + if (*fmt != '%') { + *str++ = *fmt; + continue; + } + + /* process flags */ + flags = 0; + repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': flags |= LEFT; goto repeat; + case '+': flags |= PLUS; goto repeat; + case ' ': flags |= SPACE; goto repeat; + case '#': flags |= SPECIAL; goto repeat; + case '0': flags |= ZEROPAD; goto repeat; + } + + /* get field width */ + field_width = -1; + if ('0' <= *fmt && *fmt <= '9') + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) { + field_width = -field_width; + flags |= LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if ('0' <= *fmt && *fmt <= '9') + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'l' && *(fmt + 1) == 'l') { + qualifier = 'q'; + fmt += 2; + } else if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' + || *fmt == 'Z') { + qualifier = *fmt; + ++fmt; + } + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & LEFT)) + while (--field_width > 0) + *str++ = ' '; + *str++ = (unsigned char) va_arg(args, int); + while (--field_width > 0) + *str++ = ' '; + continue; + + case 's': + s = va_arg(args, char *); + if (!s) + s = ""; + + len = strnlen(s, precision); + + if (!(flags & LEFT)) + while (len < field_width--) + *str++ = ' '; + for (i = 0; i < len; ++i) + *str++ = *s++; + while (len < field_width--) + *str++ = ' '; + continue; + + case 'p': + if (field_width == -1) { + field_width = 2*sizeof(void *); + flags |= ZEROPAD; + } + str = number(str, + (unsigned long) va_arg(args, void *), 16, + field_width, precision, flags); + continue; + + + case 'n': + if (qualifier == 'l') { + long * ip = va_arg(args, long *); + *ip = (str - buf); + } else if (qualifier == 'Z') { + size_t * ip = va_arg(args, size_t *); + *ip = (str - buf); + } else { + int * ip = 
va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + *str++ = '%'; + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'X': + flags |= LARGE; + case 'x': + base = 16; + break; + + case 'd': + case 'i': + flags |= SIGN; + case 'u': + break; + + default: + *str++ = '%'; + if (*fmt) + *str++ = *fmt; + else + --fmt; + continue; + } + if (qualifier == 'l') { + num = va_arg(args, unsigned long); + if (flags & SIGN) + num = (signed long) num; + } else if (qualifier == 'q') { + num = va_arg(args, unsigned long long); + if (flags & SIGN) + num = (signed long long) num; + } else if (qualifier == 'Z') { + num = va_arg(args, size_t); + } else if (qualifier == 'h') { + num = (unsigned short) va_arg(args, int); + if (flags & SIGN) + num = (signed short) num; + } else { + num = va_arg(args, unsigned int); + if (flags & SIGN) + num = (signed int) num; + } + str = number(str, num, base, field_width, precision, flags); + } + *str = '\0'; + return str-buf; +} + +int sprintf(char * buf, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i=vsprintf(buf,fmt,args); + va_end(args); + return i; +} diff --git a/arch/alpha/boot/tools/mkbb.c b/arch/alpha/boot/tools/mkbb.c new file mode 100644 index 0000000000..fc47f33f8a --- /dev/null +++ b/arch/alpha/boot/tools/mkbb.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0 +/* This utility makes a bootblock suitable for the SRM console/miniloader */ + +/* Usage: + * mkbb + * + * Where is the name of the device to install the bootblock on, + * and is the name of a bootblock to merge in. This bootblock + * contains the offset and size of the bootloader. It must be exactly + * 512 bytes long. + */ + +#include +#include +#include +#include + +/* Minimal definition of disklabel, so we don't have to include + * asm/disklabel.h (confuses make) + */ +#ifndef MAXPARTITIONS +#define MAXPARTITIONS 8 /* max. 
# of partitions */ +#endif + +#ifndef u8 +#define u8 unsigned char +#endif + +#ifndef u16 +#define u16 unsigned short +#endif + +#ifndef u32 +#define u32 unsigned int +#endif + +struct disklabel { + u32 d_magic; /* must be DISKLABELMAGIC */ + u16 d_type, d_subtype; + u8 d_typename[16]; + u8 d_packname[16]; + u32 d_secsize; + u32 d_nsectors; + u32 d_ntracks; + u32 d_ncylinders; + u32 d_secpercyl; + u32 d_secprtunit; + u16 d_sparespertrack; + u16 d_sparespercyl; + u32 d_acylinders; + u16 d_rpm, d_interleave, d_trackskew, d_cylskew; + u32 d_headswitch, d_trkseek, d_flags; + u32 d_drivedata[5]; + u32 d_spare[5]; + u32 d_magic2; /* must be DISKLABELMAGIC */ + u16 d_checksum; + u16 d_npartitions; + u32 d_bbsize, d_sbsize; + struct d_partition { + u32 p_size; + u32 p_offset; + u32 p_fsize; + u8 p_fstype; + u8 p_frag; + u16 p_cpg; + } d_partitions[MAXPARTITIONS]; +}; + + +typedef union __bootblock { + struct { + char __pad1[64]; + struct disklabel __label; + } __u1; + struct { + unsigned long __pad2[63]; + unsigned long __checksum; + } __u2; + char bootblock_bytes[512]; + unsigned long bootblock_quadwords[64]; +} bootblock; + +#define bootblock_label __u1.__label +#define bootblock_checksum __u2.__checksum + +int main(int argc, char ** argv) +{ + bootblock bootblock_from_disk; + bootblock bootloader_image; + int dev, fd; + int i; + int nread; + + /* Make sure of the arg count */ + if(argc != 3) { + fprintf(stderr, "Usage: %s device lxboot\n", argv[0]); + exit(0); + } + + /* First, open the device and make sure it's accessible */ + dev = open(argv[1], O_RDWR); + if(dev < 0) { + perror(argv[1]); + exit(0); + } + + /* Now open the lxboot and make sure it's reasonable */ + fd = open(argv[2], O_RDONLY); + if(fd < 0) { + perror(argv[2]); + close(dev); + exit(0); + } + + /* Read in the lxboot */ + nread = read(fd, &bootloader_image, sizeof(bootblock)); + if(nread != sizeof(bootblock)) { + perror("lxboot read"); + fprintf(stderr, "expected %zd, got %d\n", sizeof(bootblock), nread); + exit(0); + } + + /* Read in the bootblock from disk. */ + nread = read(dev, &bootblock_from_disk, sizeof(bootblock)); + if(nread != sizeof(bootblock)) { + perror("bootblock read"); + fprintf(stderr, "expected %zd, got %d\n", sizeof(bootblock), nread); + exit(0); + } + + /* Swap the bootblock's disklabel into the bootloader */ + bootloader_image.bootblock_label = bootblock_from_disk.bootblock_label; + + /* Calculate the bootblock checksum */ + bootloader_image.bootblock_checksum = 0; + for(i = 0; i < 63; i++) { + bootloader_image.bootblock_checksum += + bootloader_image.bootblock_quadwords[i]; + } + + /* Write the whole thing out! */ + lseek(dev, 0L, SEEK_SET); + if(write(dev, &bootloader_image, sizeof(bootblock)) != sizeof(bootblock)) { + perror("bootblock write"); + exit(0); + } + + close(fd); + close(dev); + exit(0); +} + + diff --git a/arch/alpha/boot/tools/objstrip.c b/arch/alpha/boot/tools/objstrip.c new file mode 100644 index 0000000000..08b430d25a --- /dev/null +++ b/arch/alpha/boot/tools/objstrip.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/alpha/boot/tools/objstrip.c + * + * Strip the object file headers/trailers from an executable (ELF or ECOFF). + * + * Copyright (C) 1996 David Mosberger-Tang. + */ +/* + * Converts an ECOFF or ELF object file into a bootable file. The + * object file must be a OMAGIC file (i.e., data and bss follow immediately + * behind the text). See DEC "Assembly Language Programmer's Guide" + * documentation for details. 
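With -p, only a 512-byte primary bootblock is emitted: quadwords 60..62 hold the sector count, starting sector and flags, and quadword 63 the checksum of the first 63 quadwords.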
The SRM boot process is documented in + * the Alpha AXP Architecture Reference Manual, Second Edition by + * Richard L. Sites and Richard T. Witek. + */ +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#ifdef __ELF__ +# include +# define elfhdr elf64_hdr +# define elf_phdr elf64_phdr +# define elf_check_arch(x) ((x)->e_machine == EM_ALPHA) +#endif + +/* bootfile size must be multiple of BLOCK_SIZE: */ +#define BLOCK_SIZE 512 + +const char * prog_name; + + +static void +usage (void) +{ + fprintf(stderr, + "usage: %s [-v] -p file primary\n" + " %s [-vb] file [secondary]\n", prog_name, prog_name); + exit(1); +} + + +int +main (int argc, char *argv[]) +{ + size_t nwritten, tocopy, n, mem_size, fil_size, pad = 0; + int fd, ofd, i, j, verbose = 0, primary = 0; + char buf[8192], *inname; + struct exec * aout; /* includes file & aout header */ + long offset; +#ifdef __ELF__ + struct elfhdr *elf; + struct elf_phdr *elf_phdr; /* program header */ + unsigned long long e_entry; +#endif + + prog_name = argv[0]; + + for (i = 1; i < argc && argv[i][0] == '-'; ++i) { + for (j = 1; argv[i][j]; ++j) { + switch (argv[i][j]) { + case 'v': + verbose = ~verbose; + break; + + case 'b': + pad = BLOCK_SIZE; + break; + + case 'p': + primary = 1; /* make primary bootblock */ + break; + } + } + } + + if (i >= argc) { + usage(); + } + inname = argv[i++]; + + fd = open(inname, O_RDONLY); + if (fd == -1) { + perror("open"); + exit(1); + } + + ofd = 1; + if (i < argc) { + ofd = open(argv[i++], O_WRONLY | O_CREAT | O_TRUNC, 0666); + if (ofd == -1) { + perror("open"); + exit(1); + } + } + + if (primary) { + /* generate bootblock for primary loader */ + + unsigned long bb[64], sum = 0; + struct stat st; + off_t size; + int i; + + if (ofd == 1) { + usage(); + } + + if (fstat(fd, &st) == -1) { + perror("fstat"); + exit(1); + } + + size = (st.st_size + BLOCK_SIZE - 1) & ~(BLOCK_SIZE - 1); + memset(bb, 0, sizeof(bb)); + strcpy((char *) bb, "Linux SRM bootblock"); + bb[60] = size / BLOCK_SIZE; /* count */ + bb[61] = 1; /* starting sector # */ + bb[62] = 0; /* flags---must be 0 */ + for (i = 0; i < 63; ++i) { + sum += bb[i]; + } + bb[63] = sum; + if (write(ofd, bb, sizeof(bb)) != sizeof(bb)) { + perror("boot-block write"); + exit(1); + } + printf("%lu\n", size); + return 0; + } + + /* read and inspect exec header: */ + + if (read(fd, buf, sizeof(buf)) < 0) { + perror("read"); + exit(1); + } + +#ifdef __ELF__ + elf = (struct elfhdr *) buf; + + if (elf->e_ident[0] == 0x7f && str_has_prefix((char *)elf->e_ident + 1, "ELF")) { + if (elf->e_type != ET_EXEC) { + fprintf(stderr, "%s: %s is not an ELF executable\n", + prog_name, inname); + exit(1); + } + if (!elf_check_arch(elf)) { + fprintf(stderr, "%s: is not for this processor (e_machine=%d)\n", + prog_name, elf->e_machine); + exit(1); + } + if (elf->e_phnum != 1) { + fprintf(stderr, + "%s: %d program headers (forgot to link with -N?)\n", + prog_name, elf->e_phnum); + } + + e_entry = elf->e_entry; + + lseek(fd, elf->e_phoff, SEEK_SET); + if (read(fd, buf, sizeof(*elf_phdr)) != sizeof(*elf_phdr)) { + perror("read"); + exit(1); + } + + elf_phdr = (struct elf_phdr *) buf; + offset = elf_phdr->p_offset; + mem_size = elf_phdr->p_memsz; + fil_size = elf_phdr->p_filesz; + + /* work around ELF bug: */ + if (elf_phdr->p_vaddr < e_entry) { + unsigned long delta = e_entry - elf_phdr->p_vaddr; + offset += delta; + mem_size -= delta; + fil_size -= delta; + elf_phdr->p_vaddr += delta; + } + + if (verbose) { + fprintf(stderr, "%s: extracting 
%#016lx-%#016lx (at %lx)\n", + prog_name, (long) elf_phdr->p_vaddr, + elf_phdr->p_vaddr + fil_size, offset); + } + } else +#endif + { + aout = (struct exec *) buf; + + if (!(aout->fh.f_flags & COFF_F_EXEC)) { + fprintf(stderr, "%s: %s is not in executable format\n", + prog_name, inname); + exit(1); + } + + if (aout->fh.f_opthdr != sizeof(aout->ah)) { + fprintf(stderr, "%s: %s has unexpected optional header size\n", + prog_name, inname); + exit(1); + } + + if (N_MAGIC(*aout) != OMAGIC) { + fprintf(stderr, "%s: %s is not an OMAGIC file\n", + prog_name, inname); + exit(1); + } + offset = N_TXTOFF(*aout); + fil_size = aout->ah.tsize + aout->ah.dsize; + mem_size = fil_size + aout->ah.bsize; + + if (verbose) { + fprintf(stderr, "%s: extracting %#016lx-%#016lx (at %lx)\n", + prog_name, aout->ah.text_start, + aout->ah.text_start + fil_size, offset); + } + } + + if (lseek(fd, offset, SEEK_SET) != offset) { + perror("lseek"); + exit(1); + } + + if (verbose) { + fprintf(stderr, "%s: copying %lu byte from %s\n", + prog_name, (unsigned long) fil_size, inname); + } + + tocopy = fil_size; + while (tocopy > 0) { + n = tocopy; + if (n > sizeof(buf)) { + n = sizeof(buf); + } + tocopy -= n; + if ((size_t) read(fd, buf, n) != n) { + perror("read"); + exit(1); + } + do { + nwritten = write(ofd, buf, n); + if ((ssize_t) nwritten == -1) { + perror("write"); + exit(1); + } + n -= nwritten; + } while (n > 0); + } + + if (pad) { + mem_size = ((mem_size + pad - 1) / pad) * pad; + } + + tocopy = mem_size - fil_size; + if (tocopy > 0) { + fprintf(stderr, + "%s: zero-filling bss and aligning to %lu with %lu bytes\n", + prog_name, pad, (unsigned long) tocopy); + + memset(buf, 0x00, sizeof(buf)); + do { + n = tocopy; + if (n > sizeof(buf)) { + n = sizeof(buf); + } + nwritten = write(ofd, buf, n); + if ((ssize_t) nwritten == -1) { + perror("write"); + exit(1); + } + tocopy -= nwritten; + } while (tocopy > 0); + } + return 0; +} diff --git a/arch/alpha/configs/defconfig b/arch/alpha/configs/defconfig new file mode 100644 index 0000000000..7f1ca30b11 --- /dev/null +++ b/arch/alpha/configs/defconfig @@ -0,0 +1,73 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_LOG_BUF_SHIFT=14 +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_KALLSYMS_ALL=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_VERBOSE_MCHECK=y +CONFIG_SRM_ENV=m +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=m +CONFIG_NET_KEY=m +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +# CONFIG_IPV6 is not set +CONFIG_NETFILTER=y +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_FILTER=m +CONFIG_VLAN_8021Q=m +CONFIG_PNP=y +CONFIG_ISAPNP=y +CONFIG_BLK_DEV_FD=y +CONFIG_BLK_DEV_LOOP=m +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_SCSI_AIC7XXX=m +CONFIG_AIC7XXX_CMDS_PER_DEVICE=253 +# CONFIG_AIC7XXX_DEBUG_ENABLE is not set +CONFIG_ATA=y +# CONFIG_SATA_PMP is not set +CONFIG_PATA_ALI=y +CONFIG_PATA_CMD64X=y +CONFIG_PATA_CYPRESS=y +CONFIG_ATA_GENERIC=y +CONFIG_NETDEVICES=y +CONFIG_DUMMY=m +CONFIG_NET_ETHERNET=y +CONFIG_NET_VENDOR_3COM=y +CONFIG_VORTEX=y +CONFIG_NET_TULIP=y +CONFIG_DE2104X=m +CONFIG_TULIP=y +CONFIG_TULIP_MMIO=y +CONFIG_NET_PCI=y +CONFIG_YELLOWFIN=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_CMOS=y +CONFIG_EXT2_FS=y +CONFIG_REISERFS_FS=m +CONFIG_ISO9660_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_NFS_FS=m +CONFIG_NFS_V3=y +CONFIG_NFSD=m +CONFIG_NFSD_V3=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_MAGIC_SYSRQ=y 
+CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_INFO=y +CONFIG_ALPHA_LEGACY_START_ADDRESS=y +CONFIG_MATHEMU=y +CONFIG_CRYPTO_HMAC=y +CONFIG_DEVTMPFS=y diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild new file mode 100644 index 0000000000..42911c8340 --- /dev/null +++ b/arch/alpha/include/asm/Kbuild @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 + +generated-y += syscall_table.h +generic-y += export.h +generic-y += kvm_para.h +generic-y += mcs_spinlock.h diff --git a/arch/alpha/include/asm/a.out.h b/arch/alpha/include/asm/a.out.h new file mode 100644 index 0000000000..d2346b7caf --- /dev/null +++ b/arch/alpha/include/asm/a.out.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_A_OUT_H__ +#define __ALPHA_A_OUT_H__ + +#include + + +/* Assume that start addresses below 4G belong to a TASO application. + Unfortunately, there is no proper bit in the exec header to check. + Worse, we have to notice the start address before swapping to use + /sbin/loader, which of course is _not_ a TASO application. */ +#define SET_AOUT_PERSONALITY(BFPM, EX) \ + set_personality (((BFPM->taso || EX.ah.entry < 0x100000000L \ + ? ADDR_LIMIT_32BIT : 0) | PER_OSF4)) + +#endif /* __A_OUT_GNU_H__ */ diff --git a/arch/alpha/include/asm/agp.h b/arch/alpha/include/asm/agp.h new file mode 100644 index 0000000000..7874f063d0 --- /dev/null +++ b/arch/alpha/include/asm/agp.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AGP_H +#define AGP_H 1 + +#include + +/* dummy for now */ + +#define map_page_into_agp(page) do { } while (0) +#define unmap_page_from_agp(page) do { } while (0) +#define flush_agp_cache() mb() + +/* GATT allocation. Returns/accepts GATT kernel virtual address. */ +#define alloc_gatt_pages(order) \ + ((char *)__get_free_pages(GFP_KERNEL, (order))) +#define free_gatt_pages(table, order) \ + free_pages((unsigned long)(table), (order)) + +#endif diff --git a/arch/alpha/include/asm/agp_backend.h b/arch/alpha/include/asm/agp_backend.h new file mode 100644 index 0000000000..2718802407 --- /dev/null +++ b/arch/alpha/include/asm/agp_backend.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_AGP_BACKEND_H +#define _ALPHA_AGP_BACKEND_H 1 + +typedef union _alpha_agp_mode { + struct { + u32 rate : 3; + u32 reserved0 : 1; + u32 fw : 1; + u32 fourgb : 1; + u32 reserved1 : 2; + u32 enable : 1; + u32 sba : 1; + u32 reserved2 : 14; + u32 rq : 8; + } bits; + u32 lw; +} alpha_agp_mode; + +typedef struct _alpha_agp_info { + struct pci_controller *hose; + struct { + dma_addr_t bus_base; + unsigned long size; + void *sysdata; + } aperture; + alpha_agp_mode capability; + alpha_agp_mode mode; + void *private; + struct alpha_agp_ops *ops; +} alpha_agp_info; + +struct alpha_agp_ops { + int (*setup)(alpha_agp_info *); + void (*cleanup)(alpha_agp_info *); + int (*configure)(alpha_agp_info *); + int (*bind)(alpha_agp_info *, off_t, struct agp_memory *); + int (*unbind)(alpha_agp_info *, off_t, struct agp_memory *); + unsigned long (*translate)(alpha_agp_info *, dma_addr_t); +}; + + +#endif /* _ALPHA_AGP_BACKEND_H */ diff --git a/arch/alpha/include/asm/asm-offsets.h b/arch/alpha/include/asm/asm-offsets.h new file mode 100644 index 0000000000..d370ee36a1 --- /dev/null +++ b/arch/alpha/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/alpha/include/asm/asm-prototypes.h b/arch/alpha/include/asm/asm-prototypes.h new file mode 100644 index 0000000000..c8ae46fc2e --- /dev/null +++ b/arch/alpha/include/asm/asm-prototypes.h @@ -0,0 +1,19 @@ 
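+/* C prototypes for symbols implemented in assembly, so genksyms can compute
+   CRCs for their exports when CONFIG_MODVERSIONS is enabled. */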
+#include + +#include +#include +#include +#include +#include + +#include + +extern void __divl(void); +extern void __reml(void); +extern void __divq(void); +extern void __remq(void); +extern void __divlu(void); +extern void __remlu(void); +extern void __divqu(void); +extern void __remqu(void); +extern unsigned long __udiv_qrnnd(unsigned long *, unsigned long, unsigned long , unsigned long); diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h new file mode 100644 index 0000000000..f2861a43a6 --- /dev/null +++ b/arch/alpha/include/asm/atomic.h @@ -0,0 +1,306 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_ATOMIC_H +#define _ALPHA_ATOMIC_H + +#include +#include +#include + +/* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc... + * + * But use these as seldom as possible since they are much slower + * than regular operations. + */ + +/* + * To ensure dependency ordering is preserved for the _relaxed and + * _release atomics, an smp_mb() is unconditionally inserted into the + * _relaxed variants, which are used to build the barriered versions. + * Avoid redundant back-to-back fences in the _acquire and _fence + * versions. + */ +#define __atomic_acquire_fence() +#define __atomic_post_full_fence() + +#define ATOMIC64_INIT(i) { (i) } + +#define arch_atomic_read(v) READ_ONCE((v)->counter) +#define arch_atomic64_read(v) READ_ONCE((v)->counter) + +#define arch_atomic_set(v,i) WRITE_ONCE((v)->counter, (i)) +#define arch_atomic64_set(v,i) WRITE_ONCE((v)->counter, (i)) + +/* + * To get proper branch prediction for the main line, we must branch + * forward to code at the end of this object's .text section, then + * branch back to restart the operation. + */ + +#define ATOMIC_OP(op, asm_op) \ +static __inline__ void arch_atomic_##op(int i, atomic_t * v) \ +{ \ + unsigned long temp; \ + __asm__ __volatile__( \ + "1: ldl_l %0,%1\n" \ + " " #asm_op " %0,%2,%0\n" \ + " stl_c %0,%1\n" \ + " beq %0,2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + :"=&r" (temp), "=m" (v->counter) \ + :"Ir" (i), "m" (v->counter)); \ +} \ + +#define ATOMIC_OP_RETURN(op, asm_op) \ +static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \ +{ \ + long temp, result; \ + __asm__ __volatile__( \ + "1: ldl_l %0,%1\n" \ + " " #asm_op " %0,%3,%2\n" \ + " " #asm_op " %0,%3,%0\n" \ + " stl_c %0,%1\n" \ + " beq %0,2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ + :"Ir" (i), "m" (v->counter) : "memory"); \ + smp_mb(); \ + return result; \ +} + +#define ATOMIC_FETCH_OP(op, asm_op) \ +static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ +{ \ + long temp, result; \ + __asm__ __volatile__( \ + "1: ldl_l %2,%1\n" \ + " " #asm_op " %2,%3,%0\n" \ + " stl_c %0,%1\n" \ + " beq %0,2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ + :"Ir" (i), "m" (v->counter) : "memory"); \ + smp_mb(); \ + return result; \ +} + +#define ATOMIC64_OP(op, asm_op) \ +static __inline__ void arch_atomic64_##op(s64 i, atomic64_t * v) \ +{ \ + s64 temp; \ + __asm__ __volatile__( \ + "1: ldq_l %0,%1\n" \ + " " #asm_op " %0,%2,%0\n" \ + " stq_c %0,%1\n" \ + " beq %0,2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + :"=&r" (temp), "=m" (v->counter) \ + :"Ir" (i), "m" (v->counter)); \ +} \ + +#define ATOMIC64_OP_RETURN(op, asm_op) \ +static __inline__ s64 \ +arch_atomic64_##op##_return_relaxed(s64 i, 
atomic64_t * v) \ +{ \ + s64 temp, result; \ + __asm__ __volatile__( \ + "1: ldq_l %0,%1\n" \ + " " #asm_op " %0,%3,%2\n" \ + " " #asm_op " %0,%3,%0\n" \ + " stq_c %0,%1\n" \ + " beq %0,2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ + :"Ir" (i), "m" (v->counter) : "memory"); \ + smp_mb(); \ + return result; \ +} + +#define ATOMIC64_FETCH_OP(op, asm_op) \ +static __inline__ s64 \ +arch_atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v) \ +{ \ + s64 temp, result; \ + __asm__ __volatile__( \ + "1: ldq_l %2,%1\n" \ + " " #asm_op " %2,%3,%0\n" \ + " stq_c %0,%1\n" \ + " beq %0,2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ + :"Ir" (i), "m" (v->counter) : "memory"); \ + smp_mb(); \ + return result; \ +} + +#define ATOMIC_OPS(op) \ + ATOMIC_OP(op, op##l) \ + ATOMIC_OP_RETURN(op, op##l) \ + ATOMIC_FETCH_OP(op, op##l) \ + ATOMIC64_OP(op, op##q) \ + ATOMIC64_OP_RETURN(op, op##q) \ + ATOMIC64_FETCH_OP(op, op##q) + +ATOMIC_OPS(add) +ATOMIC_OPS(sub) + +#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed +#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed +#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed +#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed + +#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed +#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed +#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed +#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed + +#define arch_atomic_andnot arch_atomic_andnot +#define arch_atomic64_andnot arch_atomic64_andnot + +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, asm) \ + ATOMIC_OP(op, asm) \ + ATOMIC_FETCH_OP(op, asm) \ + ATOMIC64_OP(op, asm) \ + ATOMIC64_FETCH_OP(op, asm) + +ATOMIC_OPS(and, and) +ATOMIC_OPS(andnot, bic) +ATOMIC_OPS(or, bis) +ATOMIC_OPS(xor, xor) + +#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed +#define arch_atomic_fetch_andnot_relaxed arch_atomic_fetch_andnot_relaxed +#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed +#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed + +#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed +#define arch_atomic64_fetch_andnot_relaxed arch_atomic64_fetch_andnot_relaxed +#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed +#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed + +#undef ATOMIC_OPS +#undef ATOMIC64_FETCH_OP +#undef ATOMIC64_OP_RETURN +#undef ATOMIC64_OP +#undef ATOMIC_FETCH_OP +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP + +#define arch_atomic64_cmpxchg(v, old, new) \ + (arch_cmpxchg(&((v)->counter), old, new)) +#define arch_atomic64_xchg(v, new) \ + (arch_xchg(&((v)->counter), new)) + +#define arch_atomic_cmpxchg(v, old, new) \ + (arch_cmpxchg(&((v)->counter), old, new)) +#define arch_atomic_xchg(v, new) \ + (arch_xchg(&((v)->counter), new)) + +/** + * arch_atomic_fetch_add_unless - add unless the number is a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns the old value of @v. 
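+ * For example, arch_atomic_fetch_add_unless(v, 1, 0) increments @v only when
+ * it is currently non-zero; this is the building block for
+ * atomic_inc_not_zero().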
+ */ +static __inline__ int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u) +{ + int c, new, old; + smp_mb(); + __asm__ __volatile__( + "1: ldl_l %[old],%[mem]\n" + " cmpeq %[old],%[u],%[c]\n" + " addl %[old],%[a],%[new]\n" + " bne %[c],2f\n" + " stl_c %[new],%[mem]\n" + " beq %[new],3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : [old] "=&r"(old), [new] "=&r"(new), [c] "=&r"(c) + : [mem] "m"(*v), [a] "rI"(a), [u] "rI"((long)u) + : "memory"); + smp_mb(); + return old; +} +#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless + +/** + * arch_atomic64_fetch_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns the old value of @v. + */ +static __inline__ s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) +{ + s64 c, new, old; + smp_mb(); + __asm__ __volatile__( + "1: ldq_l %[old],%[mem]\n" + " cmpeq %[old],%[u],%[c]\n" + " addq %[old],%[a],%[new]\n" + " bne %[c],2f\n" + " stq_c %[new],%[mem]\n" + " beq %[new],3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : [old] "=&r"(old), [new] "=&r"(new), [c] "=&r"(c) + : [mem] "m"(*v), [a] "rI"(a), [u] "rI"(u) + : "memory"); + smp_mb(); + return old; +} +#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless + +/* + * arch_atomic64_dec_if_positive - decrement by 1 if old value positive + * @v: pointer of type atomic_t + * + * The function returns the old value of *v minus 1, even if + * the atomic variable, v, was not decremented. + */ +static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) +{ + s64 old, tmp; + smp_mb(); + __asm__ __volatile__( + "1: ldq_l %[old],%[mem]\n" + " subq %[old],1,%[tmp]\n" + " ble %[old],2f\n" + " stq_c %[tmp],%[mem]\n" + " beq %[tmp],3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : [old] "=&r"(old), [tmp] "=&r"(tmp) + : [mem] "m"(*v) + : "memory"); + smp_mb(); + return old - 1; +} +#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive + +#endif /* _ALPHA_ATOMIC_H */ diff --git a/arch/alpha/include/asm/barrier.h b/arch/alpha/include/asm/barrier.h new file mode 100644 index 0000000000..c56bfffc99 --- /dev/null +++ b/arch/alpha/include/asm/barrier.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __BARRIER_H +#define __BARRIER_H + +#define mb() __asm__ __volatile__("mb": : :"memory") +#define rmb() __asm__ __volatile__("mb": : :"memory") +#define wmb() __asm__ __volatile__("wmb": : :"memory") + +#define __smp_load_acquire(p) \ +({ \ + compiletime_assert_atomic_type(*p); \ + __READ_ONCE(*p); \ +}) + +#ifdef CONFIG_SMP +#define __ASM_SMP_MB "\tmb\n" +#else +#define __ASM_SMP_MB +#endif + +#include + +#endif /* __BARRIER_H */ diff --git a/arch/alpha/include/asm/bitops.h b/arch/alpha/include/asm/bitops.h new file mode 100644 index 0000000000..e1d8483a45 --- /dev/null +++ b/arch/alpha/include/asm/bitops.h @@ -0,0 +1,459 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_BITOPS_H +#define _ALPHA_BITOPS_H + +#ifndef _LINUX_BITOPS_H +#error only can be included directly +#endif + +#include +#include + +/* + * Copyright 1994, Linus Torvalds. + */ + +/* + * These have to be done with inline assembly: that way the bit-setting + * is guaranteed to be atomic. All bit operations return 0 if the bit + * was cleared before the operation and != 0 if it was not. 
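+ * For example, test_and_set_bit(5, &word) atomically sets bit 5 and returns
+ * non-zero only if it was already set.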
+ * + * To get proper branch prediction for the main line, we must branch + * forward to code at the end of this object's .text section, then + * branch back to restart the operation. + * + * bit 0 is the LSB of addr; bit 64 is the LSB of (addr+1). + */ + +static inline void +set_bit(unsigned long nr, volatile void * addr) +{ + unsigned long temp; + int *m = ((int *) addr) + (nr >> 5); + + __asm__ __volatile__( + "1: ldl_l %0,%3\n" + " bis %0,%2,%0\n" + " stl_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (*m) + :"Ir" (1UL << (nr & 31)), "m" (*m)); +} + +/* + * WARNING: non atomic version. + */ +static inline void +__set_bit(unsigned long nr, volatile void * addr) +{ + int *m = ((int *) addr) + (nr >> 5); + + *m |= 1 << (nr & 31); +} + +static inline void +clear_bit(unsigned long nr, volatile void * addr) +{ + unsigned long temp; + int *m = ((int *) addr) + (nr >> 5); + + __asm__ __volatile__( + "1: ldl_l %0,%3\n" + " bic %0,%2,%0\n" + " stl_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (*m) + :"Ir" (1UL << (nr & 31)), "m" (*m)); +} + +static inline void +clear_bit_unlock(unsigned long nr, volatile void * addr) +{ + smp_mb(); + clear_bit(nr, addr); +} + +/* + * WARNING: non atomic version. + */ +static __inline__ void +__clear_bit(unsigned long nr, volatile void * addr) +{ + int *m = ((int *) addr) + (nr >> 5); + + *m &= ~(1 << (nr & 31)); +} + +static inline void +__clear_bit_unlock(unsigned long nr, volatile void * addr) +{ + smp_mb(); + __clear_bit(nr, addr); +} + +static inline void +change_bit(unsigned long nr, volatile void * addr) +{ + unsigned long temp; + int *m = ((int *) addr) + (nr >> 5); + + __asm__ __volatile__( + "1: ldl_l %0,%3\n" + " xor %0,%2,%0\n" + " stl_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (*m) + :"Ir" (1UL << (nr & 31)), "m" (*m)); +} + +/* + * WARNING: non atomic version. + */ +static __inline__ void +__change_bit(unsigned long nr, volatile void * addr) +{ + int *m = ((int *) addr) + (nr >> 5); + + *m ^= 1 << (nr & 31); +} + +static inline int +test_and_set_bit(unsigned long nr, volatile void *addr) +{ + unsigned long oldbit; + unsigned long temp; + int *m = ((int *) addr) + (nr >> 5); + + __asm__ __volatile__( +#ifdef CONFIG_SMP + " mb\n" +#endif + "1: ldl_l %0,%4\n" + " and %0,%3,%2\n" + " bne %2,2f\n" + " xor %0,%3,%0\n" + " stl_c %0,%1\n" + " beq %0,3f\n" + "2:\n" +#ifdef CONFIG_SMP + " mb\n" +#endif + ".subsection 2\n" + "3: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (*m), "=&r" (oldbit) + :"Ir" (1UL << (nr & 31)), "m" (*m) : "memory"); + + return oldbit != 0; +} + +static inline int +test_and_set_bit_lock(unsigned long nr, volatile void *addr) +{ + unsigned long oldbit; + unsigned long temp; + int *m = ((int *) addr) + (nr >> 5); + + __asm__ __volatile__( + "1: ldl_l %0,%4\n" + " and %0,%3,%2\n" + " bne %2,2f\n" + " xor %0,%3,%0\n" + " stl_c %0,%1\n" + " beq %0,3f\n" + "2:\n" +#ifdef CONFIG_SMP + " mb\n" +#endif + ".subsection 2\n" + "3: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (*m), "=&r" (oldbit) + :"Ir" (1UL << (nr & 31)), "m" (*m) : "memory"); + + return oldbit != 0; +} + +/* + * WARNING: non atomic version. 
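+ * The caller must provide its own serialization (e.g. a spinlock) against
+ * all other users of the bitmap.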
+ */ +static inline int +__test_and_set_bit(unsigned long nr, volatile void * addr) +{ + unsigned long mask = 1 << (nr & 0x1f); + int *m = ((int *) addr) + (nr >> 5); + int old = *m; + + *m = old | mask; + return (old & mask) != 0; +} + +static inline int +test_and_clear_bit(unsigned long nr, volatile void * addr) +{ + unsigned long oldbit; + unsigned long temp; + int *m = ((int *) addr) + (nr >> 5); + + __asm__ __volatile__( +#ifdef CONFIG_SMP + " mb\n" +#endif + "1: ldl_l %0,%4\n" + " and %0,%3,%2\n" + " beq %2,2f\n" + " xor %0,%3,%0\n" + " stl_c %0,%1\n" + " beq %0,3f\n" + "2:\n" +#ifdef CONFIG_SMP + " mb\n" +#endif + ".subsection 2\n" + "3: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (*m), "=&r" (oldbit) + :"Ir" (1UL << (nr & 31)), "m" (*m) : "memory"); + + return oldbit != 0; +} + +/* + * WARNING: non atomic version. + */ +static inline int +__test_and_clear_bit(unsigned long nr, volatile void * addr) +{ + unsigned long mask = 1 << (nr & 0x1f); + int *m = ((int *) addr) + (nr >> 5); + int old = *m; + + *m = old & ~mask; + return (old & mask) != 0; +} + +static inline int +test_and_change_bit(unsigned long nr, volatile void * addr) +{ + unsigned long oldbit; + unsigned long temp; + int *m = ((int *) addr) + (nr >> 5); + + __asm__ __volatile__( +#ifdef CONFIG_SMP + " mb\n" +#endif + "1: ldl_l %0,%4\n" + " and %0,%3,%2\n" + " xor %0,%3,%0\n" + " stl_c %0,%1\n" + " beq %0,3f\n" +#ifdef CONFIG_SMP + " mb\n" +#endif + ".subsection 2\n" + "3: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (*m), "=&r" (oldbit) + :"Ir" (1UL << (nr & 31)), "m" (*m) : "memory"); + + return oldbit != 0; +} + +/* + * WARNING: non atomic version. + */ +static __inline__ int +__test_and_change_bit(unsigned long nr, volatile void * addr) +{ + unsigned long mask = 1 << (nr & 0x1f); + int *m = ((int *) addr) + (nr >> 5); + int old = *m; + + *m = old ^ mask; + return (old & mask) != 0; +} + +static inline int +test_bit(int nr, const volatile void * addr) +{ + return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; +} + +/* + * ffz = Find First Zero in word. Undefined if no zero exists, + * so code should check against ~0UL first.. + * + * Do a binary search on the bits. Due to the nature of large + * constants on the alpha, it is worthwhile to split the search. + */ +static inline unsigned long ffz_b(unsigned long x) +{ + unsigned long sum, x1, x2, x4; + + x = ~x & -~x; /* set first 0 bit, clear others */ + x1 = x & 0xAA; + x2 = x & 0xCC; + x4 = x & 0xF0; + sum = x2 ? 2 : 0; + sum += (x4 != 0) * 4; + sum += (x1 != 0); + + return sum; +} + +static inline unsigned long ffz(unsigned long word) +{ +#if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67) + /* Whee. EV67 can calculate it directly. */ + return __kernel_cttz(~word); +#else + unsigned long bits, qofs, bofs; + + bits = __kernel_cmpbge(word, ~0UL); + qofs = ffz_b(bits); + bits = __kernel_extbl(word, qofs); + bofs = ffz_b(bits); + + return qofs*8 + bofs; +#endif +} + +/* + * __ffs = Find First set bit in word. Undefined if no set bit exists. + */ +static inline unsigned long __ffs(unsigned long word) +{ +#if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67) + /* Whee. EV67 can calculate it directly. */ + return __kernel_cttz(word); +#else + unsigned long bits, qofs, bofs; + + bits = __kernel_cmpbge(0, word); + qofs = ffz_b(bits); + bits = __kernel_extbl(word, qofs); + bofs = ffz_b(~bits); + + return qofs*8 + bofs; +#endif +} + +#ifdef __KERNEL__ + +/* + * ffs: find first bit set. 
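(1-based, so ffs(0x8) == 4 and ffs(0) == 0.)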
This is defined the same way as + * the libc and compiler builtin ffs routines, therefore + * differs in spirit from the above __ffs. + */ + +static inline int ffs(int word) +{ + int result = __ffs(word) + 1; + return word ? result : 0; +} + +/* + * fls: find last bit set. + */ +#if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67) +static inline int fls64(unsigned long word) +{ + return 64 - __kernel_ctlz(word); +} +#else +extern const unsigned char __flsm1_tab[256]; + +static inline int fls64(unsigned long x) +{ + unsigned long t, a, r; + + t = __kernel_cmpbge (x, 0x0101010101010101UL); + a = __flsm1_tab[t]; + t = __kernel_extbl (x, a); + r = a*8 + __flsm1_tab[t] + (x != 0); + + return r; +} +#endif + +static inline unsigned long __fls(unsigned long x) +{ + return fls64(x) - 1; +} + +static inline int fls(unsigned int x) +{ + return fls64(x); +} + +/* + * hweightN: returns the hamming weight (i.e. the number + * of bits set) of a N-bit word + */ + +#if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67) +/* Whee. EV67 can calculate it directly. */ +static inline unsigned long __arch_hweight64(unsigned long w) +{ + return __kernel_ctpop(w); +} + +static inline unsigned int __arch_hweight32(unsigned int w) +{ + return __arch_hweight64(w); +} + +static inline unsigned int __arch_hweight16(unsigned int w) +{ + return __arch_hweight64(w & 0xffff); +} + +static inline unsigned int __arch_hweight8(unsigned int w) +{ + return __arch_hweight64(w & 0xff); +} +#else +#include +#endif + +#include + +#endif /* __KERNEL__ */ + +#ifdef __KERNEL__ + +/* + * Every architecture must define this function. It's the fastest + * way of searching a 100-bit bitmap. It's guaranteed that at least + * one of the 100 bits is cleared. + */ +static inline unsigned long +sched_find_first_bit(const unsigned long b[2]) +{ + unsigned long b0, b1, ofs, tmp; + + b0 = b[0]; + b1 = b[1]; + ofs = (b0 ? 0 : 64); + tmp = (b0 ? b0 : b1); + + return __ffs(tmp) + ofs; +} + +#include + +#include + +#endif /* __KERNEL__ */ + +#endif /* _ALPHA_BITOPS_H */ diff --git a/arch/alpha/include/asm/bug.h b/arch/alpha/include/asm/bug.h new file mode 100644 index 0000000000..b88ebdfedf --- /dev/null +++ b/arch/alpha/include/asm/bug.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_BUG_H +#define _ALPHA_BUG_H + +#include + +#ifdef CONFIG_BUG +#include + +/* ??? Would be nice to use .gprel32 here, but we can't be sure that the + function loaded the GP, so this could fail in modules. */ +#define BUG() do { \ + __asm__ __volatile__( \ + "call_pal %0 # bugchk\n\t" \ + ".long %1\n\t.8byte %2" \ + : : "i"(PAL_bugchk), "i"(__LINE__), "i"(__FILE__)); \ + unreachable(); \ + } while (0) + +#define HAVE_ARCH_BUG +#endif + +#include + +#endif diff --git a/arch/alpha/include/asm/bugs.h b/arch/alpha/include/asm/bugs.h new file mode 100644 index 0000000000..78030d1c7e --- /dev/null +++ b/arch/alpha/include/asm/bugs.h @@ -0,0 +1,20 @@ +/* + * include/asm-alpha/bugs.h + * + * Copyright (C) 1994 Linus Torvalds + */ + +/* + * This is included by init/main.c to check for architecture-dependent bugs. + * + * Needs: + * void check_bugs(void); + */ + +/* + * I don't know of any alpha bugs yet.. 
Nice chip + */ + +static void check_bugs(void) +{ +} diff --git a/arch/alpha/include/asm/cache.h b/arch/alpha/include/asm/cache.h new file mode 100644 index 0000000000..6ce508c689 --- /dev/null +++ b/arch/alpha/include/asm/cache.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * include/asm-alpha/cache.h + */ +#ifndef __ARCH_ALPHA_CACHE_H +#define __ARCH_ALPHA_CACHE_H + + +/* Bytes per L1 (data) cache line. */ +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_EV6) +# define L1_CACHE_BYTES 64 +# define L1_CACHE_SHIFT 6 +#else +/* Both EV4 and EV5 are write-through, read-allocate, + direct-mapped, physical. +*/ +# define L1_CACHE_BYTES 32 +# define L1_CACHE_SHIFT 5 +#endif + +#define SMP_CACHE_BYTES L1_CACHE_BYTES + +#endif diff --git a/arch/alpha/include/asm/cacheflush.h b/arch/alpha/include/asm/cacheflush.h new file mode 100644 index 0000000000..9945ff483e --- /dev/null +++ b/arch/alpha/include/asm/cacheflush.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_CACHEFLUSH_H +#define _ALPHA_CACHEFLUSH_H + +#include + +/* Note that the following two definitions are _highly_ dependent + on the contexts in which they are used in the kernel. I personally + think it is criminal how loosely defined these macros are. */ + +/* We need to flush the kernel's icache after loading modules. The + only other use of this macro is in load_aout_interp which is not + used on Alpha. + + Note that this definition should *not* be used for userspace + icache flushing. While functional, it is _way_ overkill. The + icache is tagged with ASNs and it suffices to allocate a new ASN + for the process. */ +#ifndef CONFIG_SMP +#define flush_icache_range(start, end) imb() +#else +#define flush_icache_range(start, end) smp_imb() +extern void smp_imb(void); +#endif + +/* We need to flush the userspace icache after setting breakpoints in + ptrace. + + Instead of indiscriminately using imb, take advantage of the fact + that icache entries are tagged with the ASN and load a new mm context. */ +/* ??? Ought to use this in arch/alpha/kernel/signal.c too. */ + +#ifndef CONFIG_SMP +#include + +extern void __load_new_mm_context(struct mm_struct *); +static inline void +flush_icache_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long addr, int len) +{ + if (vma->vm_flags & VM_EXEC) { + struct mm_struct *mm = vma->vm_mm; + if (current->active_mm == mm) + __load_new_mm_context(mm); + else + mm->context[smp_processor_id()] = 0; + } +} +#define flush_icache_user_page flush_icache_user_page +#else /* CONFIG_SMP */ +extern void flush_icache_user_page(struct vm_area_struct *vma, + struct page *page, unsigned long addr, int len); +#define flush_icache_user_page flush_icache_user_page +#endif /* CONFIG_SMP */ + +/* This is used only in __do_fault and do_swap_page. */ +#define flush_icache_page(vma, page) \ + flush_icache_user_page((vma), (page), 0, 0) + +#include + +#endif /* _ALPHA_CACHEFLUSH_H */ diff --git a/arch/alpha/include/asm/checksum.h b/arch/alpha/include/asm/checksum.h new file mode 100644 index 0000000000..99d631e146 --- /dev/null +++ b/arch/alpha/include/asm/checksum.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_CHECKSUM_H +#define _ALPHA_CHECKSUM_H + +#include + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. 
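+ * The ihl argument is the IP header length in 32-bit words (5 when there
+ * are no options).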
+ */ +extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl); + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +__sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, __wsum sum); + +__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, __wsum sum); + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +extern __wsum csum_partial(const void *buff, int len, __wsum sum); + +/* + * the same as csum_partial, but copies from src while it + * checksums + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ +#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER +#define _HAVE_ARCH_CSUM_AND_COPY +__wsum csum_and_copy_from_user(const void __user *src, void *dst, int len); + +__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len); + + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ + +extern __sum16 ip_compute_csum(const void *buff, int len); + +/* + * Fold a partial checksum without adding pseudo headers + */ + +static inline __sum16 csum_fold(__wsum csum) +{ + u32 sum = (__force u32)csum; + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + return (__force __sum16)~sum; +} + +#define _HAVE_ARCH_IPV6_CSUM +extern __sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, __wsum sum); +#endif diff --git a/arch/alpha/include/asm/cmpxchg.h b/arch/alpha/include/asm/cmpxchg.h new file mode 100644 index 0000000000..6e0a850aa9 --- /dev/null +++ b/arch/alpha/include/asm/cmpxchg.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_CMPXCHG_H +#define _ALPHA_CMPXCHG_H + +/* + * Atomic exchange routines. + */ + +#define ____xchg(type, args...) __xchg ## type ## _local(args) +#define ____cmpxchg(type, args...) __cmpxchg ## type ## _local(args) +#include + +#define xchg_local(ptr, x) \ +({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg_local((ptr), (unsigned long)_x_, \ + sizeof(*(ptr))); \ +}) + +#define arch_cmpxchg_local(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_, \ + (unsigned long)_n_, \ + sizeof(*(ptr))); \ +}) + +#define arch_cmpxchg64_local(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg_local((ptr), (o), (n)); \ +}) + +#undef ____xchg +#undef ____cmpxchg +#define ____xchg(type, args...) __xchg ##type(args) +#define ____cmpxchg(type, args...) __cmpxchg ##type(args) +#include + +/* + * The leading and the trailing memory barriers guarantee that these + * operations are fully ordered. 
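+ * The *_local variants defined above omit the barriers and are only
+ * guaranteed atomic with respect to the current CPU.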
+ */ +#define arch_xchg(ptr, x) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) _x_ = (x); \ + smp_mb(); \ + __ret = (__typeof__(*(ptr))) \ + __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \ + smp_mb(); \ + __ret; \ +}) + +#define arch_cmpxchg(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + smp_mb(); \ + __ret = (__typeof__(*(ptr))) __cmpxchg((ptr), \ + (unsigned long)_o_, (unsigned long)_n_, sizeof(*(ptr)));\ + smp_mb(); \ + __ret; \ +}) + +#define arch_cmpxchg64(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + arch_cmpxchg((ptr), (o), (n)); \ +}) + +#undef ____cmpxchg + +#endif /* _ALPHA_CMPXCHG_H */ diff --git a/arch/alpha/include/asm/compiler.h b/arch/alpha/include/asm/compiler.h new file mode 100644 index 0000000000..ae64595901 --- /dev/null +++ b/arch/alpha/include/asm/compiler.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_COMPILER_H +#define __ALPHA_COMPILER_H + +#include + +#endif /* __ALPHA_COMPILER_H */ diff --git a/arch/alpha/include/asm/console.h b/arch/alpha/include/asm/console.h new file mode 100644 index 0000000000..088b7b9eb1 --- /dev/null +++ b/arch/alpha/include/asm/console.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __AXP_CONSOLE_H +#define __AXP_CONSOLE_H + +#include + +#ifndef __ASSEMBLY__ +extern long callback_puts(long unit, const char *s, long length); +extern long callback_getc(long unit); +extern long callback_open_console(void); +extern long callback_close_console(void); +extern long callback_open(const char *device, long length); +extern long callback_close(long unit); +extern long callback_read(long channel, long count, const char *buf, long lbn); +extern long callback_getenv(long id, const char *buf, unsigned long buf_size); +extern long callback_setenv(long id, const char *buf, unsigned long buf_size); +extern long callback_save_env(void); + +extern int srm_fixup(unsigned long new_callback_addr, + unsigned long new_hwrpb_addr); +extern long srm_puts(const char *, long); +extern long srm_printk(const char *, ...) + __attribute__ ((format (printf, 1, 2))); + +struct crb_struct; +struct hwrpb_struct; +extern int callback_init_done; +extern void * callback_init(void *); +#endif /* __ASSEMBLY__ */ +#endif /* __AXP_CONSOLE_H */ diff --git a/arch/alpha/include/asm/core_apecs.h b/arch/alpha/include/asm/core_apecs.h new file mode 100644 index 0000000000..2d9726fc02 --- /dev/null +++ b/arch/alpha/include/asm/core_apecs.h @@ -0,0 +1,518 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_APECS__H__ +#define __ALPHA_APECS__H__ + +#include +#include + +/* + * APECS is the internal name for the 2107x chipset which provides + * memory controller and PCI access for the 21064 chip based systems. + * + * This file is based on: + * + * DECchip 21071-AA and DECchip 21072-AA Core Logic Chipsets + * Data Sheet + * + * EC-N0648-72 + * + * + * david.rusling@reo.mts.dec.com Initial Version. + * + */ + +/* + An AVANTI *might* be an XL, and an XL has only 27 bits of ISA address + that get passed through the PCI<->ISA bridge chip. So we've gotta use + both windows to max out the physical memory we can DMA to. Sigh... + + If we try a window at 0 for 1GB as a work-around, we run into conflicts + with ISA/PCI bus memory which can't be relocated, like VGA aperture and + BIOS ROMs. So we must put the windows high enough to avoid these areas. 
+ + We put window 1 at BUS 64Mb for 64Mb, mapping physical 0 to 64Mb-1, + and window 2 at BUS 1Gb for 1Gb, mapping physical 0 to 1Gb-1. + Yes, this does map 0 to 64Mb-1 twice, but only window 1 will actually + be used for that range (via virt_to_bus()). + + Note that we actually fudge the window 1 maximum as 48Mb instead of 64Mb, + to keep virt_to_bus() from returning an address in the first window, for + a data area that goes beyond the 64Mb first DMA window. Sigh... + The fudge factor MUST match with MAX_DMA_ADDRESS, but + we can't just use that here, because of header file looping... :-( + + Window 1 will be used for all DMA from the ISA bus; yes, that does + limit what memory an ISA floppy or sound card or Ethernet can touch, but + it's also a known limitation on other platforms as well. We use the + same technique that is used on INTEL platforms with similar limitation: + set MAX_DMA_ADDRESS and clear some pages' DMAable flags during mem_init(). + We trust that any ISA bus device drivers will *always* ask for DMAable + memory explicitly via kmalloc()/get_free_pages() flags arguments. + + Note that most PCI bus devices' drivers do *not* explicitly ask for + DMAable memory; they count on being able to DMA to any memory they + get from kmalloc()/get_free_pages(). They will also use window 1 for + any physical memory accesses below 64Mb; the rest will be handled by + window 2, maxing out at 1Gb of memory. I trust this is enough... :-) + + We hope that the area before the first window is large enough so that + there will be no overlap at the top end (64Mb). We *must* locate the + PCI cards' memory just below window 1, so that there's still the + possibility of being able to access it via SPARSE space. This is + important for cards such as the Matrox Millennium, whose Xserver + wants to access memory-mapped registers in byte and short lengths. + + Note that the XL is treated differently from the AVANTI, even though + for most other things they are identical. It didn't seem reasonable to + make the AVANTI support pay for the limitations of the XL. It is true, + however, that an XL kernel will run on an AVANTI without problems. + + %%% All of this should be obviated by the ability to route + everything through the iommu. +*/ + +/* + * 21071-DA Control and Status registers. + * These are used for PCI memory access. 
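+ *
+ * (Access sketch only, not a definitive sequence: each CSR below is a
+ * fixed address in the chipset's register space, reached through the
+ * kernel identity mapping, so it is normally touched via a volatile
+ * pointer, for example
+ *
+ *    unsigned int dcsr = *(volatile unsigned int *)APECS_IOC_DCSR;
+ *    *(volatile unsigned int *)APECS_IOC_DCSR = dcsr;
+ *    mb();
+ *
+ * a status read followed by a write-back and mb(), which is the general
+ * pattern the APECS error handling uses when it inspects and clears
+ * latched error state.)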
+ */ +#define APECS_IOC_DCSR (IDENT_ADDR + 0x1A0000000UL) +#define APECS_IOC_PEAR (IDENT_ADDR + 0x1A0000020UL) +#define APECS_IOC_SEAR (IDENT_ADDR + 0x1A0000040UL) +#define APECS_IOC_DR1 (IDENT_ADDR + 0x1A0000060UL) +#define APECS_IOC_DR2 (IDENT_ADDR + 0x1A0000080UL) +#define APECS_IOC_DR3 (IDENT_ADDR + 0x1A00000A0UL) + +#define APECS_IOC_TB1R (IDENT_ADDR + 0x1A00000C0UL) +#define APECS_IOC_TB2R (IDENT_ADDR + 0x1A00000E0UL) + +#define APECS_IOC_PB1R (IDENT_ADDR + 0x1A0000100UL) +#define APECS_IOC_PB2R (IDENT_ADDR + 0x1A0000120UL) + +#define APECS_IOC_PM1R (IDENT_ADDR + 0x1A0000140UL) +#define APECS_IOC_PM2R (IDENT_ADDR + 0x1A0000160UL) + +#define APECS_IOC_HAXR0 (IDENT_ADDR + 0x1A0000180UL) +#define APECS_IOC_HAXR1 (IDENT_ADDR + 0x1A00001A0UL) +#define APECS_IOC_HAXR2 (IDENT_ADDR + 0x1A00001C0UL) + +#define APECS_IOC_PMLT (IDENT_ADDR + 0x1A00001E0UL) + +#define APECS_IOC_TLBTAG0 (IDENT_ADDR + 0x1A0000200UL) +#define APECS_IOC_TLBTAG1 (IDENT_ADDR + 0x1A0000220UL) +#define APECS_IOC_TLBTAG2 (IDENT_ADDR + 0x1A0000240UL) +#define APECS_IOC_TLBTAG3 (IDENT_ADDR + 0x1A0000260UL) +#define APECS_IOC_TLBTAG4 (IDENT_ADDR + 0x1A0000280UL) +#define APECS_IOC_TLBTAG5 (IDENT_ADDR + 0x1A00002A0UL) +#define APECS_IOC_TLBTAG6 (IDENT_ADDR + 0x1A00002C0UL) +#define APECS_IOC_TLBTAG7 (IDENT_ADDR + 0x1A00002E0UL) + +#define APECS_IOC_TLBDATA0 (IDENT_ADDR + 0x1A0000300UL) +#define APECS_IOC_TLBDATA1 (IDENT_ADDR + 0x1A0000320UL) +#define APECS_IOC_TLBDATA2 (IDENT_ADDR + 0x1A0000340UL) +#define APECS_IOC_TLBDATA3 (IDENT_ADDR + 0x1A0000360UL) +#define APECS_IOC_TLBDATA4 (IDENT_ADDR + 0x1A0000380UL) +#define APECS_IOC_TLBDATA5 (IDENT_ADDR + 0x1A00003A0UL) +#define APECS_IOC_TLBDATA6 (IDENT_ADDR + 0x1A00003C0UL) +#define APECS_IOC_TLBDATA7 (IDENT_ADDR + 0x1A00003E0UL) + +#define APECS_IOC_TBIA (IDENT_ADDR + 0x1A0000400UL) + + +/* + * 21071-CA Control and Status registers. + * These are used to program memory timing, + * configure memory and initialise the B-Cache. 
+ */ +#define APECS_MEM_GCR (IDENT_ADDR + 0x180000000UL) +#define APECS_MEM_EDSR (IDENT_ADDR + 0x180000040UL) +#define APECS_MEM_TAR (IDENT_ADDR + 0x180000060UL) +#define APECS_MEM_ELAR (IDENT_ADDR + 0x180000080UL) +#define APECS_MEM_EHAR (IDENT_ADDR + 0x1800000a0UL) +#define APECS_MEM_SFT_RST (IDENT_ADDR + 0x1800000c0UL) +#define APECS_MEM_LDxLAR (IDENT_ADDR + 0x1800000e0UL) +#define APECS_MEM_LDxHAR (IDENT_ADDR + 0x180000100UL) +#define APECS_MEM_GTR (IDENT_ADDR + 0x180000200UL) +#define APECS_MEM_RTR (IDENT_ADDR + 0x180000220UL) +#define APECS_MEM_VFPR (IDENT_ADDR + 0x180000240UL) +#define APECS_MEM_PDLDR (IDENT_ADDR + 0x180000260UL) +#define APECS_MEM_PDhDR (IDENT_ADDR + 0x180000280UL) + +/* Bank x Base Address Register */ +#define APECS_MEM_B0BAR (IDENT_ADDR + 0x180000800UL) +#define APECS_MEM_B1BAR (IDENT_ADDR + 0x180000820UL) +#define APECS_MEM_B2BAR (IDENT_ADDR + 0x180000840UL) +#define APECS_MEM_B3BAR (IDENT_ADDR + 0x180000860UL) +#define APECS_MEM_B4BAR (IDENT_ADDR + 0x180000880UL) +#define APECS_MEM_B5BAR (IDENT_ADDR + 0x1800008A0UL) +#define APECS_MEM_B6BAR (IDENT_ADDR + 0x1800008C0UL) +#define APECS_MEM_B7BAR (IDENT_ADDR + 0x1800008E0UL) +#define APECS_MEM_B8BAR (IDENT_ADDR + 0x180000900UL) + +/* Bank x Configuration Register */ +#define APECS_MEM_B0BCR (IDENT_ADDR + 0x180000A00UL) +#define APECS_MEM_B1BCR (IDENT_ADDR + 0x180000A20UL) +#define APECS_MEM_B2BCR (IDENT_ADDR + 0x180000A40UL) +#define APECS_MEM_B3BCR (IDENT_ADDR + 0x180000A60UL) +#define APECS_MEM_B4BCR (IDENT_ADDR + 0x180000A80UL) +#define APECS_MEM_B5BCR (IDENT_ADDR + 0x180000AA0UL) +#define APECS_MEM_B6BCR (IDENT_ADDR + 0x180000AC0UL) +#define APECS_MEM_B7BCR (IDENT_ADDR + 0x180000AE0UL) +#define APECS_MEM_B8BCR (IDENT_ADDR + 0x180000B00UL) + +/* Bank x Timing Register A */ +#define APECS_MEM_B0TRA (IDENT_ADDR + 0x180000C00UL) +#define APECS_MEM_B1TRA (IDENT_ADDR + 0x180000C20UL) +#define APECS_MEM_B2TRA (IDENT_ADDR + 0x180000C40UL) +#define APECS_MEM_B3TRA (IDENT_ADDR + 0x180000C60UL) +#define APECS_MEM_B4TRA (IDENT_ADDR + 0x180000C80UL) +#define APECS_MEM_B5TRA (IDENT_ADDR + 0x180000CA0UL) +#define APECS_MEM_B6TRA (IDENT_ADDR + 0x180000CC0UL) +#define APECS_MEM_B7TRA (IDENT_ADDR + 0x180000CE0UL) +#define APECS_MEM_B8TRA (IDENT_ADDR + 0x180000D00UL) + +/* Bank x Timing Register B */ +#define APECS_MEM_B0TRB (IDENT_ADDR + 0x180000E00UL) +#define APECS_MEM_B1TRB (IDENT_ADDR + 0x180000E20UL) +#define APECS_MEM_B2TRB (IDENT_ADDR + 0x180000E40UL) +#define APECS_MEM_B3TRB (IDENT_ADDR + 0x180000E60UL) +#define APECS_MEM_B4TRB (IDENT_ADDR + 0x180000E80UL) +#define APECS_MEM_B5TRB (IDENT_ADDR + 0x180000EA0UL) +#define APECS_MEM_B6TRB (IDENT_ADDR + 0x180000EC0UL) +#define APECS_MEM_B7TRB (IDENT_ADDR + 0x180000EE0UL) +#define APECS_MEM_B8TRB (IDENT_ADDR + 0x180000F00UL) + + +/* + * Memory spaces: + */ +#define APECS_IACK_SC (IDENT_ADDR + 0x1b0000000UL) +#define APECS_CONF (IDENT_ADDR + 0x1e0000000UL) +#define APECS_IO (IDENT_ADDR + 0x1c0000000UL) +#define APECS_SPARSE_MEM (IDENT_ADDR + 0x200000000UL) +#define APECS_DENSE_MEM (IDENT_ADDR + 0x300000000UL) + + +/* + * Bit definitions for I/O Controller status register 0: + */ +#define APECS_IOC_STAT0_CMD 0xf +#define APECS_IOC_STAT0_ERR (1<<4) +#define APECS_IOC_STAT0_LOST (1<<5) +#define APECS_IOC_STAT0_THIT (1<<6) +#define APECS_IOC_STAT0_TREF (1<<7) +#define APECS_IOC_STAT0_CODE_SHIFT 8 +#define APECS_IOC_STAT0_CODE_MASK 0x7 +#define APECS_IOC_STAT0_P_NBR_SHIFT 13 +#define APECS_IOC_STAT0_P_NBR_MASK 0x7ffff + +#define APECS_HAE_ADDRESS APECS_IOC_HAXR1 + + +/* + * Data 
structure for handling APECS machine checks: + */ + +struct el_apecs_mikasa_sysdata_mcheck +{ + unsigned long coma_gcr; + unsigned long coma_edsr; + unsigned long coma_ter; + unsigned long coma_elar; + unsigned long coma_ehar; + unsigned long coma_ldlr; + unsigned long coma_ldhr; + unsigned long coma_base0; + unsigned long coma_base1; + unsigned long coma_base2; + unsigned long coma_base3; + unsigned long coma_cnfg0; + unsigned long coma_cnfg1; + unsigned long coma_cnfg2; + unsigned long coma_cnfg3; + unsigned long epic_dcsr; + unsigned long epic_pear; + unsigned long epic_sear; + unsigned long epic_tbr1; + unsigned long epic_tbr2; + unsigned long epic_pbr1; + unsigned long epic_pbr2; + unsigned long epic_pmr1; + unsigned long epic_pmr2; + unsigned long epic_harx1; + unsigned long epic_harx2; + unsigned long epic_pmlt; + unsigned long epic_tag0; + unsigned long epic_tag1; + unsigned long epic_tag2; + unsigned long epic_tag3; + unsigned long epic_tag4; + unsigned long epic_tag5; + unsigned long epic_tag6; + unsigned long epic_tag7; + unsigned long epic_data0; + unsigned long epic_data1; + unsigned long epic_data2; + unsigned long epic_data3; + unsigned long epic_data4; + unsigned long epic_data5; + unsigned long epic_data6; + unsigned long epic_data7; + + unsigned long pceb_vid; + unsigned long pceb_did; + unsigned long pceb_revision; + unsigned long pceb_command; + unsigned long pceb_status; + unsigned long pceb_latency; + unsigned long pceb_control; + unsigned long pceb_arbcon; + unsigned long pceb_arbpri; + + unsigned long esc_id; + unsigned long esc_revision; + unsigned long esc_int0; + unsigned long esc_int1; + unsigned long esc_elcr0; + unsigned long esc_elcr1; + unsigned long esc_last_eisa; + unsigned long esc_nmi_stat; + + unsigned long pci_ir; + unsigned long pci_imr; + unsigned long svr_mgr; +}; + +/* This for the normal APECS machines. */ +struct el_apecs_sysdata_mcheck +{ + unsigned long coma_gcr; + unsigned long coma_edsr; + unsigned long coma_ter; + unsigned long coma_elar; + unsigned long coma_ehar; + unsigned long coma_ldlr; + unsigned long coma_ldhr; + unsigned long coma_base0; + unsigned long coma_base1; + unsigned long coma_base2; + unsigned long coma_cnfg0; + unsigned long coma_cnfg1; + unsigned long coma_cnfg2; + unsigned long epic_dcsr; + unsigned long epic_pear; + unsigned long epic_sear; + unsigned long epic_tbr1; + unsigned long epic_tbr2; + unsigned long epic_pbr1; + unsigned long epic_pbr2; + unsigned long epic_pmr1; + unsigned long epic_pmr2; + unsigned long epic_harx1; + unsigned long epic_harx2; + unsigned long epic_pmlt; + unsigned long epic_tag0; + unsigned long epic_tag1; + unsigned long epic_tag2; + unsigned long epic_tag3; + unsigned long epic_tag4; + unsigned long epic_tag5; + unsigned long epic_tag6; + unsigned long epic_tag7; + unsigned long epic_data0; + unsigned long epic_data1; + unsigned long epic_data2; + unsigned long epic_data3; + unsigned long epic_data4; + unsigned long epic_data5; + unsigned long epic_data6; + unsigned long epic_data7; +}; + +struct el_apecs_procdata +{ + unsigned long paltemp[32]; /* PAL TEMP REGS. */ + /* EV4-specific fields */ + unsigned long exc_addr; /* Address of excepting instruction. */ + unsigned long exc_sum; /* Summary of arithmetic traps. */ + unsigned long exc_mask; /* Exception mask (from exc_sum). */ + unsigned long iccsr; /* IBox hardware enables. */ + unsigned long pal_base; /* Base address for PALcode. */ + unsigned long hier; /* Hardware Interrupt Enable. 
*/ + unsigned long hirr; /* Hardware Interrupt Request. */ + unsigned long csr; /* D-stream fault info. */ + unsigned long dc_stat; /* D-cache status (ECC/Parity Err). */ + unsigned long dc_addr; /* EV3 Phys Addr for ECC/DPERR. */ + unsigned long abox_ctl; /* ABox Control Register. */ + unsigned long biu_stat; /* BIU Status. */ + unsigned long biu_addr; /* BUI Address. */ + unsigned long biu_ctl; /* BIU Control. */ + unsigned long fill_syndrome;/* For correcting ECC errors. */ + unsigned long fill_addr; /* Cache block which was being read */ + unsigned long va; /* Effective VA of fault or miss. */ + unsigned long bc_tag; /* Backup Cache Tag Probe Results.*/ +}; + + +#ifdef __KERNEL__ + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __IO_EXTERN_INLINE +#endif + +/* + * I/O functions: + * + * Unlike Jensen, the APECS machines have no concept of local + * I/O---everything goes over the PCI bus. + * + * There is plenty room for optimization here. In particular, + * the Alpha's insb/insw/extb/extw should be useful in moving + * data to/from the right byte-lanes. + */ + +#define vip volatile int __force * +#define vuip volatile unsigned int __force * +#define vulp volatile unsigned long __force * + +#define APECS_SET_HAE \ + do { \ + if (addr >= (1UL << 24)) { \ + unsigned long msb = addr & 0xf8000000; \ + addr -= msb; \ + set_hae(msb); \ + } \ + } while (0) + +__EXTERN_INLINE unsigned int apecs_ioread8(const void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long result, base_and_type; + + if (addr >= APECS_DENSE_MEM) { + addr -= APECS_DENSE_MEM; + APECS_SET_HAE; + base_and_type = APECS_SPARSE_MEM + 0x00; + } else { + addr -= APECS_IO; + base_and_type = APECS_IO + 0x00; + } + + result = *(vip) ((addr << 5) + base_and_type); + return __kernel_extbl(result, addr & 3); +} + +__EXTERN_INLINE void apecs_iowrite8(u8 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long w, base_and_type; + + if (addr >= APECS_DENSE_MEM) { + addr -= APECS_DENSE_MEM; + APECS_SET_HAE; + base_and_type = APECS_SPARSE_MEM + 0x00; + } else { + addr -= APECS_IO; + base_and_type = APECS_IO + 0x00; + } + + w = __kernel_insbl(b, addr & 3); + *(vuip) ((addr << 5) + base_and_type) = w; +} + +__EXTERN_INLINE unsigned int apecs_ioread16(const void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long result, base_and_type; + + if (addr >= APECS_DENSE_MEM) { + addr -= APECS_DENSE_MEM; + APECS_SET_HAE; + base_and_type = APECS_SPARSE_MEM + 0x08; + } else { + addr -= APECS_IO; + base_and_type = APECS_IO + 0x08; + } + + result = *(vip) ((addr << 5) + base_and_type); + return __kernel_extwl(result, addr & 3); +} + +__EXTERN_INLINE void apecs_iowrite16(u16 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long w, base_and_type; + + if (addr >= APECS_DENSE_MEM) { + addr -= APECS_DENSE_MEM; + APECS_SET_HAE; + base_and_type = APECS_SPARSE_MEM + 0x08; + } else { + addr -= APECS_IO; + base_and_type = APECS_IO + 0x08; + } + + w = __kernel_inswl(b, addr & 3); + *(vuip) ((addr << 5) + base_and_type) = w; +} + +__EXTERN_INLINE unsigned int apecs_ioread32(const void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + if (addr < APECS_DENSE_MEM) + addr = ((addr - APECS_IO) << 5) + APECS_IO + 0x18; + return *(vuip)addr; +} + +__EXTERN_INLINE void apecs_iowrite32(u32 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + if (addr < APECS_DENSE_MEM) + addr = ((addr - 
APECS_IO) << 5) + APECS_IO + 0x18; + *(vuip)addr = b; +} + +__EXTERN_INLINE void __iomem *apecs_ioportmap(unsigned long addr) +{ + return (void __iomem *)(addr + APECS_IO); +} + +__EXTERN_INLINE void __iomem *apecs_ioremap(unsigned long addr, + unsigned long size) +{ + return (void __iomem *)(addr + APECS_DENSE_MEM); +} + +__EXTERN_INLINE int apecs_is_ioaddr(unsigned long addr) +{ + return addr >= IDENT_ADDR + 0x180000000UL; +} + +__EXTERN_INLINE int apecs_is_mmio(const volatile void __iomem *addr) +{ + return (unsigned long)addr >= APECS_DENSE_MEM; +} + +#undef APECS_SET_HAE + +#undef vip +#undef vuip +#undef vulp + +#undef __IO_PREFIX +#define __IO_PREFIX apecs +#define apecs_trivial_io_bw 0 +#define apecs_trivial_io_lq 0 +#define apecs_trivial_rw_bw 2 +#define apecs_trivial_rw_lq 1 +#define apecs_trivial_iounmap 1 +#include + +#ifdef __IO_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_APECS__H__ */ diff --git a/arch/alpha/include/asm/core_cia.h b/arch/alpha/include/asm/core_cia.h new file mode 100644 index 0000000000..cb22991f67 --- /dev/null +++ b/arch/alpha/include/asm/core_cia.h @@ -0,0 +1,501 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_CIA__H__ +#define __ALPHA_CIA__H__ + +/* Define to experiment with fitting everything into one 512MB HAE window. */ +#define CIA_ONE_HAE_WINDOW 1 + +#include +#include + +/* + * CIA is the internal name for the 21171 chipset which provides + * memory controller and PCI access for the 21164 chip based systems. + * Also supported here is the 21172 (CIA-2) and 21174 (PYXIS). + * + * The lineage is a bit confused, since the 21174 was reportedly started + * from the 21171 Pass 1 mask, and so is missing bug fixes that appear + * in 21171 Pass 2 and 21172, but it also contains additional features. + * + * This file is based on: + * + * DECchip 21171 Core Logic Chipset + * Technical Reference Manual + * + * EC-QE18B-TE + * + * david.rusling@reo.mts.dec.com Initial Version. + * + */ + +/* + * CIA ADDRESS BIT DEFINITIONS + * + * 3333 3333 3322 2222 2222 1111 1111 11 + * 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210 + * ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- + * 1 000 + * ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- + * | |\| + * | Byte Enable --+ | + * | Transfer Length --+ + * +-- IO space, not cached + * + * Byte Transfer + * Enable Length Transfer Byte Address + * adr<6:5> adr<4:3> Length Enable Adder + * --------------------------------------------- + * 00 00 Byte 1110 0x000 + * 01 00 Byte 1101 0x020 + * 10 00 Byte 1011 0x040 + * 11 00 Byte 0111 0x060 + * + * 00 01 Word 1100 0x008 + * 01 01 Word 1001 0x028 <= Not supported in this code. + * 10 01 Word 0011 0x048 + * + * 00 10 Tribyte 1000 0x010 + * 01 10 Tribyte 0001 0x030 + * + * 10 11 Longword 0000 0x058 + * + * Note that byte enables are asserted low. 
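+ *
+ * (Worked example of the encoding: the low "byte address adder" is just
+ * (lane << 5) | (length_code << 3), with length_code 0/1/2/3 for
+ * byte/word/tribyte/longword, e.g. as a hypothetical helper
+ *
+ *    static unsigned long sparse_adder(unsigned int lane, unsigned int len)
+ *    {
+ *        return (lane << 5) | (len << 3);
+ *    }
+ *
+ * sparse_adder(1, 0) == 0x020 and sparse_adder(2, 1) == 0x048, matching
+ * the table above.  The cia_ioread8()/cia_iowrite8() routines later in
+ * this file build the same value with "(addr << 5) + base_and_type" and
+ * then pick the byte out with __kernel_extbl(result, addr & 3).)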
+ * + */ + +#define CIA_MEM_R1_MASK 0x1fffffff /* SPARSE Mem region 1 mask is 29 bits */ +#define CIA_MEM_R2_MASK 0x07ffffff /* SPARSE Mem region 2 mask is 27 bits */ +#define CIA_MEM_R3_MASK 0x03ffffff /* SPARSE Mem region 3 mask is 26 bits */ + +/* + * 21171-CA Control and Status Registers + */ +#define CIA_IOC_CIA_REV (IDENT_ADDR + 0x8740000080UL) +# define CIA_REV_MASK 0xff +#define CIA_IOC_PCI_LAT (IDENT_ADDR + 0x87400000C0UL) +#define CIA_IOC_CIA_CTRL (IDENT_ADDR + 0x8740000100UL) +# define CIA_CTRL_PCI_EN (1 << 0) +# define CIA_CTRL_PCI_LOCK_EN (1 << 1) +# define CIA_CTRL_PCI_LOOP_EN (1 << 2) +# define CIA_CTRL_FST_BB_EN (1 << 3) +# define CIA_CTRL_PCI_MST_EN (1 << 4) +# define CIA_CTRL_PCI_MEM_EN (1 << 5) +# define CIA_CTRL_PCI_REQ64_EN (1 << 6) +# define CIA_CTRL_PCI_ACK64_EN (1 << 7) +# define CIA_CTRL_ADDR_PE_EN (1 << 8) +# define CIA_CTRL_PERR_EN (1 << 9) +# define CIA_CTRL_FILL_ERR_EN (1 << 10) +# define CIA_CTRL_MCHK_ERR_EN (1 << 11) +# define CIA_CTRL_ECC_CHK_EN (1 << 12) +# define CIA_CTRL_ASSERT_IDLE_BC (1 << 13) +# define CIA_CTRL_COM_IDLE_BC (1 << 14) +# define CIA_CTRL_CSR_IOA_BYPASS (1 << 15) +# define CIA_CTRL_IO_FLUSHREQ_EN (1 << 16) +# define CIA_CTRL_CPU_FLUSHREQ_EN (1 << 17) +# define CIA_CTRL_ARB_CPU_EN (1 << 18) +# define CIA_CTRL_EN_ARB_LINK (1 << 19) +# define CIA_CTRL_RD_TYPE_SHIFT 20 +# define CIA_CTRL_RL_TYPE_SHIFT 24 +# define CIA_CTRL_RM_TYPE_SHIFT 28 +# define CIA_CTRL_EN_DMA_RD_PERF (1 << 31) +#define CIA_IOC_CIA_CNFG (IDENT_ADDR + 0x8740000140UL) +# define CIA_CNFG_IOA_BWEN (1 << 0) +# define CIA_CNFG_PCI_MWEN (1 << 4) +# define CIA_CNFG_PCI_DWEN (1 << 5) +# define CIA_CNFG_PCI_WLEN (1 << 8) +#define CIA_IOC_FLASH_CTRL (IDENT_ADDR + 0x8740000200UL) +#define CIA_IOC_HAE_MEM (IDENT_ADDR + 0x8740000400UL) +#define CIA_IOC_HAE_IO (IDENT_ADDR + 0x8740000440UL) +#define CIA_IOC_CFG (IDENT_ADDR + 0x8740000480UL) +#define CIA_IOC_CACK_EN (IDENT_ADDR + 0x8740000600UL) +# define CIA_CACK_EN_LOCK_EN (1 << 0) +# define CIA_CACK_EN_MB_EN (1 << 1) +# define CIA_CACK_EN_SET_DIRTY_EN (1 << 2) +# define CIA_CACK_EN_BC_VICTIM_EN (1 << 3) + + +/* + * 21171-CA Diagnostic Registers + */ +#define CIA_IOC_CIA_DIAG (IDENT_ADDR + 0x8740002000UL) +#define CIA_IOC_DIAG_CHECK (IDENT_ADDR + 0x8740003000UL) + +/* + * 21171-CA Performance Monitor registers + */ +#define CIA_IOC_PERF_MONITOR (IDENT_ADDR + 0x8740004000UL) +#define CIA_IOC_PERF_CONTROL (IDENT_ADDR + 0x8740004040UL) + +/* + * 21171-CA Error registers + */ +#define CIA_IOC_CPU_ERR0 (IDENT_ADDR + 0x8740008000UL) +#define CIA_IOC_CPU_ERR1 (IDENT_ADDR + 0x8740008040UL) +#define CIA_IOC_CIA_ERR (IDENT_ADDR + 0x8740008200UL) +# define CIA_ERR_COR_ERR (1 << 0) +# define CIA_ERR_UN_COR_ERR (1 << 1) +# define CIA_ERR_CPU_PE (1 << 2) +# define CIA_ERR_MEM_NEM (1 << 3) +# define CIA_ERR_PCI_SERR (1 << 4) +# define CIA_ERR_PERR (1 << 5) +# define CIA_ERR_PCI_ADDR_PE (1 << 6) +# define CIA_ERR_RCVD_MAS_ABT (1 << 7) +# define CIA_ERR_RCVD_TAR_ABT (1 << 8) +# define CIA_ERR_PA_PTE_INV (1 << 9) +# define CIA_ERR_FROM_WRT_ERR (1 << 10) +# define CIA_ERR_IOA_TIMEOUT (1 << 11) +# define CIA_ERR_LOST_CORR_ERR (1 << 16) +# define CIA_ERR_LOST_UN_CORR_ERR (1 << 17) +# define CIA_ERR_LOST_CPU_PE (1 << 18) +# define CIA_ERR_LOST_MEM_NEM (1 << 19) +# define CIA_ERR_LOST_PERR (1 << 21) +# define CIA_ERR_LOST_PCI_ADDR_PE (1 << 22) +# define CIA_ERR_LOST_RCVD_MAS_ABT (1 << 23) +# define CIA_ERR_LOST_RCVD_TAR_ABT (1 << 24) +# define CIA_ERR_LOST_PA_PTE_INV (1 << 25) +# define CIA_ERR_LOST_FROM_WRT_ERR (1 << 26) +# define CIA_ERR_LOST_IOA_TIMEOUT (1 
<< 27) +# define CIA_ERR_VALID (1 << 31) +#define CIA_IOC_CIA_STAT (IDENT_ADDR + 0x8740008240UL) +#define CIA_IOC_ERR_MASK (IDENT_ADDR + 0x8740008280UL) +#define CIA_IOC_CIA_SYN (IDENT_ADDR + 0x8740008300UL) +#define CIA_IOC_MEM_ERR0 (IDENT_ADDR + 0x8740008400UL) +#define CIA_IOC_MEM_ERR1 (IDENT_ADDR + 0x8740008440UL) +#define CIA_IOC_PCI_ERR0 (IDENT_ADDR + 0x8740008800UL) +#define CIA_IOC_PCI_ERR1 (IDENT_ADDR + 0x8740008840UL) +#define CIA_IOC_PCI_ERR3 (IDENT_ADDR + 0x8740008880UL) + +/* + * 21171-CA System configuration registers + */ +#define CIA_IOC_MCR (IDENT_ADDR + 0x8750000000UL) +#define CIA_IOC_MBA0 (IDENT_ADDR + 0x8750000600UL) +#define CIA_IOC_MBA2 (IDENT_ADDR + 0x8750000680UL) +#define CIA_IOC_MBA4 (IDENT_ADDR + 0x8750000700UL) +#define CIA_IOC_MBA6 (IDENT_ADDR + 0x8750000780UL) +#define CIA_IOC_MBA8 (IDENT_ADDR + 0x8750000800UL) +#define CIA_IOC_MBAA (IDENT_ADDR + 0x8750000880UL) +#define CIA_IOC_MBAC (IDENT_ADDR + 0x8750000900UL) +#define CIA_IOC_MBAE (IDENT_ADDR + 0x8750000980UL) +#define CIA_IOC_TMG0 (IDENT_ADDR + 0x8750000B00UL) +#define CIA_IOC_TMG1 (IDENT_ADDR + 0x8750000B40UL) +#define CIA_IOC_TMG2 (IDENT_ADDR + 0x8750000B80UL) + +/* + * 2117A-CA PCI Address and Scatter-Gather Registers. + */ +#define CIA_IOC_PCI_TBIA (IDENT_ADDR + 0x8760000100UL) + +#define CIA_IOC_PCI_W0_BASE (IDENT_ADDR + 0x8760000400UL) +#define CIA_IOC_PCI_W0_MASK (IDENT_ADDR + 0x8760000440UL) +#define CIA_IOC_PCI_T0_BASE (IDENT_ADDR + 0x8760000480UL) + +#define CIA_IOC_PCI_W1_BASE (IDENT_ADDR + 0x8760000500UL) +#define CIA_IOC_PCI_W1_MASK (IDENT_ADDR + 0x8760000540UL) +#define CIA_IOC_PCI_T1_BASE (IDENT_ADDR + 0x8760000580UL) + +#define CIA_IOC_PCI_W2_BASE (IDENT_ADDR + 0x8760000600UL) +#define CIA_IOC_PCI_W2_MASK (IDENT_ADDR + 0x8760000640UL) +#define CIA_IOC_PCI_T2_BASE (IDENT_ADDR + 0x8760000680UL) + +#define CIA_IOC_PCI_W3_BASE (IDENT_ADDR + 0x8760000700UL) +#define CIA_IOC_PCI_W3_MASK (IDENT_ADDR + 0x8760000740UL) +#define CIA_IOC_PCI_T3_BASE (IDENT_ADDR + 0x8760000780UL) + +#define CIA_IOC_PCI_Wn_BASE(N) (IDENT_ADDR + 0x8760000400UL + (N)*0x100) +#define CIA_IOC_PCI_Wn_MASK(N) (IDENT_ADDR + 0x8760000440UL + (N)*0x100) +#define CIA_IOC_PCI_Tn_BASE(N) (IDENT_ADDR + 0x8760000480UL + (N)*0x100) + +#define CIA_IOC_PCI_W_DAC (IDENT_ADDR + 0x87600007C0UL) + +/* + * 2117A-CA Address Translation Registers. + */ + +/* 8 tag registers, the first 4 of which are lockable. */ +#define CIA_IOC_TB_TAGn(n) \ + (IDENT_ADDR + 0x8760000800UL + (n)*0x40) + +/* 4 page registers per tag register. 
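+ *
+ * (Hypothetical diagnostic sketch; the authoritative TLB setup and
+ * flush sequences live in core_cia.c.  The eight tags and their page
+ * registers can be walked like this:
+ *
+ *    int n, m;
+ *    for (n = 0; n < 8; n++) {
+ *        printk("tag%d %08x\n", n, *(volatile int *)CIA_IOC_TB_TAGn(n));
+ *        for (m = 0; m < 4; m++)
+ *            printk("  page%d %08x\n", m,
+ *                   *(volatile int *)CIA_IOC_TBn_PAGEm(n, m));
+ *    }
+ *
+ * CIA_IOC_TBn_PAGEm(2, 1), for instance, expands to
+ * IDENT_ADDR + 0x8760001240UL.)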
*/ +#define CIA_IOC_TBn_PAGEm(n,m) \ + (IDENT_ADDR + 0x8760001000UL + (n)*0x100 + (m)*0x40) + +/* + * Memory spaces: + */ +#define CIA_IACK_SC (IDENT_ADDR + 0x8720000000UL) +#define CIA_CONF (IDENT_ADDR + 0x8700000000UL) +#define CIA_IO (IDENT_ADDR + 0x8580000000UL) +#define CIA_SPARSE_MEM (IDENT_ADDR + 0x8000000000UL) +#define CIA_SPARSE_MEM_R2 (IDENT_ADDR + 0x8400000000UL) +#define CIA_SPARSE_MEM_R3 (IDENT_ADDR + 0x8500000000UL) +#define CIA_DENSE_MEM (IDENT_ADDR + 0x8600000000UL) +#define CIA_BW_MEM (IDENT_ADDR + 0x8800000000UL) +#define CIA_BW_IO (IDENT_ADDR + 0x8900000000UL) +#define CIA_BW_CFG_0 (IDENT_ADDR + 0x8a00000000UL) +#define CIA_BW_CFG_1 (IDENT_ADDR + 0x8b00000000UL) + +/* + * ALCOR's GRU ASIC registers + */ +#define GRU_INT_REQ (IDENT_ADDR + 0x8780000000UL) +#define GRU_INT_MASK (IDENT_ADDR + 0x8780000040UL) +#define GRU_INT_EDGE (IDENT_ADDR + 0x8780000080UL) +#define GRU_INT_HILO (IDENT_ADDR + 0x87800000C0UL) +#define GRU_INT_CLEAR (IDENT_ADDR + 0x8780000100UL) + +#define GRU_CACHE_CNFG (IDENT_ADDR + 0x8780000200UL) +#define GRU_SCR (IDENT_ADDR + 0x8780000300UL) +#define GRU_LED (IDENT_ADDR + 0x8780000800UL) +#define GRU_RESET (IDENT_ADDR + 0x8780000900UL) + +#define ALCOR_GRU_INT_REQ_BITS 0x800fffffUL +#define XLT_GRU_INT_REQ_BITS 0x80003fffUL +#define GRU_INT_REQ_BITS (alpha_mv.sys.cia.gru_int_req_bits+0) + +/* + * PYXIS interrupt control registers + */ +#define PYXIS_INT_REQ (IDENT_ADDR + 0x87A0000000UL) +#define PYXIS_INT_MASK (IDENT_ADDR + 0x87A0000040UL) +#define PYXIS_INT_HILO (IDENT_ADDR + 0x87A00000C0UL) +#define PYXIS_INT_ROUTE (IDENT_ADDR + 0x87A0000140UL) +#define PYXIS_GPO (IDENT_ADDR + 0x87A0000180UL) +#define PYXIS_INT_CNFG (IDENT_ADDR + 0x87A00001C0UL) +#define PYXIS_RT_COUNT (IDENT_ADDR + 0x87A0000200UL) +#define PYXIS_INT_TIME (IDENT_ADDR + 0x87A0000240UL) +#define PYXIS_IIC_CTRL (IDENT_ADDR + 0x87A00002C0UL) +#define PYXIS_RESET (IDENT_ADDR + 0x8780000900UL) + +/* Offset between ram physical addresses and pci64 DAC bus addresses. */ +#define PYXIS_DAC_OFFSET (1UL << 40) + +/* + * Data structure for handling CIA machine checks. + */ + +/* System-specific info. */ +struct el_CIA_sysdata_mcheck { + unsigned long cpu_err0; + unsigned long cpu_err1; + unsigned long cia_err; + unsigned long cia_stat; + unsigned long err_mask; + unsigned long cia_syn; + unsigned long mem_err0; + unsigned long mem_err1; + unsigned long pci_err0; + unsigned long pci_err1; + unsigned long pci_err2; +}; + + +#ifdef __KERNEL__ + +#ifndef __EXTERN_INLINE +/* Do not touch, this should *NOT* be static inline */ +#define __EXTERN_INLINE extern inline +#define __IO_EXTERN_INLINE +#endif + +/* + * I/O functions: + * + * CIA (the 2117x PCI/memory support chipset for the EV5 (21164) + * series of processors uses a sparse address mapping scheme to + * get at PCI memory and I/O. + */ + +/* + * Memory functions. 64-bit and 32-bit accesses are done through + * dense memory space, everything else through sparse space. + * + * For reading and writing 8 and 16 bit quantities we need to + * go through one of the three sparse address mapping regions + * and use the HAE_MEM CSR to provide some bits of the address. + * The following few routines use only sparse address region 1 + * which gives 1Gbyte of accessible space which relates exactly + * to the amount of PCI memory mapping *into* system address space. 
+ * See p 6-17 of the specification but it looks something like this: + * + * 21164 Address: + * + * 3 2 1 + * 9876543210987654321098765432109876543210 + * 1ZZZZ0.PCI.QW.Address............BBLL + * + * ZZ = SBZ + * BB = Byte offset + * LL = Transfer length + * + * PCI Address: + * + * 3 2 1 + * 10987654321098765432109876543210 + * HHH....PCI.QW.Address........ 00 + * + * HHH = 31:29 HAE_MEM CSR + * + */ + +#define vip volatile int __force * +#define vuip volatile unsigned int __force * +#define vulp volatile unsigned long __force * + +__EXTERN_INLINE unsigned int cia_ioread8(const void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long result, base_and_type; + + if (addr >= CIA_DENSE_MEM) + base_and_type = CIA_SPARSE_MEM + 0x00; + else + base_and_type = CIA_IO + 0x00; + + /* We can use CIA_MEM_R1_MASK for io ports too, since it is large + enough to cover all io ports, and smaller than CIA_IO. */ + addr &= CIA_MEM_R1_MASK; + result = *(vip) ((addr << 5) + base_and_type); + return __kernel_extbl(result, addr & 3); +} + +__EXTERN_INLINE void cia_iowrite8(u8 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long w, base_and_type; + + if (addr >= CIA_DENSE_MEM) + base_and_type = CIA_SPARSE_MEM + 0x00; + else + base_and_type = CIA_IO + 0x00; + + addr &= CIA_MEM_R1_MASK; + w = __kernel_insbl(b, addr & 3); + *(vuip) ((addr << 5) + base_and_type) = w; +} + +__EXTERN_INLINE unsigned int cia_ioread16(const void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long result, base_and_type; + + if (addr >= CIA_DENSE_MEM) + base_and_type = CIA_SPARSE_MEM + 0x08; + else + base_and_type = CIA_IO + 0x08; + + addr &= CIA_MEM_R1_MASK; + result = *(vip) ((addr << 5) + base_and_type); + return __kernel_extwl(result, addr & 3); +} + +__EXTERN_INLINE void cia_iowrite16(u16 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long w, base_and_type; + + if (addr >= CIA_DENSE_MEM) + base_and_type = CIA_SPARSE_MEM + 0x08; + else + base_and_type = CIA_IO + 0x08; + + addr &= CIA_MEM_R1_MASK; + w = __kernel_inswl(b, addr & 3); + *(vuip) ((addr << 5) + base_and_type) = w; +} + +__EXTERN_INLINE unsigned int cia_ioread32(const void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + if (addr < CIA_DENSE_MEM) + addr = ((addr - CIA_IO) << 5) + CIA_IO + 0x18; + return *(vuip)addr; +} + +__EXTERN_INLINE void cia_iowrite32(u32 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + if (addr < CIA_DENSE_MEM) + addr = ((addr - CIA_IO) << 5) + CIA_IO + 0x18; + *(vuip)addr = b; +} + +__EXTERN_INLINE void __iomem *cia_ioportmap(unsigned long addr) +{ + return (void __iomem *)(addr + CIA_IO); +} + +__EXTERN_INLINE void __iomem *cia_ioremap(unsigned long addr, + unsigned long size) +{ + return (void __iomem *)(addr + CIA_DENSE_MEM); +} + +__EXTERN_INLINE int cia_is_ioaddr(unsigned long addr) +{ + return addr >= IDENT_ADDR + 0x8000000000UL; +} + +__EXTERN_INLINE int cia_is_mmio(const volatile void __iomem *addr) +{ + return (unsigned long)addr >= CIA_DENSE_MEM; +} + +__EXTERN_INLINE void __iomem *cia_bwx_ioportmap(unsigned long addr) +{ + return (void __iomem *)(addr + CIA_BW_IO); +} + +__EXTERN_INLINE void __iomem *cia_bwx_ioremap(unsigned long addr, + unsigned long size) +{ + return (void __iomem *)(addr + CIA_BW_MEM); +} + +__EXTERN_INLINE int cia_bwx_is_ioaddr(unsigned long addr) +{ + return addr >= IDENT_ADDR + 0x8000000000UL; +} + +__EXTERN_INLINE int cia_bwx_is_mmio(const 
volatile void __iomem *addr) +{ + return (unsigned long)addr < CIA_BW_IO; +} + +#undef vip +#undef vuip +#undef vulp + +#undef __IO_PREFIX +#define __IO_PREFIX cia +#define cia_trivial_rw_bw 2 +#define cia_trivial_rw_lq 1 +#define cia_trivial_io_bw 0 +#define cia_trivial_io_lq 0 +#define cia_trivial_iounmap 1 +#include + +#undef __IO_PREFIX +#define __IO_PREFIX cia_bwx +#define cia_bwx_trivial_rw_bw 1 +#define cia_bwx_trivial_rw_lq 1 +#define cia_bwx_trivial_io_bw 1 +#define cia_bwx_trivial_io_lq 1 +#define cia_bwx_trivial_iounmap 1 +#include + +#undef __IO_PREFIX +#ifdef CONFIG_ALPHA_PYXIS +#define __IO_PREFIX cia_bwx +#else +#define __IO_PREFIX cia +#endif + +#ifdef __IO_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_CIA__H__ */ diff --git a/arch/alpha/include/asm/core_irongate.h b/arch/alpha/include/asm/core_irongate.h new file mode 100644 index 0000000000..1c8906bf76 --- /dev/null +++ b/arch/alpha/include/asm/core_irongate.h @@ -0,0 +1,233 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_IRONGATE__H__ +#define __ALPHA_IRONGATE__H__ + +#include +#include + +/* + * IRONGATE is the internal name for the AMD-751 K7 core logic chipset + * which provides memory controller and PCI access for NAUTILUS-based + * EV6 (21264) systems. + * + * This file is based on: + * + * IronGate management library, (c) 1999 Alpha Processor, Inc. + * Copyright (C) 1999 Alpha Processor, Inc., + * (David Daniel, Stig Telfer, Soohoon Lee) + */ + +/* + * The 21264 supports, and internally recognizes, a 44-bit physical + * address space that is divided equally between memory address space + * and I/O address space. Memory address space resides in the lower + * half of the physical address space (PA[43]=0) and I/O address space + * resides in the upper half of the physical address space (PA[43]=1). + */ + +/* + * Irongate CSR map. Some of the CSRs are 8 or 16 bits, but all access + * through the routines given is 32-bit. + * + * The first 0x40 bytes are standard as per the PCI spec. + */ + +typedef volatile __u32 igcsr32; + +typedef struct { + igcsr32 dev_vendor; /* 0x00 - device ID, vendor ID */ + igcsr32 stat_cmd; /* 0x04 - status, command */ + igcsr32 class; /* 0x08 - class code, rev ID */ + igcsr32 latency; /* 0x0C - header type, PCI latency */ + igcsr32 bar0; /* 0x10 - BAR0 - AGP */ + igcsr32 bar1; /* 0x14 - BAR1 - GART */ + igcsr32 bar2; /* 0x18 - Power Management reg block */ + + igcsr32 rsrvd0[6]; /* 0x1C-0x33 reserved */ + + igcsr32 capptr; /* 0x34 - Capabilities pointer */ + + igcsr32 rsrvd1[2]; /* 0x38-0x3F reserved */ + + igcsr32 bacsr10; /* 0x40 - base address chip selects */ + igcsr32 bacsr32; /* 0x44 - base address chip selects */ + igcsr32 bacsr54_eccms761; /* 0x48 - 751: base addr. 
chip selects + 761: ECC, mode/status */ + + igcsr32 rsrvd2[1]; /* 0x4C-0x4F reserved */ + + igcsr32 drammap; /* 0x50 - address mapping control */ + igcsr32 dramtm; /* 0x54 - timing, driver strength */ + igcsr32 dramms; /* 0x58 - DRAM mode/status */ + + igcsr32 rsrvd3[1]; /* 0x5C-0x5F reserved */ + + igcsr32 biu0; /* 0x60 - bus interface unit */ + igcsr32 biusip; /* 0x64 - Serial initialisation pkt */ + + igcsr32 rsrvd4[2]; /* 0x68-0x6F reserved */ + + igcsr32 mro; /* 0x70 - memory request optimiser */ + + igcsr32 rsrvd5[3]; /* 0x74-0x7F reserved */ + + igcsr32 whami; /* 0x80 - who am I */ + igcsr32 pciarb; /* 0x84 - PCI arbitration control */ + igcsr32 pcicfg; /* 0x88 - PCI config status */ + + igcsr32 rsrvd6[4]; /* 0x8C-0x9B reserved */ + + igcsr32 pci_mem; /* 0x9C - PCI top of memory, + 761 only */ + + /* AGP (bus 1) control registers */ + igcsr32 agpcap; /* 0xA0 - AGP Capability Identifier */ + igcsr32 agpstat; /* 0xA4 - AGP status register */ + igcsr32 agpcmd; /* 0xA8 - AGP control register */ + igcsr32 agpva; /* 0xAC - AGP Virtual Address Space */ + igcsr32 agpmode; /* 0xB0 - AGP/GART mode control */ +} Irongate0; + + +typedef struct { + + igcsr32 dev_vendor; /* 0x00 - Device and Vendor IDs */ + igcsr32 stat_cmd; /* 0x04 - Status and Command regs */ + igcsr32 class; /* 0x08 - subclass, baseclass etc */ + igcsr32 htype; /* 0x0C - header type (at 0x0E) */ + igcsr32 rsrvd0[2]; /* 0x10-0x17 reserved */ + igcsr32 busnos; /* 0x18 - Primary, secondary bus nos */ + igcsr32 io_baselim_regs; /* 0x1C - IO base, IO lim, AGP status */ + igcsr32 mem_baselim; /* 0x20 - memory base, memory lim */ + igcsr32 pfmem_baselim; /* 0x24 - prefetchable base, lim */ + igcsr32 rsrvd1[2]; /* 0x28-0x2F reserved */ + igcsr32 io_baselim; /* 0x30 - IO base, IO limit */ + igcsr32 rsrvd2[2]; /* 0x34-0x3B - reserved */ + igcsr32 interrupt; /* 0x3C - interrupt, PCI bridge ctrl */ + +} Irongate1; + +extern igcsr32 *IronECC; + +/* + * Memory spaces: + */ + +/* Irongate is consistent with a subset of the Tsunami memory map */ +#ifdef USE_48_BIT_KSEG +#define IRONGATE_BIAS 0x80000000000UL +#else +#define IRONGATE_BIAS 0x10000000000UL +#endif + + +#define IRONGATE_MEM (IDENT_ADDR | IRONGATE_BIAS | 0x000000000UL) +#define IRONGATE_IACK_SC (IDENT_ADDR | IRONGATE_BIAS | 0x1F8000000UL) +#define IRONGATE_IO (IDENT_ADDR | IRONGATE_BIAS | 0x1FC000000UL) +#define IRONGATE_CONF (IDENT_ADDR | IRONGATE_BIAS | 0x1FE000000UL) + +/* + * PCI Configuration space accesses are formed like so: + * + * 0x1FE << 24 | : 2 2 2 2 1 1 1 1 : 1 1 1 1 1 1 0 0 : 0 0 0 0 0 0 0 0 : + * : 3 2 1 0 9 8 7 6 : 5 4 3 2 1 0 9 8 : 7 6 5 4 3 2 1 0 : + * ---bus numer--- -device-- -fun- ---register---- + */ + +#define IGCSR(dev,fun,reg) ( IRONGATE_CONF | \ + ((dev)<<11) | \ + ((fun)<<8) | \ + (reg) ) + +#define IRONGATE0 ((Irongate0 *) IGCSR(0, 0, 0)) +#define IRONGATE1 ((Irongate1 *) IGCSR(1, 0, 0)) + +/* + * Data structure for handling IRONGATE machine checks: + * This is the standard OSF logout frame + */ + +#define SCB_Q_SYSERR 0x620 /* OSF definitions */ +#define SCB_Q_PROCERR 0x630 +#define SCB_Q_SYSMCHK 0x660 +#define SCB_Q_PROCMCHK 0x670 + +struct el_IRONGATE_sysdata_mcheck { + __u32 FrameSize; /* Bytes, including this field */ + __u32 FrameFlags; /* <31> = Retry, <30> = Second Error */ + __u32 CpuOffset; /* Offset to CPU-specific into */ + __u32 SystemOffset; /* Offset to system-specific info */ + __u32 MCHK_Code; + __u32 MCHK_Frame_Rev; + __u64 I_STAT; + __u64 DC_STAT; + __u64 C_ADDR; + __u64 DC1_SYNDROME; + __u64 DC0_SYNDROME; + __u64 C_STAT; + __u64 
C_STS; + __u64 RESERVED0; + __u64 EXC_ADDR; + __u64 IER_CM; + __u64 ISUM; + __u64 MM_STAT; + __u64 PAL_BASE; + __u64 I_CTL; + __u64 PCTX; +}; + + +#ifdef __KERNEL__ + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __IO_EXTERN_INLINE +#endif + +/* + * I/O functions: + * + * IRONGATE (AMD-751) PCI/memory support chip for the EV6 (21264) and + * K7 can only use linear accesses to get at PCI memory and I/O spaces. + */ + +/* + * Memory functions. All accesses are done through linear space. + */ + +__EXTERN_INLINE void __iomem *irongate_ioportmap(unsigned long addr) +{ + return (void __iomem *)(addr + IRONGATE_IO); +} + +extern void __iomem *irongate_ioremap(unsigned long addr, unsigned long size); +extern void irongate_iounmap(volatile void __iomem *addr); + +__EXTERN_INLINE int irongate_is_ioaddr(unsigned long addr) +{ + return addr >= IRONGATE_MEM; +} + +__EXTERN_INLINE int irongate_is_mmio(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long)xaddr; + return addr < IRONGATE_IO || addr >= IRONGATE_CONF; +} + +#undef __IO_PREFIX +#define __IO_PREFIX irongate +#define irongate_trivial_rw_bw 1 +#define irongate_trivial_rw_lq 1 +#define irongate_trivial_io_bw 1 +#define irongate_trivial_io_lq 1 +#define irongate_trivial_iounmap 0 +#include + +#ifdef __IO_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_IRONGATE__H__ */ diff --git a/arch/alpha/include/asm/core_lca.h b/arch/alpha/include/asm/core_lca.h new file mode 100644 index 0000000000..ec86314418 --- /dev/null +++ b/arch/alpha/include/asm/core_lca.h @@ -0,0 +1,362 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_LCA__H__ +#define __ALPHA_LCA__H__ + +#include +#include + +/* + * Low Cost Alpha (LCA) definitions (these apply to 21066 and 21068, + * for example). + * + * This file is based on: + * + * DECchip 21066 and DECchip 21068 Alpha AXP Microprocessors + * Hardware Reference Manual; Digital Equipment Corp.; May 1994; + * Maynard, MA; Order Number: EC-N2681-71. + */ + +/* + * NOTE: The LCA uses a Host Address Extension (HAE) register to access + * PCI addresses that are beyond the first 27 bits of address + * space. Updating the HAE requires an external cycle (and + * a memory barrier), which tends to be slow. Instead of updating + * it on each sparse memory access, we keep the current HAE value + * cached in variable cache_hae. Only if the cached HAE differs + * from the desired HAE value do we actually updated HAE register. + * The HAE register is preserved by the interrupt handler entry/exit + * code, so this scheme works even in the presence of interrupts. + * + * Dense memory space doesn't require the HAE, but is restricted to + * aligned 32 and 64 bit accesses. Special Cycle and Interrupt + * Acknowledge cycles may also require the use of the HAE. The LCA + * limits I/O address space to the bottom 24 bits of address space, + * but this easily covers the 16 bit ISA I/O address space. + */ + +/* + * NOTE 2! The memory operations do not set any memory barriers, as + * it's not needed for cases like a frame buffer that is essentially + * memory-like. You need to do them by hand if the operations depend + * on ordering. + * + * Similarly, the port I/O operations do a "mb" only after a write + * operation: if an mb is needed before (as in the case of doing + * memory mapped I/O first, and then a port I/O operation to the same + * device), it needs to be done by hand. 
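+ *
+ * (A sketch of the hand-done case, with hypothetical device names:
+ *
+ *    writel(pixel, fb_base + offset);
+ *    mb();
+ *    outb(START_CMD, cmd_port);
+ *
+ * The outb() only implies a barrier after its own write, so the mb()
+ * before it is the caller's job.)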
+ * + * After the above has bitten me 100 times, I'll give up and just do + * the mb all the time, but right now I'm hoping this will work out. + * Avoiding mb's may potentially be a noticeable speed improvement, + * but I can't honestly say I've tested it. + * + * Handling interrupts that need to do mb's to synchronize to + * non-interrupts is another fun race area. Don't do it (because if + * you do, I'll have to do *everything* with interrupts disabled, + * ugh). + */ + +/* + * Memory Controller registers: + */ +#define LCA_MEM_BCR0 (IDENT_ADDR + 0x120000000UL) +#define LCA_MEM_BCR1 (IDENT_ADDR + 0x120000008UL) +#define LCA_MEM_BCR2 (IDENT_ADDR + 0x120000010UL) +#define LCA_MEM_BCR3 (IDENT_ADDR + 0x120000018UL) +#define LCA_MEM_BMR0 (IDENT_ADDR + 0x120000020UL) +#define LCA_MEM_BMR1 (IDENT_ADDR + 0x120000028UL) +#define LCA_MEM_BMR2 (IDENT_ADDR + 0x120000030UL) +#define LCA_MEM_BMR3 (IDENT_ADDR + 0x120000038UL) +#define LCA_MEM_BTR0 (IDENT_ADDR + 0x120000040UL) +#define LCA_MEM_BTR1 (IDENT_ADDR + 0x120000048UL) +#define LCA_MEM_BTR2 (IDENT_ADDR + 0x120000050UL) +#define LCA_MEM_BTR3 (IDENT_ADDR + 0x120000058UL) +#define LCA_MEM_GTR (IDENT_ADDR + 0x120000060UL) +#define LCA_MEM_ESR (IDENT_ADDR + 0x120000068UL) +#define LCA_MEM_EAR (IDENT_ADDR + 0x120000070UL) +#define LCA_MEM_CAR (IDENT_ADDR + 0x120000078UL) +#define LCA_MEM_VGR (IDENT_ADDR + 0x120000080UL) +#define LCA_MEM_PLM (IDENT_ADDR + 0x120000088UL) +#define LCA_MEM_FOR (IDENT_ADDR + 0x120000090UL) + +/* + * I/O Controller registers: + */ +#define LCA_IOC_HAE (IDENT_ADDR + 0x180000000UL) +#define LCA_IOC_CONF (IDENT_ADDR + 0x180000020UL) +#define LCA_IOC_STAT0 (IDENT_ADDR + 0x180000040UL) +#define LCA_IOC_STAT1 (IDENT_ADDR + 0x180000060UL) +#define LCA_IOC_TBIA (IDENT_ADDR + 0x180000080UL) +#define LCA_IOC_TB_ENA (IDENT_ADDR + 0x1800000a0UL) +#define LCA_IOC_SFT_RST (IDENT_ADDR + 0x1800000c0UL) +#define LCA_IOC_PAR_DIS (IDENT_ADDR + 0x1800000e0UL) +#define LCA_IOC_W_BASE0 (IDENT_ADDR + 0x180000100UL) +#define LCA_IOC_W_BASE1 (IDENT_ADDR + 0x180000120UL) +#define LCA_IOC_W_MASK0 (IDENT_ADDR + 0x180000140UL) +#define LCA_IOC_W_MASK1 (IDENT_ADDR + 0x180000160UL) +#define LCA_IOC_T_BASE0 (IDENT_ADDR + 0x180000180UL) +#define LCA_IOC_T_BASE1 (IDENT_ADDR + 0x1800001a0UL) +#define LCA_IOC_TB_TAG0 (IDENT_ADDR + 0x188000000UL) +#define LCA_IOC_TB_TAG1 (IDENT_ADDR + 0x188000020UL) +#define LCA_IOC_TB_TAG2 (IDENT_ADDR + 0x188000040UL) +#define LCA_IOC_TB_TAG3 (IDENT_ADDR + 0x188000060UL) +#define LCA_IOC_TB_TAG4 (IDENT_ADDR + 0x188000070UL) +#define LCA_IOC_TB_TAG5 (IDENT_ADDR + 0x1880000a0UL) +#define LCA_IOC_TB_TAG6 (IDENT_ADDR + 0x1880000c0UL) +#define LCA_IOC_TB_TAG7 (IDENT_ADDR + 0x1880000e0UL) + +/* + * Memory spaces: + */ +#define LCA_IACK_SC (IDENT_ADDR + 0x1a0000000UL) +#define LCA_CONF (IDENT_ADDR + 0x1e0000000UL) +#define LCA_IO (IDENT_ADDR + 0x1c0000000UL) +#define LCA_SPARSE_MEM (IDENT_ADDR + 0x200000000UL) +#define LCA_DENSE_MEM (IDENT_ADDR + 0x300000000UL) + +/* + * Bit definitions for I/O Controller status register 0: + */ +#define LCA_IOC_STAT0_CMD 0xf +#define LCA_IOC_STAT0_ERR (1<<4) +#define LCA_IOC_STAT0_LOST (1<<5) +#define LCA_IOC_STAT0_THIT (1<<6) +#define LCA_IOC_STAT0_TREF (1<<7) +#define LCA_IOC_STAT0_CODE_SHIFT 8 +#define LCA_IOC_STAT0_CODE_MASK 0x7 +#define LCA_IOC_STAT0_P_NBR_SHIFT 13 +#define LCA_IOC_STAT0_P_NBR_MASK 0x7ffff + +#define LCA_HAE_ADDRESS LCA_IOC_HAE + +/* LCA PMR Power Management register defines */ +#define LCA_PMR_ADDR (IDENT_ADDR + 0x120000098UL) +#define LCA_PMR_PDIV 0x7 /* Primary clock 
divisor */ +#define LCA_PMR_ODIV 0x38 /* Override clock divisor */ +#define LCA_PMR_INTO 0x40 /* Interrupt override */ +#define LCA_PMR_DMAO 0x80 /* DMA override */ +#define LCA_PMR_OCCEB 0xffff0000L /* Override cycle counter - even bits */ +#define LCA_PMR_OCCOB 0xffff000000000000L /* Override cycle counter - even bits */ +#define LCA_PMR_PRIMARY_MASK 0xfffffffffffffff8L + +/* LCA PMR Macros */ + +#define LCA_READ_PMR (*(volatile unsigned long *)LCA_PMR_ADDR) +#define LCA_WRITE_PMR(d) (*((volatile unsigned long *)LCA_PMR_ADDR) = (d)) + +#define LCA_GET_PRIMARY(r) ((r) & LCA_PMR_PDIV) +#define LCA_GET_OVERRIDE(r) (((r) >> 3) & LCA_PMR_PDIV) +#define LCA_SET_PRIMARY_CLOCK(r, c) ((r) = (((r) & LCA_PMR_PRIMARY_MASK)|(c))) + +/* LCA PMR Divisor values */ +#define LCA_PMR_DIV_1 0x0 +#define LCA_PMR_DIV_1_5 0x1 +#define LCA_PMR_DIV_2 0x2 +#define LCA_PMR_DIV_4 0x3 +#define LCA_PMR_DIV_8 0x4 +#define LCA_PMR_DIV_16 0x5 +#define LCA_PMR_DIV_MIN DIV_1 +#define LCA_PMR_DIV_MAX DIV_16 + + +/* + * Data structure for handling LCA machine checks. Correctable errors + * result in a short logout frame, uncorrectable ones in a long one. + */ +struct el_lca_mcheck_short { + struct el_common h; /* common logout header */ + unsigned long esr; /* error-status register */ + unsigned long ear; /* error-address register */ + unsigned long dc_stat; /* dcache status register */ + unsigned long ioc_stat0; /* I/O controller status register 0 */ + unsigned long ioc_stat1; /* I/O controller status register 1 */ +}; + +struct el_lca_mcheck_long { + struct el_common h; /* common logout header */ + unsigned long pt[31]; /* PAL temps */ + unsigned long exc_addr; /* exception address */ + unsigned long pad1[3]; + unsigned long pal_base; /* PALcode base address */ + unsigned long hier; /* hw interrupt enable */ + unsigned long hirr; /* hw interrupt request */ + unsigned long mm_csr; /* MMU control & status */ + unsigned long dc_stat; /* data cache status */ + unsigned long dc_addr; /* data cache addr register */ + unsigned long abox_ctl; /* address box control register */ + unsigned long esr; /* error status register */ + unsigned long ear; /* error address register */ + unsigned long car; /* cache control register */ + unsigned long ioc_stat0; /* I/O controller status register 0 */ + unsigned long ioc_stat1; /* I/O controller status register 1 */ + unsigned long va; /* virtual address register */ +}; + +union el_lca { + struct el_common * c; + struct el_lca_mcheck_long * l; + struct el_lca_mcheck_short * s; +}; + +#ifdef __KERNEL__ + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __IO_EXTERN_INLINE +#endif + +/* + * I/O functions: + * + * Unlike Jensen, the Noname machines have no concept of local + * I/O---everything goes over the PCI bus. + * + * There is plenty room for optimization here. In particular, + * the Alpha's insb/insw/extb/extw should be useful in moving + * data to/from the right byte-lanes. 
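+ *
+ * (Hypothetical sketch: in plain C, the byte-lane extract and insert
+ * that extbl/insbl perform are roughly
+ *
+ *    static inline unsigned long c_extbl(unsigned long val, unsigned long lane)
+ *    {
+ *        return (val >> (8 * lane)) & 0xff;
+ *    }
+ *
+ *    static inline unsigned long c_insbl(unsigned long b, unsigned long lane)
+ *    {
+ *        return (b & 0xff) << (8 * lane);
+ *    }
+ *
+ * which is how lca_ioread8()/lca_iowrite8() below pick the right byte
+ * out of the 32-bit sparse-space access.)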
+ */ + +#define vip volatile int __force * +#define vuip volatile unsigned int __force * +#define vulp volatile unsigned long __force * + +#define LCA_SET_HAE \ + do { \ + if (addr >= (1UL << 24)) { \ + unsigned long msb = addr & 0xf8000000; \ + addr -= msb; \ + set_hae(msb); \ + } \ + } while (0) + + +__EXTERN_INLINE unsigned int lca_ioread8(const void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long result, base_and_type; + + if (addr >= LCA_DENSE_MEM) { + addr -= LCA_DENSE_MEM; + LCA_SET_HAE; + base_and_type = LCA_SPARSE_MEM + 0x00; + } else { + addr -= LCA_IO; + base_and_type = LCA_IO + 0x00; + } + + result = *(vip) ((addr << 5) + base_and_type); + return __kernel_extbl(result, addr & 3); +} + +__EXTERN_INLINE void lca_iowrite8(u8 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long w, base_and_type; + + if (addr >= LCA_DENSE_MEM) { + addr -= LCA_DENSE_MEM; + LCA_SET_HAE; + base_and_type = LCA_SPARSE_MEM + 0x00; + } else { + addr -= LCA_IO; + base_and_type = LCA_IO + 0x00; + } + + w = __kernel_insbl(b, addr & 3); + *(vuip) ((addr << 5) + base_and_type) = w; +} + +__EXTERN_INLINE unsigned int lca_ioread16(const void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long result, base_and_type; + + if (addr >= LCA_DENSE_MEM) { + addr -= LCA_DENSE_MEM; + LCA_SET_HAE; + base_and_type = LCA_SPARSE_MEM + 0x08; + } else { + addr -= LCA_IO; + base_and_type = LCA_IO + 0x08; + } + + result = *(vip) ((addr << 5) + base_and_type); + return __kernel_extwl(result, addr & 3); +} + +__EXTERN_INLINE void lca_iowrite16(u16 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long w, base_and_type; + + if (addr >= LCA_DENSE_MEM) { + addr -= LCA_DENSE_MEM; + LCA_SET_HAE; + base_and_type = LCA_SPARSE_MEM + 0x08; + } else { + addr -= LCA_IO; + base_and_type = LCA_IO + 0x08; + } + + w = __kernel_inswl(b, addr & 3); + *(vuip) ((addr << 5) + base_and_type) = w; +} + +__EXTERN_INLINE unsigned int lca_ioread32(const void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + if (addr < LCA_DENSE_MEM) + addr = ((addr - LCA_IO) << 5) + LCA_IO + 0x18; + return *(vuip)addr; +} + +__EXTERN_INLINE void lca_iowrite32(u32 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + if (addr < LCA_DENSE_MEM) + addr = ((addr - LCA_IO) << 5) + LCA_IO + 0x18; + *(vuip)addr = b; +} + +__EXTERN_INLINE void __iomem *lca_ioportmap(unsigned long addr) +{ + return (void __iomem *)(addr + LCA_IO); +} + +__EXTERN_INLINE void __iomem *lca_ioremap(unsigned long addr, + unsigned long size) +{ + return (void __iomem *)(addr + LCA_DENSE_MEM); +} + +__EXTERN_INLINE int lca_is_ioaddr(unsigned long addr) +{ + return addr >= IDENT_ADDR + 0x120000000UL; +} + +__EXTERN_INLINE int lca_is_mmio(const volatile void __iomem *addr) +{ + return (unsigned long)addr >= LCA_DENSE_MEM; +} + +#undef vip +#undef vuip +#undef vulp + +#undef __IO_PREFIX +#define __IO_PREFIX lca +#define lca_trivial_rw_bw 2 +#define lca_trivial_rw_lq 1 +#define lca_trivial_io_bw 0 +#define lca_trivial_io_lq 0 +#define lca_trivial_iounmap 1 +#include + +#ifdef __IO_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_LCA__H__ */ diff --git a/arch/alpha/include/asm/core_marvel.h b/arch/alpha/include/asm/core_marvel.h new file mode 100644 index 0000000000..b266e02e28 --- /dev/null +++ b/arch/alpha/include/asm/core_marvel.h @@ -0,0 +1,378 @@ +/* 
SPDX-License-Identifier: GPL-2.0 */ +/* + * Marvel systems use the IO7 I/O chip provides PCI/PCIX/AGP access + * + * This file is based on: + * + * Marvel / EV7 System Programmer's Manual + * Revision 1.00 + * 14 May 2001 + */ + +#ifndef __ALPHA_MARVEL__H__ +#define __ALPHA_MARVEL__H__ + +#include +#include + +#include + +#define MARVEL_MAX_PIDS 32 /* as long as we rely on 43-bit superpage */ +#define MARVEL_IRQ_VEC_PE_SHIFT (10) +#define MARVEL_IRQ_VEC_IRQ_MASK ((1 << MARVEL_IRQ_VEC_PE_SHIFT) - 1) +#define MARVEL_NR_IRQS \ + (16 + (MARVEL_MAX_PIDS * (1 << MARVEL_IRQ_VEC_PE_SHIFT))) + +/* + * EV7 RBOX Registers + */ +typedef struct { + volatile unsigned long csr __attribute__((aligned(16))); +} ev7_csr; + +typedef struct { + ev7_csr RBOX_CFG; /* 0x0000 */ + ev7_csr RBOX_NSVC; + ev7_csr RBOX_EWVC; + ev7_csr RBOX_WHAMI; + ev7_csr RBOX_TCTL; /* 0x0040 */ + ev7_csr RBOX_INT; + ev7_csr RBOX_IMASK; + ev7_csr RBOX_IREQ; + ev7_csr RBOX_INTQ; /* 0x0080 */ + ev7_csr RBOX_INTA; + ev7_csr RBOX_IT; + ev7_csr RBOX_SCRATCH1; + ev7_csr RBOX_SCRATCH2; /* 0x00c0 */ + ev7_csr RBOX_L_ERR; +} ev7_csrs; + +/* + * EV7 CSR addressing macros + */ +#define EV7_MASK40(addr) ((addr) & ((1UL << 41) - 1)) +#define EV7_KERN_ADDR(addr) ((void *)(IDENT_ADDR | EV7_MASK40(addr))) + +#define EV7_PE_MASK 0x1ffUL /* 9 bits ( 256 + mem/io ) */ +#define EV7_IPE(pe) ((~((long)(pe)) & EV7_PE_MASK) << 35) + +#define EV7_CSR_PHYS(pe, off) (EV7_IPE(pe) | (0x7FFCUL << 20) | (off)) +#define EV7_CSRS_PHYS(pe) (EV7_CSR_PHYS(pe, 0UL)) + +#define EV7_CSR_KERN(pe, off) (EV7_KERN_ADDR(EV7_CSR_PHYS(pe, off))) +#define EV7_CSRS_KERN(pe) (EV7_KERN_ADDR(EV7_CSRS_PHYS(pe))) + +#define EV7_CSR_OFFSET(name) ((unsigned long)&((ev7_csrs *)NULL)->name.csr) + +/* + * IO7 registers + */ +typedef struct { + volatile unsigned long csr __attribute__((aligned(64))); +} io7_csr; + +typedef struct { + /* I/O Port Control Registers */ + io7_csr POx_CTRL; /* 0x0000 */ + io7_csr POx_CACHE_CTL; + io7_csr POx_TIMER; + io7_csr POx_IO_ADR_EXT; + io7_csr POx_MEM_ADR_EXT; /* 0x0100 */ + io7_csr POx_XCAL_CTRL; + io7_csr rsvd1[2]; /* ?? 
spec doesn't show 0x180 */ + io7_csr POx_DM_SOURCE; /* 0x0200 */ + io7_csr POx_DM_DEST; + io7_csr POx_DM_SIZE; + io7_csr POx_DM_CTRL; + io7_csr rsvd2[4]; /* 0x0300 */ + + /* AGP Control Registers -- port 3 only */ + io7_csr AGP_CAP_ID; /* 0x0400 */ + io7_csr AGP_STAT; + io7_csr AGP_CMD; + io7_csr rsvd3; + + /* I/O Port Monitor Registers */ + io7_csr POx_MONCTL; /* 0x0500 */ + io7_csr POx_CTRA; + io7_csr POx_CTRB; + io7_csr POx_CTR56; + io7_csr POx_SCRATCH; /* 0x0600 */ + io7_csr POx_XTRA_A; + io7_csr POx_XTRA_TS; + io7_csr POx_XTRA_Z; + io7_csr rsvd4; /* 0x0700 */ + io7_csr POx_THRESHA; + io7_csr POx_THRESHB; + io7_csr rsvd5[33]; + + /* System Address Space Window Control Registers */ + + io7_csr POx_WBASE[4]; /* 0x1000 */ + io7_csr POx_WMASK[4]; + io7_csr POx_TBASE[4]; + io7_csr POx_SG_TBIA; + io7_csr POx_MSI_WBASE; + io7_csr rsvd6[50]; + + /* I/O Port Error Registers */ + io7_csr POx_ERR_SUM; + io7_csr POx_FIRST_ERR; + io7_csr POx_MSK_HEI; + io7_csr POx_TLB_ERR; + io7_csr POx_SPL_COMPLT; + io7_csr POx_TRANS_SUM; + io7_csr POx_FRC_PCI_ERR; + io7_csr POx_MULT_ERR; + io7_csr rsvd7[8]; + + /* I/O Port End of Interrupt Registers */ + io7_csr EOI_DAT; + io7_csr rsvd8[7]; + io7_csr POx_IACK_SPECIAL; + io7_csr rsvd9[103]; +} io7_ioport_csrs; + +typedef struct { + io7_csr IO_ASIC_REV; /* 0x30.0000 */ + io7_csr IO_SYS_REV; + io7_csr SER_CHAIN3; + io7_csr PO7_RST1; + io7_csr PO7_RST2; /* 0x30.0100 */ + io7_csr POx_RST[4]; + io7_csr IO7_DWNH; + io7_csr IO7_MAF; + io7_csr IO7_MAF_TO; + io7_csr IO7_ACC_CLUMP; /* 0x30.0300 */ + io7_csr IO7_PMASK; + io7_csr IO7_IOMASK; + io7_csr IO7_UPH; + io7_csr IO7_UPH_TO; /* 0x30.0400 */ + io7_csr RBX_IREQ_OFF; + io7_csr RBX_INTA_OFF; + io7_csr INT_RTY; + io7_csr PO7_MONCTL; /* 0x30.0500 */ + io7_csr PO7_CTRA; + io7_csr PO7_CTRB; + io7_csr PO7_CTR56; + io7_csr PO7_SCRATCH; /* 0x30.0600 */ + io7_csr PO7_XTRA_A; + io7_csr PO7_XTRA_TS; + io7_csr PO7_XTRA_Z; + io7_csr PO7_PMASK; /* 0x30.0700 */ + io7_csr PO7_THRESHA; + io7_csr PO7_THRESHB; + io7_csr rsvd1[97]; + io7_csr PO7_ERROR_SUM; /* 0x30.2000 */ + io7_csr PO7_BHOLE_MASK; + io7_csr PO7_HEI_MSK; + io7_csr PO7_CRD_MSK; + io7_csr PO7_UNCRR_SYM; /* 0x30.2100 */ + io7_csr PO7_CRRCT_SYM; + io7_csr PO7_ERR_PKT[2]; + io7_csr PO7_UGBGE_SYM; /* 0x30.2200 */ + io7_csr rsbv2[887]; + io7_csr PO7_LSI_CTL[128]; /* 0x31.0000 */ + io7_csr rsvd3[123]; + io7_csr HLT_CTL; /* 0x31.3ec0 */ + io7_csr HPI_CTL; /* 0x31.3f00 */ + io7_csr CRD_CTL; + io7_csr STV_CTL; + io7_csr HEI_CTL; + io7_csr PO7_MSI_CTL[16]; /* 0x31.4000 */ + io7_csr rsvd4[240]; + + /* + * Interrupt Diagnostic / Test + */ + struct { + io7_csr INT_PND; + io7_csr INT_CLR; + io7_csr INT_EOI; + io7_csr rsvd[29]; + } INT_DIAG[4]; + io7_csr rsvd5[125]; /* 0x31.a000 */ + io7_csr MISC_PND; /* 0x31.b800 */ + io7_csr rsvd6[31]; + io7_csr MSI_PND[16]; /* 0x31.c000 */ + io7_csr rsvd7[16]; + io7_csr MSI_CLR[16]; /* 0x31.c800 */ +} io7_port7_csrs; + +/* + * IO7 DMA Window Base register (POx_WBASEx) + */ +#define wbase_m_ena 0x1 +#define wbase_m_sg 0x2 +#define wbase_m_dac 0x4 +#define wbase_m_addr 0xFFF00000 +union IO7_POx_WBASE { + struct { + unsigned ena : 1; /* <0> */ + unsigned sg : 1; /* <1> */ + unsigned dac : 1; /* <2> -- window 3 only */ + unsigned rsvd1 : 17; + unsigned addr : 12; /* <31:20> */ + unsigned rsvd2 : 32; + } bits; + unsigned as_long[2]; + unsigned as_quad; +}; + +/* + * IO7 IID (Interrupt IDentifier) format + * + * For level-sensative interrupts, int_num is encoded as: + * + * bus/port slot/device INTx + * <7:5> <4:2> <1:0> + */ +union IO7_IID { + struct { + 
unsigned int_num : 9; /* <8:0> */ + unsigned tpu_mask : 4; /* <12:9> rsvd */ + unsigned msi : 1; /* 13 */ + unsigned ipe : 10; /* <23:14> */ + unsigned long rsvd : 40; + } bits; + unsigned int as_long[2]; + unsigned long as_quad; +}; + +/* + * IO7 addressing macros + */ +#define IO7_KERN_ADDR(addr) (EV7_KERN_ADDR(addr)) + +#define IO7_PORT_MASK 0x07UL /* 3 bits of port */ + +#define IO7_IPE(pe) (EV7_IPE(pe)) +#define IO7_IPORT(port) ((~((long)(port)) & IO7_PORT_MASK) << 32) + +#define IO7_HOSE(pe, port) (IO7_IPE(pe) | IO7_IPORT(port)) + +#define IO7_MEM_PHYS(pe, port) (IO7_HOSE(pe, port) | 0x00000000UL) +#define IO7_CONF_PHYS(pe, port) (IO7_HOSE(pe, port) | 0xFE000000UL) +#define IO7_IO_PHYS(pe, port) (IO7_HOSE(pe, port) | 0xFF000000UL) +#define IO7_CSR_PHYS(pe, port, off) \ + (IO7_HOSE(pe, port) | 0xFF800000UL | (off)) +#define IO7_CSRS_PHYS(pe, port) (IO7_CSR_PHYS(pe, port, 0UL)) +#define IO7_PORT7_CSRS_PHYS(pe) (IO7_CSR_PHYS(pe, 7, 0x300000UL)) + +#define IO7_MEM_KERN(pe, port) (IO7_KERN_ADDR(IO7_MEM_PHYS(pe, port))) +#define IO7_CONF_KERN(pe, port) (IO7_KERN_ADDR(IO7_CONF_PHYS(pe, port))) +#define IO7_IO_KERN(pe, port) (IO7_KERN_ADDR(IO7_IO_PHYS(pe, port))) +#define IO7_CSR_KERN(pe, port, off) (IO7_KERN_ADDR(IO7_CSR_PHYS(pe,port,off))) +#define IO7_CSRS_KERN(pe, port) (IO7_KERN_ADDR(IO7_CSRS_PHYS(pe, port))) +#define IO7_PORT7_CSRS_KERN(pe) (IO7_KERN_ADDR(IO7_PORT7_CSRS_PHYS(pe))) + +#define IO7_PLL_RNGA(pll) (((pll) >> 3) & 0x7) +#define IO7_PLL_RNGB(pll) (((pll) >> 6) & 0x7) + +#define IO7_MEM_SPACE (2UL * 1024 * 1024 * 1024) /* 2GB MEM */ +#define IO7_IO_SPACE (8UL * 1024 * 1024) /* 8MB I/O */ + + +/* + * Offset between ram physical addresses and pci64 DAC addresses + */ +#define IO7_DAC_OFFSET (1UL << 49) + +/* + * This is needed to satisify the IO() macro used in initializing the machvec + */ +#define MARVEL_IACK_SC \ + ((unsigned long) \ + (&(((io7_ioport_csrs *)IO7_CSRS_KERN(0, 0))->POx_IACK_SPECIAL))) + +#ifdef __KERNEL__ + +/* + * IO7 structs + */ +#define IO7_NUM_PORTS 4 +#define IO7_AGP_PORT 3 + +struct io7_port { + struct io7 *io7; + struct pci_controller *hose; + + int enabled; + unsigned int port; + io7_ioport_csrs *csrs; + + unsigned long saved_wbase[4]; + unsigned long saved_wmask[4]; + unsigned long saved_tbase[4]; +}; + +struct io7 { + struct io7 *next; + + unsigned int pe; + io7_port7_csrs *csrs; + struct io7_port ports[IO7_NUM_PORTS]; + + raw_spinlock_t irq_lock; +}; + +#ifndef __EXTERN_INLINE +# define __EXTERN_INLINE extern inline +# define __IO_EXTERN_INLINE +#endif + +/* + * I/O functions. All access through linear space. + */ + +/* + * Memory functions. All accesses through linear space. 
+ */ + +#define vucp volatile unsigned char __force * +#define vusp volatile unsigned short __force * + +extern unsigned int marvel_ioread8(const void __iomem *); +extern void marvel_iowrite8(u8 b, void __iomem *); + +__EXTERN_INLINE unsigned int marvel_ioread16(const void __iomem *addr) +{ + return __kernel_ldwu(*(vusp)addr); +} + +__EXTERN_INLINE void marvel_iowrite16(u16 b, void __iomem *addr) +{ + __kernel_stw(b, *(vusp)addr); +} + +extern void __iomem *marvel_ioremap(unsigned long addr, unsigned long size); +extern void marvel_iounmap(volatile void __iomem *addr); +extern void __iomem *marvel_ioportmap (unsigned long addr); + +__EXTERN_INLINE int marvel_is_ioaddr(unsigned long addr) +{ + return (addr >> 40) & 1; +} + +extern int marvel_is_mmio(const volatile void __iomem *); + +#undef vucp +#undef vusp + +#undef __IO_PREFIX +#define __IO_PREFIX marvel +#define marvel_trivial_rw_bw 1 +#define marvel_trivial_rw_lq 1 +#define marvel_trivial_io_bw 0 +#define marvel_trivial_io_lq 1 +#define marvel_trivial_iounmap 0 +#include + +#ifdef __IO_EXTERN_INLINE +# undef __EXTERN_INLINE +# undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_MARVEL__H__ */ diff --git a/arch/alpha/include/asm/core_mcpcia.h b/arch/alpha/include/asm/core_mcpcia.h new file mode 100644 index 0000000000..cb24d1bd61 --- /dev/null +++ b/arch/alpha/include/asm/core_mcpcia.h @@ -0,0 +1,382 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_MCPCIA__H__ +#define __ALPHA_MCPCIA__H__ + +/* Define to experiment with fitting everything into one 128MB HAE window. + One window per bus, that is. */ +#define MCPCIA_ONE_HAE_WINDOW 1 + +#include +#include +#include + +/* + * MCPCIA is the internal name for a core logic chipset which provides + * PCI access for the RAWHIDE family of systems. + * + * This file is based on: + * + * RAWHIDE System Programmer's Manual + * 16-May-96 + * Rev. 1.4 + * + */ + +/*------------------------------------------------------------------------** +** ** +** I/O procedures ** +** ** +** inport[b|w|t|l], outport[b|w|t|l] 8:16:24:32 IO xfers ** +** inportbxt: 8 bits only ** +** inport: alias of inportw ** +** outport: alias of outportw ** +** ** +** inmem[b|w|t|l], outmem[b|w|t|l] 8:16:24:32 ISA memory xfers ** +** inmembxt: 8 bits only ** +** inmem: alias of inmemw ** +** outmem: alias of outmemw ** +** ** +**------------------------------------------------------------------------*/ + + +/* MCPCIA ADDRESS BIT DEFINITIONS + * + * 3333 3333 3322 2222 2222 1111 1111 11 + * 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210 + * ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- + * 1 000 + * ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- + * | |\| + * | Byte Enable --+ | + * | Transfer Length --+ + * +-- IO space, not cached + * + * Byte Transfer + * Enable Length Transfer Byte Address + * adr<6:5> adr<4:3> Length Enable Adder + * --------------------------------------------- + * 00 00 Byte 1110 0x000 + * 01 00 Byte 1101 0x020 + * 10 00 Byte 1011 0x040 + * 11 00 Byte 0111 0x060 + * + * 00 01 Word 1100 0x008 + * 01 01 Word 1001 0x028 <= Not supported in this code. + * 10 01 Word 0011 0x048 + * + * 00 10 Tribyte 1000 0x010 + * 01 10 Tribyte 0001 0x030 + * + * 10 11 Longword 0000 0x058 + * + * Note that byte enables are asserted low. + * + */ + +#define MCPCIA_MAX_HOSES 4 + +#define MCPCIA_MID(m) ((unsigned long)(m) << 33) + +/* Dodge has PCI0 and PCI1 at MID 4 and 5 respectively. + Durango adds PCI2 and PCI3 at MID 6 and 7 respectively. 
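+   MCPCIA_HOSE2MID() below encodes exactly this: hoses 0..3 map linearly
+   onto MIDs 4..7, so hose 0 is PCI0 at MID 4 and hose 3 is PCI3 at MID 7.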
*/ +#define MCPCIA_HOSE2MID(h) ((h) + 4) + +#define MCPCIA_MEM_MASK 0x07ffffff /* SPARSE Mem region mask is 27 bits */ + +/* + * Memory spaces: + */ +#define MCPCIA_SPARSE(m) (IDENT_ADDR + 0xf000000000UL + MCPCIA_MID(m)) +#define MCPCIA_DENSE(m) (IDENT_ADDR + 0xf100000000UL + MCPCIA_MID(m)) +#define MCPCIA_IO(m) (IDENT_ADDR + 0xf180000000UL + MCPCIA_MID(m)) +#define MCPCIA_CONF(m) (IDENT_ADDR + 0xf1c0000000UL + MCPCIA_MID(m)) +#define MCPCIA_CSR(m) (IDENT_ADDR + 0xf1e0000000UL + MCPCIA_MID(m)) +#define MCPCIA_IO_IACK(m) (IDENT_ADDR + 0xf1f0000000UL + MCPCIA_MID(m)) +#define MCPCIA_DENSE_IO(m) (IDENT_ADDR + 0xe1fc000000UL + MCPCIA_MID(m)) +#define MCPCIA_DENSE_CONF(m) (IDENT_ADDR + 0xe1fe000000UL + MCPCIA_MID(m)) + +/* + * General Registers + */ +#define MCPCIA_REV(m) (MCPCIA_CSR(m) + 0x000) +#define MCPCIA_WHOAMI(m) (MCPCIA_CSR(m) + 0x040) +#define MCPCIA_PCI_LAT(m) (MCPCIA_CSR(m) + 0x080) +#define MCPCIA_CAP_CTRL(m) (MCPCIA_CSR(m) + 0x100) +#define MCPCIA_HAE_MEM(m) (MCPCIA_CSR(m) + 0x400) +#define MCPCIA_HAE_IO(m) (MCPCIA_CSR(m) + 0x440) +#define _MCPCIA_IACK_SC(m) (MCPCIA_CSR(m) + 0x480) +#define MCPCIA_HAE_DENSE(m) (MCPCIA_CSR(m) + 0x4C0) + +/* + * Interrupt Control registers + */ +#define MCPCIA_INT_CTL(m) (MCPCIA_CSR(m) + 0x500) +#define MCPCIA_INT_REQ(m) (MCPCIA_CSR(m) + 0x540) +#define MCPCIA_INT_TARG(m) (MCPCIA_CSR(m) + 0x580) +#define MCPCIA_INT_ADR(m) (MCPCIA_CSR(m) + 0x5C0) +#define MCPCIA_INT_ADR_EXT(m) (MCPCIA_CSR(m) + 0x600) +#define MCPCIA_INT_MASK0(m) (MCPCIA_CSR(m) + 0x640) +#define MCPCIA_INT_MASK1(m) (MCPCIA_CSR(m) + 0x680) +#define MCPCIA_INT_ACK0(m) (MCPCIA_CSR(m) + 0x10003f00) +#define MCPCIA_INT_ACK1(m) (MCPCIA_CSR(m) + 0x10003f40) + +/* + * Performance Monitor registers + */ +#define MCPCIA_PERF_MON(m) (MCPCIA_CSR(m) + 0x300) +#define MCPCIA_PERF_CONT(m) (MCPCIA_CSR(m) + 0x340) + +/* + * Diagnostic Registers + */ +#define MCPCIA_CAP_DIAG(m) (MCPCIA_CSR(m) + 0x700) +#define MCPCIA_TOP_OF_MEM(m) (MCPCIA_CSR(m) + 0x7C0) + +/* + * Error registers + */ +#define MCPCIA_MC_ERR0(m) (MCPCIA_CSR(m) + 0x800) +#define MCPCIA_MC_ERR1(m) (MCPCIA_CSR(m) + 0x840) +#define MCPCIA_CAP_ERR(m) (MCPCIA_CSR(m) + 0x880) +#define MCPCIA_PCI_ERR1(m) (MCPCIA_CSR(m) + 0x1040) +#define MCPCIA_MDPA_STAT(m) (MCPCIA_CSR(m) + 0x4000) +#define MCPCIA_MDPA_SYN(m) (MCPCIA_CSR(m) + 0x4040) +#define MCPCIA_MDPA_DIAG(m) (MCPCIA_CSR(m) + 0x4080) +#define MCPCIA_MDPB_STAT(m) (MCPCIA_CSR(m) + 0x8000) +#define MCPCIA_MDPB_SYN(m) (MCPCIA_CSR(m) + 0x8040) +#define MCPCIA_MDPB_DIAG(m) (MCPCIA_CSR(m) + 0x8080) + +/* + * PCI Address Translation Registers. + */ +#define MCPCIA_SG_TBIA(m) (MCPCIA_CSR(m) + 0x1300) +#define MCPCIA_HBASE(m) (MCPCIA_CSR(m) + 0x1340) + +#define MCPCIA_W0_BASE(m) (MCPCIA_CSR(m) + 0x1400) +#define MCPCIA_W0_MASK(m) (MCPCIA_CSR(m) + 0x1440) +#define MCPCIA_T0_BASE(m) (MCPCIA_CSR(m) + 0x1480) + +#define MCPCIA_W1_BASE(m) (MCPCIA_CSR(m) + 0x1500) +#define MCPCIA_W1_MASK(m) (MCPCIA_CSR(m) + 0x1540) +#define MCPCIA_T1_BASE(m) (MCPCIA_CSR(m) + 0x1580) + +#define MCPCIA_W2_BASE(m) (MCPCIA_CSR(m) + 0x1600) +#define MCPCIA_W2_MASK(m) (MCPCIA_CSR(m) + 0x1640) +#define MCPCIA_T2_BASE(m) (MCPCIA_CSR(m) + 0x1680) + +#define MCPCIA_W3_BASE(m) (MCPCIA_CSR(m) + 0x1700) +#define MCPCIA_W3_MASK(m) (MCPCIA_CSR(m) + 0x1740) +#define MCPCIA_T3_BASE(m) (MCPCIA_CSR(m) + 0x1780) + +/* Hack! Only words for bus 0. 
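+   (That is, the HAE and IACK_SC addresses below are hardwired to MID 4,
+   i.e. hose 0.)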
*/ + +#ifndef MCPCIA_ONE_HAE_WINDOW +#define MCPCIA_HAE_ADDRESS MCPCIA_HAE_MEM(4) +#endif +#define MCPCIA_IACK_SC _MCPCIA_IACK_SC(4) + +/* + * The canonical non-remaped I/O and MEM addresses have these values + * subtracted out. This is arranged so that folks manipulating ISA + * devices can use their familiar numbers and have them map to bus 0. + */ + +#define MCPCIA_IO_BIAS MCPCIA_IO(4) +#define MCPCIA_MEM_BIAS MCPCIA_DENSE(4) + +/* Offset between ram physical addresses and pci64 DAC bus addresses. */ +#define MCPCIA_DAC_OFFSET (1UL << 40) + +/* + * Data structure for handling MCPCIA machine checks: + */ +struct el_MCPCIA_uncorrected_frame_mcheck { + struct el_common header; + struct el_common_EV5_uncorrectable_mcheck procdata; +}; + + +#ifdef __KERNEL__ + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __IO_EXTERN_INLINE +#endif + +/* + * I/O functions: + * + * MCPCIA, the RAWHIDE family PCI/memory support chipset for the EV5 (21164) + * and EV56 (21164a) processors, can use either a sparse address mapping + * scheme, or the so-called byte-word PCI address space, to get at PCI memory + * and I/O. + * + * Unfortunately, we can't use BWIO with EV5, so for now, we always use SPARSE. + */ + +/* + * Memory functions. 64-bit and 32-bit accesses are done through + * dense memory space, everything else through sparse space. + * + * For reading and writing 8 and 16 bit quantities we need to + * go through one of the three sparse address mapping regions + * and use the HAE_MEM CSR to provide some bits of the address. + * The following few routines use only sparse address region 1 + * which gives 1Gbyte of accessible space which relates exactly + * to the amount of PCI memory mapping *into* system address space. + * See p 6-17 of the specification but it looks something like this: + * + * 21164 Address: + * + * 3 2 1 + * 9876543210987654321098765432109876543210 + * 1ZZZZ0.PCI.QW.Address............BBLL + * + * ZZ = SBZ + * BB = Byte offset + * LL = Transfer length + * + * PCI Address: + * + * 3 2 1 + * 10987654321098765432109876543210 + * HHH....PCI.QW.Address........ 
00 + * + * HHH = 31:29 HAE_MEM CSR + * + */ + +#define vip volatile int __force * +#define vuip volatile unsigned int __force * + +#ifndef MCPCIA_ONE_HAE_WINDOW +#define MCPCIA_FROB_MMIO \ + if (__mcpcia_is_mmio(hose)) { \ + set_hae(hose & 0xffffffff); \ + hose = hose - MCPCIA_DENSE(4) + MCPCIA_SPARSE(4); \ + } +#else +#define MCPCIA_FROB_MMIO \ + if (__mcpcia_is_mmio(hose)) { \ + hose = hose - MCPCIA_DENSE(4) + MCPCIA_SPARSE(4); \ + } +#endif + +extern inline int __mcpcia_is_mmio(unsigned long addr) +{ + return (addr & 0x80000000UL) == 0; +} + +__EXTERN_INLINE unsigned int mcpcia_ioread8(const void __iomem *xaddr) +{ + unsigned long addr = (unsigned long)xaddr & MCPCIA_MEM_MASK; + unsigned long hose = (unsigned long)xaddr & ~MCPCIA_MEM_MASK; + unsigned long result; + + MCPCIA_FROB_MMIO; + + result = *(vip) ((addr << 5) + hose + 0x00); + return __kernel_extbl(result, addr & 3); +} + +__EXTERN_INLINE void mcpcia_iowrite8(u8 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long)xaddr & MCPCIA_MEM_MASK; + unsigned long hose = (unsigned long)xaddr & ~MCPCIA_MEM_MASK; + unsigned long w; + + MCPCIA_FROB_MMIO; + + w = __kernel_insbl(b, addr & 3); + *(vuip) ((addr << 5) + hose + 0x00) = w; +} + +__EXTERN_INLINE unsigned int mcpcia_ioread16(const void __iomem *xaddr) +{ + unsigned long addr = (unsigned long)xaddr & MCPCIA_MEM_MASK; + unsigned long hose = (unsigned long)xaddr & ~MCPCIA_MEM_MASK; + unsigned long result; + + MCPCIA_FROB_MMIO; + + result = *(vip) ((addr << 5) + hose + 0x08); + return __kernel_extwl(result, addr & 3); +} + +__EXTERN_INLINE void mcpcia_iowrite16(u16 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long)xaddr & MCPCIA_MEM_MASK; + unsigned long hose = (unsigned long)xaddr & ~MCPCIA_MEM_MASK; + unsigned long w; + + MCPCIA_FROB_MMIO; + + w = __kernel_inswl(b, addr & 3); + *(vuip) ((addr << 5) + hose + 0x08) = w; +} + +__EXTERN_INLINE unsigned int mcpcia_ioread32(const void __iomem *xaddr) +{ + unsigned long addr = (unsigned long)xaddr; + + if (!__mcpcia_is_mmio(addr)) + addr = ((addr & 0xffff) << 5) + (addr & ~0xfffful) + 0x18; + + return *(vuip)addr; +} + +__EXTERN_INLINE void mcpcia_iowrite32(u32 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long)xaddr; + + if (!__mcpcia_is_mmio(addr)) + addr = ((addr & 0xffff) << 5) + (addr & ~0xfffful) + 0x18; + + *(vuip)addr = b; +} + + +__EXTERN_INLINE void __iomem *mcpcia_ioportmap(unsigned long addr) +{ + return (void __iomem *)(addr + MCPCIA_IO_BIAS); +} + +__EXTERN_INLINE void __iomem *mcpcia_ioremap(unsigned long addr, + unsigned long size) +{ + return (void __iomem *)(addr + MCPCIA_MEM_BIAS); +} + +__EXTERN_INLINE int mcpcia_is_ioaddr(unsigned long addr) +{ + return addr >= MCPCIA_SPARSE(0); +} + +__EXTERN_INLINE int mcpcia_is_mmio(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + return __mcpcia_is_mmio(addr); +} + +#undef MCPCIA_FROB_MMIO + +#undef vip +#undef vuip + +#undef __IO_PREFIX +#define __IO_PREFIX mcpcia +#define mcpcia_trivial_rw_bw 2 +#define mcpcia_trivial_rw_lq 1 +#define mcpcia_trivial_io_bw 0 +#define mcpcia_trivial_io_lq 0 +#define mcpcia_trivial_iounmap 1 +#include + +#ifdef __IO_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_MCPCIA__H__ */ diff --git a/arch/alpha/include/asm/core_polaris.h b/arch/alpha/include/asm/core_polaris.h new file mode 100644 index 0000000000..1c56dea647 --- /dev/null +++ b/arch/alpha/include/asm/core_polaris.h @@ -0,0 +1,111 @@ +/* 
SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_POLARIS__H__ +#define __ALPHA_POLARIS__H__ + +#include +#include + +/* + * POLARIS is the internal name for a core logic chipset which provides + * memory controller and PCI access for the 21164PC chip based systems. + * + * This file is based on: + * + * Polaris System Controller + * Device Functional Specification + * 22-Jan-98 + * Rev. 4.2 + * + */ + +/* Polaris memory regions */ +#define POLARIS_SPARSE_MEM_BASE (IDENT_ADDR + 0xf800000000UL) +#define POLARIS_DENSE_MEM_BASE (IDENT_ADDR + 0xf900000000UL) +#define POLARIS_SPARSE_IO_BASE (IDENT_ADDR + 0xf980000000UL) +#define POLARIS_SPARSE_CONFIG_BASE (IDENT_ADDR + 0xf9c0000000UL) +#define POLARIS_IACK_BASE (IDENT_ADDR + 0xf9f8000000UL) +#define POLARIS_DENSE_IO_BASE (IDENT_ADDR + 0xf9fc000000UL) +#define POLARIS_DENSE_CONFIG_BASE (IDENT_ADDR + 0xf9fe000000UL) + +#define POLARIS_IACK_SC POLARIS_IACK_BASE + +/* The Polaris command/status registers live in PCI Config space for + * bus 0/device 0. As such, they may be bytes, words, or doublewords. + */ +#define POLARIS_W_VENID (POLARIS_DENSE_CONFIG_BASE) +#define POLARIS_W_DEVID (POLARIS_DENSE_CONFIG_BASE+2) +#define POLARIS_W_CMD (POLARIS_DENSE_CONFIG_BASE+4) +#define POLARIS_W_STATUS (POLARIS_DENSE_CONFIG_BASE+6) + +/* + * Data structure for handling POLARIS machine checks: + */ +struct el_POLARIS_sysdata_mcheck { + u_long psc_status; + u_long psc_pcictl0; + u_long psc_pcictl1; + u_long psc_pcictl2; +}; + +#ifdef __KERNEL__ + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __IO_EXTERN_INLINE +#endif + +/* + * I/O functions: + * + * POLARIS, the PCI/memory support chipset for the PCA56 (21164PC) + * processors, can use either a sparse address mapping scheme, or the + * so-called byte-word PCI address space, to get at PCI memory and I/O. + * + * However, we will support only the BWX form. + */ + +/* + * Memory functions. Polaris allows all accesses (byte/word + * as well as long/quad) to be done through dense space. + * + * We will only support DENSE access via BWX insns. + */ + +__EXTERN_INLINE void __iomem *polaris_ioportmap(unsigned long addr) +{ + return (void __iomem *)(addr + POLARIS_DENSE_IO_BASE); +} + +__EXTERN_INLINE void __iomem *polaris_ioremap(unsigned long addr, + unsigned long size) +{ + return (void __iomem *)(addr + POLARIS_DENSE_MEM_BASE); +} + +__EXTERN_INLINE int polaris_is_ioaddr(unsigned long addr) +{ + return addr >= POLARIS_SPARSE_MEM_BASE; +} + +__EXTERN_INLINE int polaris_is_mmio(const volatile void __iomem *addr) +{ + return (unsigned long)addr < POLARIS_SPARSE_IO_BASE; +} + +#undef __IO_PREFIX +#define __IO_PREFIX polaris +#define polaris_trivial_rw_bw 1 +#define polaris_trivial_rw_lq 1 +#define polaris_trivial_io_bw 1 +#define polaris_trivial_io_lq 1 +#define polaris_trivial_iounmap 1 +#include + +#ifdef __IO_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_POLARIS__H__ */ diff --git a/arch/alpha/include/asm/core_t2.h b/arch/alpha/include/asm/core_t2.h new file mode 100644 index 0000000000..12bb7addc7 --- /dev/null +++ b/arch/alpha/include/asm/core_t2.h @@ -0,0 +1,615 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_T2__H__ +#define __ALPHA_T2__H__ + +/* Fit everything into one 128MB HAE window. */ +#define T2_ONE_HAE_WINDOW 1 + +#include +#include +#include + +/* + * T2 is the internal name for the core logic chipset which provides + * memory controller and PCI access for the SABLE-based systems. 
+ * + * This file is based on: + * + * SABLE I/O Specification + * Revision/Update Information: 1.3 + * + * jestabro@amt.tay1.dec.com Initial Version. + * + */ + +#define T2_MEM_R1_MASK 0x07ffffff /* Mem sparse region 1 mask is 27 bits */ + +/* GAMMA-SABLE is a SABLE with EV5-based CPUs */ +/* All LYNX machines, EV4 or EV5, use the GAMMA bias also */ +#define _GAMMA_BIAS 0x8000000000UL + +#if defined(CONFIG_ALPHA_GENERIC) +#define GAMMA_BIAS alpha_mv.sys.t2.gamma_bias +#elif defined(CONFIG_ALPHA_GAMMA) +#define GAMMA_BIAS _GAMMA_BIAS +#else +#define GAMMA_BIAS 0 +#endif + +/* + * Memory spaces: + */ +#define T2_CONF (IDENT_ADDR + GAMMA_BIAS + 0x390000000UL) +#define T2_IO (IDENT_ADDR + GAMMA_BIAS + 0x3a0000000UL) +#define T2_SPARSE_MEM (IDENT_ADDR + GAMMA_BIAS + 0x200000000UL) +#define T2_DENSE_MEM (IDENT_ADDR + GAMMA_BIAS + 0x3c0000000UL) + +#define T2_IOCSR (IDENT_ADDR + GAMMA_BIAS + 0x38e000000UL) +#define T2_CERR1 (IDENT_ADDR + GAMMA_BIAS + 0x38e000020UL) +#define T2_CERR2 (IDENT_ADDR + GAMMA_BIAS + 0x38e000040UL) +#define T2_CERR3 (IDENT_ADDR + GAMMA_BIAS + 0x38e000060UL) +#define T2_PERR1 (IDENT_ADDR + GAMMA_BIAS + 0x38e000080UL) +#define T2_PERR2 (IDENT_ADDR + GAMMA_BIAS + 0x38e0000a0UL) +#define T2_PSCR (IDENT_ADDR + GAMMA_BIAS + 0x38e0000c0UL) +#define T2_HAE_1 (IDENT_ADDR + GAMMA_BIAS + 0x38e0000e0UL) +#define T2_HAE_2 (IDENT_ADDR + GAMMA_BIAS + 0x38e000100UL) +#define T2_HBASE (IDENT_ADDR + GAMMA_BIAS + 0x38e000120UL) +#define T2_WBASE1 (IDENT_ADDR + GAMMA_BIAS + 0x38e000140UL) +#define T2_WMASK1 (IDENT_ADDR + GAMMA_BIAS + 0x38e000160UL) +#define T2_TBASE1 (IDENT_ADDR + GAMMA_BIAS + 0x38e000180UL) +#define T2_WBASE2 (IDENT_ADDR + GAMMA_BIAS + 0x38e0001a0UL) +#define T2_WMASK2 (IDENT_ADDR + GAMMA_BIAS + 0x38e0001c0UL) +#define T2_TBASE2 (IDENT_ADDR + GAMMA_BIAS + 0x38e0001e0UL) +#define T2_TLBBR (IDENT_ADDR + GAMMA_BIAS + 0x38e000200UL) +#define T2_IVR (IDENT_ADDR + GAMMA_BIAS + 0x38e000220UL) +#define T2_HAE_3 (IDENT_ADDR + GAMMA_BIAS + 0x38e000240UL) +#define T2_HAE_4 (IDENT_ADDR + GAMMA_BIAS + 0x38e000260UL) + +/* The CSRs below are T3/T4 only */ +#define T2_WBASE3 (IDENT_ADDR + GAMMA_BIAS + 0x38e000280UL) +#define T2_WMASK3 (IDENT_ADDR + GAMMA_BIAS + 0x38e0002a0UL) +#define T2_TBASE3 (IDENT_ADDR + GAMMA_BIAS + 0x38e0002c0UL) + +#define T2_TDR0 (IDENT_ADDR + GAMMA_BIAS + 0x38e000300UL) +#define T2_TDR1 (IDENT_ADDR + GAMMA_BIAS + 0x38e000320UL) +#define T2_TDR2 (IDENT_ADDR + GAMMA_BIAS + 0x38e000340UL) +#define T2_TDR3 (IDENT_ADDR + GAMMA_BIAS + 0x38e000360UL) +#define T2_TDR4 (IDENT_ADDR + GAMMA_BIAS + 0x38e000380UL) +#define T2_TDR5 (IDENT_ADDR + GAMMA_BIAS + 0x38e0003a0UL) +#define T2_TDR6 (IDENT_ADDR + GAMMA_BIAS + 0x38e0003c0UL) +#define T2_TDR7 (IDENT_ADDR + GAMMA_BIAS + 0x38e0003e0UL) + +#define T2_WBASE4 (IDENT_ADDR + GAMMA_BIAS + 0x38e000400UL) +#define T2_WMASK4 (IDENT_ADDR + GAMMA_BIAS + 0x38e000420UL) +#define T2_TBASE4 (IDENT_ADDR + GAMMA_BIAS + 0x38e000440UL) + +#define T2_AIR (IDENT_ADDR + GAMMA_BIAS + 0x38e000460UL) +#define T2_VAR (IDENT_ADDR + GAMMA_BIAS + 0x38e000480UL) +#define T2_DIR (IDENT_ADDR + GAMMA_BIAS + 0x38e0004a0UL) +#define T2_ICE (IDENT_ADDR + GAMMA_BIAS + 0x38e0004c0UL) + +#ifndef T2_ONE_HAE_WINDOW +#define T2_HAE_ADDRESS T2_HAE_1 +#endif + +/* T2 CSRs are in the non-cachable primary IO space from 3.8000.0000 to + 3.8fff.ffff + * + * +--------------+ 3 8000 0000 + * | CPU 0 CSRs | + * +--------------+ 3 8100 0000 + * | CPU 1 CSRs | + * +--------------+ 3 8200 0000 + * | CPU 2 CSRs | + * +--------------+ 3 8300 0000 + * | CPU 3 CSRs | + * 
+--------------+ 3 8400 0000 + * | CPU Reserved | + * +--------------+ 3 8700 0000 + * | Mem Reserved | + * +--------------+ 3 8800 0000 + * | Mem 0 CSRs | + * +--------------+ 3 8900 0000 + * | Mem 1 CSRs | + * +--------------+ 3 8a00 0000 + * | Mem 2 CSRs | + * +--------------+ 3 8b00 0000 + * | Mem 3 CSRs | + * +--------------+ 3 8c00 0000 + * | Mem Reserved | + * +--------------+ 3 8e00 0000 + * | PCI Bridge | + * +--------------+ 3 8f00 0000 + * | Expansion IO | + * +--------------+ 3 9000 0000 + * + * + */ +#define T2_CPU0_BASE (IDENT_ADDR + GAMMA_BIAS + 0x380000000L) +#define T2_CPU1_BASE (IDENT_ADDR + GAMMA_BIAS + 0x381000000L) +#define T2_CPU2_BASE (IDENT_ADDR + GAMMA_BIAS + 0x382000000L) +#define T2_CPU3_BASE (IDENT_ADDR + GAMMA_BIAS + 0x383000000L) + +#define T2_CPUn_BASE(n) (T2_CPU0_BASE + (((n)&3) * 0x001000000L)) + +#define T2_MEM0_BASE (IDENT_ADDR + GAMMA_BIAS + 0x388000000L) +#define T2_MEM1_BASE (IDENT_ADDR + GAMMA_BIAS + 0x389000000L) +#define T2_MEM2_BASE (IDENT_ADDR + GAMMA_BIAS + 0x38a000000L) +#define T2_MEM3_BASE (IDENT_ADDR + GAMMA_BIAS + 0x38b000000L) + + +/* + * Sable CPU Module CSRS + * + * These are CSRs for hardware other than the CPU chip on the CPU module. + * The CPU module has Backup Cache control logic, Cbus control logic, and + * interrupt control logic on it. There is a duplicate tag store to speed + * up maintaining cache coherency. + */ + +struct sable_cpu_csr { + unsigned long bcc; long fill_00[3]; /* Backup Cache Control */ + unsigned long bcce; long fill_01[3]; /* Backup Cache Correctable Error */ + unsigned long bccea; long fill_02[3]; /* B-Cache Corr Err Address Latch */ + unsigned long bcue; long fill_03[3]; /* B-Cache Uncorrectable Error */ + unsigned long bcuea; long fill_04[3]; /* B-Cache Uncorr Err Addr Latch */ + unsigned long dter; long fill_05[3]; /* Duplicate Tag Error */ + unsigned long cbctl; long fill_06[3]; /* CBus Control */ + unsigned long cbe; long fill_07[3]; /* CBus Error */ + unsigned long cbeal; long fill_08[3]; /* CBus Error Addr Latch low */ + unsigned long cbeah; long fill_09[3]; /* CBus Error Addr Latch high */ + unsigned long pmbx; long fill_10[3]; /* Processor Mailbox */ + unsigned long ipir; long fill_11[3]; /* Inter-Processor Int Request */ + unsigned long sic; long fill_12[3]; /* System Interrupt Clear */ + unsigned long adlk; long fill_13[3]; /* Address Lock (LDxL/STxC) */ + unsigned long madrl; long fill_14[3]; /* CBus Miss Address */ + unsigned long rev; long fill_15[3]; /* CMIC Revision */ +}; + +/* + * Data structure for handling T2 machine checks: + */ +struct el_t2_frame_header { + unsigned int elcf_fid; /* Frame ID (from above) */ + unsigned int elcf_size; /* Size of frame in bytes */ +}; + +struct el_t2_procdata_mcheck { + unsigned long elfmc_paltemp[32]; /* PAL TEMP REGS. */ + /* EV4-specific fields */ + unsigned long elfmc_exc_addr; /* Addr of excepting insn. */ + unsigned long elfmc_exc_sum; /* Summary of arith traps. */ + unsigned long elfmc_exc_mask; /* Exception mask (from exc_sum). */ + unsigned long elfmc_iccsr; /* IBox hardware enables. */ + unsigned long elfmc_pal_base; /* Base address for PALcode. */ + unsigned long elfmc_hier; /* Hardware Interrupt Enable. */ + unsigned long elfmc_hirr; /* Hardware Interrupt Request. */ + unsigned long elfmc_mm_csr; /* D-stream fault info. */ + unsigned long elfmc_dc_stat; /* D-cache status (ECC/Parity Err). */ + unsigned long elfmc_dc_addr; /* EV3 Phys Addr for ECC/DPERR. */ + unsigned long elfmc_abox_ctl; /* ABox Control Register. 
*/ + unsigned long elfmc_biu_stat; /* BIU Status. */ + unsigned long elfmc_biu_addr; /* BUI Address. */ + unsigned long elfmc_biu_ctl; /* BIU Control. */ + unsigned long elfmc_fill_syndrome; /* For correcting ECC errors. */ + unsigned long elfmc_fill_addr;/* Cache block which was being read. */ + unsigned long elfmc_va; /* Effective VA of fault or miss. */ + unsigned long elfmc_bc_tag; /* Backup Cache Tag Probe Results. */ +}; + +/* + * Sable processor specific Machine Check Data segment. + */ + +struct el_t2_logout_header { + unsigned int elfl_size; /* size in bytes of logout area. */ + unsigned int elfl_sbz1:31; /* Should be zero. */ + unsigned int elfl_retry:1; /* Retry flag. */ + unsigned int elfl_procoffset; /* Processor-specific offset. */ + unsigned int elfl_sysoffset; /* Offset of system-specific. */ + unsigned int elfl_error_type; /* PAL error type code. */ + unsigned int elfl_frame_rev; /* PAL Frame revision. */ +}; +struct el_t2_sysdata_mcheck { + unsigned long elcmc_bcc; /* CSR 0 */ + unsigned long elcmc_bcce; /* CSR 1 */ + unsigned long elcmc_bccea; /* CSR 2 */ + unsigned long elcmc_bcue; /* CSR 3 */ + unsigned long elcmc_bcuea; /* CSR 4 */ + unsigned long elcmc_dter; /* CSR 5 */ + unsigned long elcmc_cbctl; /* CSR 6 */ + unsigned long elcmc_cbe; /* CSR 7 */ + unsigned long elcmc_cbeal; /* CSR 8 */ + unsigned long elcmc_cbeah; /* CSR 9 */ + unsigned long elcmc_pmbx; /* CSR 10 */ + unsigned long elcmc_ipir; /* CSR 11 */ + unsigned long elcmc_sic; /* CSR 12 */ + unsigned long elcmc_adlk; /* CSR 13 */ + unsigned long elcmc_madrl; /* CSR 14 */ + unsigned long elcmc_crrev4; /* CSR 15 */ +}; + +/* + * Sable memory error frame - sable pfms section 3.42 + */ +struct el_t2_data_memory { + struct el_t2_frame_header elcm_hdr; /* ID$MEM-FERR = 0x08 */ + unsigned int elcm_module; /* Module id. */ + unsigned int elcm_res04; /* Reserved. */ + unsigned long elcm_merr; /* CSR0: Error Reg 1. */ + unsigned long elcm_mcmd1; /* CSR1: Command Trap 1. */ + unsigned long elcm_mcmd2; /* CSR2: Command Trap 2. */ + unsigned long elcm_mconf; /* CSR3: Configuration. */ + unsigned long elcm_medc1; /* CSR4: EDC Status 1. */ + unsigned long elcm_medc2; /* CSR5: EDC Status 2. */ + unsigned long elcm_medcc; /* CSR6: EDC Control. */ + unsigned long elcm_msctl; /* CSR7: Stream Buffer Control. */ + unsigned long elcm_mref; /* CSR8: Refresh Control. */ + unsigned long elcm_filter; /* CSR9: CRD Filter Control. 
*/ +}; + + +/* + * Sable other CPU error frame - sable pfms section 3.43 + */ +struct el_t2_data_other_cpu { + short elco_cpuid; /* CPU ID */ + short elco_res02[3]; + unsigned long elco_bcc; /* CSR 0 */ + unsigned long elco_bcce; /* CSR 1 */ + unsigned long elco_bccea; /* CSR 2 */ + unsigned long elco_bcue; /* CSR 3 */ + unsigned long elco_bcuea; /* CSR 4 */ + unsigned long elco_dter; /* CSR 5 */ + unsigned long elco_cbctl; /* CSR 6 */ + unsigned long elco_cbe; /* CSR 7 */ + unsigned long elco_cbeal; /* CSR 8 */ + unsigned long elco_cbeah; /* CSR 9 */ + unsigned long elco_pmbx; /* CSR 10 */ + unsigned long elco_ipir; /* CSR 11 */ + unsigned long elco_sic; /* CSR 12 */ + unsigned long elco_adlk; /* CSR 13 */ + unsigned long elco_madrl; /* CSR 14 */ + unsigned long elco_crrev4; /* CSR 15 */ +}; + +/* + * Sable other CPU error frame - sable pfms section 3.44 + */ +struct el_t2_data_t2{ + struct el_t2_frame_header elct_hdr; /* ID$T2-FRAME */ + unsigned long elct_iocsr; /* IO Control and Status Register */ + unsigned long elct_cerr1; /* Cbus Error Register 1 */ + unsigned long elct_cerr2; /* Cbus Error Register 2 */ + unsigned long elct_cerr3; /* Cbus Error Register 3 */ + unsigned long elct_perr1; /* PCI Error Register 1 */ + unsigned long elct_perr2; /* PCI Error Register 2 */ + unsigned long elct_hae0_1; /* High Address Extension Register 1 */ + unsigned long elct_hae0_2; /* High Address Extension Register 2 */ + unsigned long elct_hbase; /* High Base Register */ + unsigned long elct_wbase1; /* Window Base Register 1 */ + unsigned long elct_wmask1; /* Window Mask Register 1 */ + unsigned long elct_tbase1; /* Translated Base Register 1 */ + unsigned long elct_wbase2; /* Window Base Register 2 */ + unsigned long elct_wmask2; /* Window Mask Register 2 */ + unsigned long elct_tbase2; /* Translated Base Register 2 */ + unsigned long elct_tdr0; /* TLB Data Register 0 */ + unsigned long elct_tdr1; /* TLB Data Register 1 */ + unsigned long elct_tdr2; /* TLB Data Register 2 */ + unsigned long elct_tdr3; /* TLB Data Register 3 */ + unsigned long elct_tdr4; /* TLB Data Register 4 */ + unsigned long elct_tdr5; /* TLB Data Register 5 */ + unsigned long elct_tdr6; /* TLB Data Register 6 */ + unsigned long elct_tdr7; /* TLB Data Register 7 */ +}; + +/* + * Sable error log data structure - sable pfms section 3.40 + */ +struct el_t2_data_corrected { + unsigned long elcpb_biu_stat; + unsigned long elcpb_biu_addr; + unsigned long elcpb_biu_ctl; + unsigned long elcpb_fill_syndrome; + unsigned long elcpb_fill_addr; + unsigned long elcpb_bc_tag; +}; + +/* + * Sable error log data structure + * Note there are 4 memory slots on sable (see t2.h) + */ +struct el_t2_frame_mcheck { + struct el_t2_frame_header elfmc_header; /* ID$P-FRAME_MCHECK */ + struct el_t2_logout_header elfmc_hdr; + struct el_t2_procdata_mcheck elfmc_procdata; + struct el_t2_sysdata_mcheck elfmc_sysdata; + struct el_t2_data_t2 elfmc_t2data; + struct el_t2_data_memory elfmc_memdata[4]; + struct el_t2_frame_header elfmc_footer; /* empty */ +}; + + +/* + * Sable error log data structures on memory errors + */ +struct el_t2_frame_corrected { + struct el_t2_frame_header elfcc_header; /* ID$P-BC-COR */ + struct el_t2_logout_header elfcc_hdr; + struct el_t2_data_corrected elfcc_procdata; +/* struct el_t2_data_t2 elfcc_t2data; */ +/* struct el_t2_data_memory elfcc_memdata[4]; */ + struct el_t2_frame_header elfcc_footer; /* empty */ +}; + + +#ifdef __KERNEL__ + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __IO_EXTERN_INLINE 
+#endif + +/* + * I/O functions: + * + * T2 (the core logic PCI/memory support chipset for the SABLE + * series of processors uses a sparse address mapping scheme to + * get at PCI memory and I/O. + */ + +#define vip volatile int * +#define vuip volatile unsigned int * + +extern inline u8 t2_inb(unsigned long addr) +{ + long result = *(vip) ((addr << 5) + T2_IO + 0x00); + return __kernel_extbl(result, addr & 3); +} + +extern inline void t2_outb(u8 b, unsigned long addr) +{ + unsigned long w; + + w = __kernel_insbl(b, addr & 3); + *(vuip) ((addr << 5) + T2_IO + 0x00) = w; + mb(); +} + +extern inline u16 t2_inw(unsigned long addr) +{ + long result = *(vip) ((addr << 5) + T2_IO + 0x08); + return __kernel_extwl(result, addr & 3); +} + +extern inline void t2_outw(u16 b, unsigned long addr) +{ + unsigned long w; + + w = __kernel_inswl(b, addr & 3); + *(vuip) ((addr << 5) + T2_IO + 0x08) = w; + mb(); +} + +extern inline u32 t2_inl(unsigned long addr) +{ + return *(vuip) ((addr << 5) + T2_IO + 0x18); +} + +extern inline void t2_outl(u32 b, unsigned long addr) +{ + *(vuip) ((addr << 5) + T2_IO + 0x18) = b; + mb(); +} + + +/* + * Memory functions. + * + * For reading and writing 8 and 16 bit quantities we need to + * go through one of the three sparse address mapping regions + * and use the HAE_MEM CSR to provide some bits of the address. + * The following few routines use only sparse address region 1 + * which gives 1Gbyte of accessible space which relates exactly + * to the amount of PCI memory mapping *into* system address space. + * See p 6-17 of the specification but it looks something like this: + * + * 21164 Address: + * + * 3 2 1 + * 9876543210987654321098765432109876543210 + * 1ZZZZ0.PCI.QW.Address............BBLL + * + * ZZ = SBZ + * BB = Byte offset + * LL = Transfer length + * + * PCI Address: + * + * 3 2 1 + * 10987654321098765432109876543210 + * HHH....PCI.QW.Address........ 00 + * + * HHH = 31:29 HAE_MEM CSR + * + */ + +#ifdef T2_ONE_HAE_WINDOW +#define t2_set_hae +#else +#define t2_set_hae { \ + unsigned long msb = addr >> 27; \ + addr &= T2_MEM_R1_MASK; \ + set_hae(msb); \ +} +#endif + +/* + * NOTE: take T2_DENSE_MEM off in each readX/writeX routine, since + * they may be called directly, rather than through the + * ioreadNN/iowriteNN routines. + */ + +__EXTERN_INLINE u8 t2_readb(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr - T2_DENSE_MEM; + unsigned long result; + + t2_set_hae; + + result = *(vip) ((addr << 5) + T2_SPARSE_MEM + 0x00); + return __kernel_extbl(result, addr & 3); +} + +__EXTERN_INLINE u16 t2_readw(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr - T2_DENSE_MEM; + unsigned long result; + + t2_set_hae; + + result = *(vuip) ((addr << 5) + T2_SPARSE_MEM + 0x08); + return __kernel_extwl(result, addr & 3); +} + +/* + * On SABLE with T2, we must use SPARSE memory even for 32-bit access, + * because we cannot access all of DENSE without changing its HAE. 
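+ * (This is why t2_readl/t2_writel below go through sparse space with the
+ * longword offset 0x18 rather than using a plain dense-space access.)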
+ */ +__EXTERN_INLINE u32 t2_readl(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr - T2_DENSE_MEM; + unsigned long result; + + t2_set_hae; + + result = *(vuip) ((addr << 5) + T2_SPARSE_MEM + 0x18); + return result & 0xffffffffUL; +} + +__EXTERN_INLINE u64 t2_readq(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr - T2_DENSE_MEM; + unsigned long r0, r1, work; + + t2_set_hae; + + work = (addr << 5) + T2_SPARSE_MEM + 0x18; + r0 = *(vuip)(work); + r1 = *(vuip)(work + (4 << 5)); + return r1 << 32 | r0; +} + +__EXTERN_INLINE void t2_writeb(u8 b, volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr - T2_DENSE_MEM; + unsigned long w; + + t2_set_hae; + + w = __kernel_insbl(b, addr & 3); + *(vuip) ((addr << 5) + T2_SPARSE_MEM + 0x00) = w; +} + +__EXTERN_INLINE void t2_writew(u16 b, volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr - T2_DENSE_MEM; + unsigned long w; + + t2_set_hae; + + w = __kernel_inswl(b, addr & 3); + *(vuip) ((addr << 5) + T2_SPARSE_MEM + 0x08) = w; +} + +/* + * On SABLE with T2, we must use SPARSE memory even for 32-bit access, + * because we cannot access all of DENSE without changing its HAE. + */ +__EXTERN_INLINE void t2_writel(u32 b, volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr - T2_DENSE_MEM; + + t2_set_hae; + + *(vuip) ((addr << 5) + T2_SPARSE_MEM + 0x18) = b; +} + +__EXTERN_INLINE void t2_writeq(u64 b, volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr - T2_DENSE_MEM; + unsigned long work; + + t2_set_hae; + + work = (addr << 5) + T2_SPARSE_MEM + 0x18; + *(vuip)work = b; + *(vuip)(work + (4 << 5)) = b >> 32; +} + +__EXTERN_INLINE void __iomem *t2_ioportmap(unsigned long addr) +{ + return (void __iomem *)(addr + T2_IO); +} + +__EXTERN_INLINE void __iomem *t2_ioremap(unsigned long addr, + unsigned long size) +{ + return (void __iomem *)(addr + T2_DENSE_MEM); +} + +__EXTERN_INLINE int t2_is_ioaddr(unsigned long addr) +{ + return (long)addr >= 0; +} + +__EXTERN_INLINE int t2_is_mmio(const volatile void __iomem *addr) +{ + return (unsigned long)addr >= T2_DENSE_MEM; +} + +/* New-style ioread interface. The mmio routines are so ugly for T2 that + it doesn't make sense to merge the pio and mmio routines. 
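+   Each t2_ioreadNN/t2_iowriteNN generated by IOPORT() below simply tests
+   the address with t2_is_mmio() and dispatches to the t2_readX/t2_writeX
+   (sparse MMIO) or t2_inX/t2_outX (PIO) helpers at run time.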
*/ + +#define IOPORT(OS, NS) \ +__EXTERN_INLINE unsigned int t2_ioread##NS(const void __iomem *xaddr) \ +{ \ + if (t2_is_mmio(xaddr)) \ + return t2_read##OS(xaddr); \ + else \ + return t2_in##OS((unsigned long)xaddr - T2_IO); \ +} \ +__EXTERN_INLINE void t2_iowrite##NS(u##NS b, void __iomem *xaddr) \ +{ \ + if (t2_is_mmio(xaddr)) \ + t2_write##OS(b, xaddr); \ + else \ + t2_out##OS(b, (unsigned long)xaddr - T2_IO); \ +} + +IOPORT(b, 8) +IOPORT(w, 16) +IOPORT(l, 32) + +#undef IOPORT + +#undef vip +#undef vuip + +#undef __IO_PREFIX +#define __IO_PREFIX t2 +#define t2_trivial_rw_bw 0 +#define t2_trivial_rw_lq 0 +#define t2_trivial_io_bw 0 +#define t2_trivial_io_lq 0 +#define t2_trivial_iounmap 1 +#include + +#ifdef __IO_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_T2__H__ */ diff --git a/arch/alpha/include/asm/core_titan.h b/arch/alpha/include/asm/core_titan.h new file mode 100644 index 0000000000..dcc02682c1 --- /dev/null +++ b/arch/alpha/include/asm/core_titan.h @@ -0,0 +1,410 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_TITAN__H__ +#define __ALPHA_TITAN__H__ + +#include +#include + +/* + * TITAN is the internal names for a core logic chipset which provides + * memory controller and PCI/AGP access for 21264 based systems. + * + * This file is based on: + * + * Titan Chipset Engineering Specification + * Revision 0.12 + * 13 July 1999 + * + */ + +/* XXX: Do we need to conditionalize on this? */ +#ifdef USE_48_BIT_KSEG +#define TI_BIAS 0x80000000000UL +#else +#define TI_BIAS 0x10000000000UL +#endif + +/* + * CChip, DChip, and PChip registers + */ + +typedef struct { + volatile unsigned long csr __attribute__((aligned(64))); +} titan_64; + +typedef struct { + titan_64 csc; + titan_64 mtr; + titan_64 misc; + titan_64 mpd; + titan_64 aar0; + titan_64 aar1; + titan_64 aar2; + titan_64 aar3; + titan_64 dim0; + titan_64 dim1; + titan_64 dir0; + titan_64 dir1; + titan_64 drir; + titan_64 prben; + titan_64 iic0; + titan_64 iic1; + titan_64 mpr0; + titan_64 mpr1; + titan_64 mpr2; + titan_64 mpr3; + titan_64 rsvd[2]; + titan_64 ttr; + titan_64 tdr; + titan_64 dim2; + titan_64 dim3; + titan_64 dir2; + titan_64 dir3; + titan_64 iic2; + titan_64 iic3; + titan_64 pwr; + titan_64 reserved[17]; + titan_64 cmonctla; + titan_64 cmonctlb; + titan_64 cmoncnt01; + titan_64 cmoncnt23; + titan_64 cpen; +} titan_cchip; + +typedef struct { + titan_64 dsc; + titan_64 str; + titan_64 drev; + titan_64 dsc2; +} titan_dchip; + +typedef struct { + titan_64 wsba[4]; + titan_64 wsm[4]; + titan_64 tba[4]; + titan_64 pctl; + titan_64 plat; + titan_64 reserved0[2]; + union { + struct { + titan_64 serror; + titan_64 serren; + titan_64 serrset; + titan_64 reserved0; + titan_64 gperror; + titan_64 gperren; + titan_64 gperrset; + titan_64 reserved1; + titan_64 gtlbiv; + titan_64 gtlbia; + titan_64 reserved2[2]; + titan_64 sctl; + titan_64 reserved3[3]; + } g; + struct { + titan_64 agperror; + titan_64 agperren; + titan_64 agperrset; + titan_64 agplastwr; + titan_64 aperror; + titan_64 aperren; + titan_64 aperrset; + titan_64 reserved0; + titan_64 atlbiv; + titan_64 atlbia; + titan_64 reserved1[6]; + } a; + } port_specific; + titan_64 sprst; + titan_64 reserved1[31]; +} titan_pachip_port; + +typedef struct { + titan_pachip_port g_port; + titan_pachip_port a_port; +} titan_pachip; + +#define TITAN_cchip ((titan_cchip *)(IDENT_ADDR+TI_BIAS+0x1A0000000UL)) +#define TITAN_dchip ((titan_dchip *)(IDENT_ADDR+TI_BIAS+0x1B0000800UL)) +#define 
TITAN_pachip0 ((titan_pachip *)(IDENT_ADDR+TI_BIAS+0x180000000UL)) +#define TITAN_pachip1 ((titan_pachip *)(IDENT_ADDR+TI_BIAS+0x380000000UL)) +extern unsigned TITAN_agp; +extern int TITAN_bootcpu; + +/* + * TITAN PA-chip Window Space Base Address register. + * (WSBA[0-2]) + */ +#define wsba_m_ena 0x1 +#define wsba_m_sg 0x2 +#define wsba_m_addr 0xFFF00000 +#define wmask_k_sz1gb 0x3FF00000 +union TPAchipWSBA { + struct { + unsigned wsba_v_ena : 1; + unsigned wsba_v_sg : 1; + unsigned wsba_v_rsvd1 : 18; + unsigned wsba_v_addr : 12; + unsigned wsba_v_rsvd2 : 32; + } wsba_r_bits; + int wsba_q_whole [2]; +}; + +/* + * TITAN PA-chip Control Register + * This definition covers both the G-Port GPCTL and the A-PORT APCTL. + * Bits <51:0> are the same in both cases. APCTL<63:52> are only + * applicable to AGP. + */ +#define pctl_m_fbtb 0x00000001 +#define pctl_m_thdis 0x00000002 +#define pctl_m_chaindis 0x00000004 +#define pctl_m_tgtlat 0x00000018 +#define pctl_m_hole 0x00000020 +#define pctl_m_mwin 0x00000040 +#define pctl_m_arbena 0x00000080 +#define pctl_m_prigrp 0x0000FF00 +#define pctl_m_ppri 0x00010000 +#define pctl_m_pcispd66 0x00020000 +#define pctl_m_cngstlt 0x003C0000 +#define pctl_m_ptpdesten 0x3FC00000 +#define pctl_m_dpcen 0x40000000 +#define pctl_m_apcen 0x0000000080000000UL +#define pctl_m_dcrtv 0x0000000300000000UL +#define pctl_m_en_stepping 0x0000000400000000UL +#define apctl_m_rsvd1 0x000FFFF800000000UL +#define apctl_m_agp_rate 0x0030000000000000UL +#define apctl_m_agp_sba_en 0x0040000000000000UL +#define apctl_m_agp_en 0x0080000000000000UL +#define apctl_m_rsvd2 0x0100000000000000UL +#define apctl_m_agp_present 0x0200000000000000UL +#define apctl_agp_hp_rd 0x1C00000000000000UL +#define apctl_agp_lp_rd 0xE000000000000000UL +#define gpctl_m_rsvd 0xFFFFFFF800000000UL +union TPAchipPCTL { + struct { + unsigned pctl_v_fbtb : 1; /* A/G [0] */ + unsigned pctl_v_thdis : 1; /* A/G [1] */ + unsigned pctl_v_chaindis : 1; /* A/G [2] */ + unsigned pctl_v_tgtlat : 2; /* A/G [4:3] */ + unsigned pctl_v_hole : 1; /* A/G [5] */ + unsigned pctl_v_mwin : 1; /* A/G [6] */ + unsigned pctl_v_arbena : 1; /* A/G [7] */ + unsigned pctl_v_prigrp : 8; /* A/G [15:8] */ + unsigned pctl_v_ppri : 1; /* A/G [16] */ + unsigned pctl_v_pcispd66 : 1; /* A/G [17] */ + unsigned pctl_v_cngstlt : 4; /* A/G [21:18] */ + unsigned pctl_v_ptpdesten : 8; /* A/G [29:22] */ + unsigned pctl_v_dpcen : 1; /* A/G [30] */ + unsigned pctl_v_apcen : 1; /* A/G [31] */ + unsigned pctl_v_dcrtv : 2; /* A/G [33:32] */ + unsigned pctl_v_en_stepping :1; /* A/G [34] */ + unsigned apctl_v_rsvd1 : 17; /* A [51:35] */ + unsigned apctl_v_agp_rate : 2; /* A [53:52] */ + unsigned apctl_v_agp_sba_en : 1; /* A [54] */ + unsigned apctl_v_agp_en : 1; /* A [55] */ + unsigned apctl_v_rsvd2 : 1; /* A [56] */ + unsigned apctl_v_agp_present : 1; /* A [57] */ + unsigned apctl_v_agp_hp_rd : 3; /* A [60:58] */ + unsigned apctl_v_agp_lp_rd : 3; /* A [63:61] */ + } pctl_r_bits; + unsigned int pctl_l_whole [2]; + unsigned long pctl_q_whole; +}; + +/* + * SERROR / SERREN / SERRSET + */ +union TPAchipSERR { + struct { + unsigned serr_v_lost_uecc : 1; /* [0] */ + unsigned serr_v_uecc : 1; /* [1] */ + unsigned serr_v_cre : 1; /* [2] */ + unsigned serr_v_nxio : 1; /* [3] */ + unsigned serr_v_lost_cre : 1; /* [4] */ + unsigned serr_v_rsvd0 : 10; /* [14:5] */ + unsigned serr_v_addr : 32; /* [46:15] */ + unsigned serr_v_rsvd1 : 5; /* [51:47] */ + unsigned serr_v_source : 2; /* [53:52] */ + unsigned serr_v_cmd : 2; /* [55:54] */ + unsigned serr_v_syn : 8; /* [63:56] */ 
+ } serr_r_bits; + unsigned int serr_l_whole[2]; + unsigned long serr_q_whole; +}; + +/* + * GPERROR / APERROR / GPERREN / APERREN / GPERRSET / APERRSET + */ +union TPAchipPERR { + struct { + unsigned long perr_v_lost : 1; /* [0] */ + unsigned long perr_v_serr : 1; /* [1] */ + unsigned long perr_v_perr : 1; /* [2] */ + unsigned long perr_v_dcrto : 1; /* [3] */ + unsigned long perr_v_sge : 1; /* [4] */ + unsigned long perr_v_ape : 1; /* [5] */ + unsigned long perr_v_ta : 1; /* [6] */ + unsigned long perr_v_dpe : 1; /* [7] */ + unsigned long perr_v_nds : 1; /* [8] */ + unsigned long perr_v_iptpr : 1; /* [9] */ + unsigned long perr_v_iptpw : 1; /* [10] */ + unsigned long perr_v_rsvd0 : 3; /* [13:11] */ + unsigned long perr_v_addr : 33; /* [46:14] */ + unsigned long perr_v_dac : 1; /* [47] */ + unsigned long perr_v_mwin : 1; /* [48] */ + unsigned long perr_v_rsvd1 : 3; /* [51:49] */ + unsigned long perr_v_cmd : 4; /* [55:52] */ + unsigned long perr_v_rsvd2 : 8; /* [63:56] */ + } perr_r_bits; + unsigned int perr_l_whole[2]; + unsigned long perr_q_whole; +}; + +/* + * AGPERROR / AGPERREN / AGPERRSET + */ +union TPAchipAGPERR { + struct { + unsigned agperr_v_lost : 1; /* [0] */ + unsigned agperr_v_lpqfull : 1; /* [1] */ + unsigned apgerr_v_hpqfull : 1; /* [2] */ + unsigned agperr_v_rescmd : 1; /* [3] */ + unsigned agperr_v_ipte : 1; /* [4] */ + unsigned agperr_v_ptp : 1; /* [5] */ + unsigned agperr_v_nowindow : 1; /* [6] */ + unsigned agperr_v_rsvd0 : 8; /* [14:7] */ + unsigned agperr_v_addr : 32; /* [46:15] */ + unsigned agperr_v_rsvd1 : 1; /* [47] */ + unsigned agperr_v_dac : 1; /* [48] */ + unsigned agperr_v_mwin : 1; /* [49] */ + unsigned agperr_v_cmd : 3; /* [52:50] */ + unsigned agperr_v_length : 6; /* [58:53] */ + unsigned agperr_v_fence : 1; /* [59] */ + unsigned agperr_v_rsvd2 : 4; /* [63:60] */ + } agperr_r_bits; + unsigned int agperr_l_whole[2]; + unsigned long agperr_q_whole; +}; +/* + * Memory spaces: + * Hose numbers are assigned as follows: + * 0 - pachip 0 / G Port + * 1 - pachip 1 / G Port + * 2 - pachip 0 / A Port + * 3 - pachip 1 / A Port + */ +#define TITAN_HOSE_SHIFT (33) +#define TITAN_HOSE(h) (((unsigned long)(h)) << TITAN_HOSE_SHIFT) +#define TITAN_BASE (IDENT_ADDR + TI_BIAS) +#define TITAN_MEM(h) (TITAN_BASE+TITAN_HOSE(h)+0x000000000UL) +#define _TITAN_IACK_SC(h) (TITAN_BASE+TITAN_HOSE(h)+0x1F8000000UL) +#define TITAN_IO(h) (TITAN_BASE+TITAN_HOSE(h)+0x1FC000000UL) +#define TITAN_CONF(h) (TITAN_BASE+TITAN_HOSE(h)+0x1FE000000UL) + +#define TITAN_HOSE_MASK TITAN_HOSE(3) +#define TITAN_IACK_SC _TITAN_IACK_SC(0) /* hack! */ + +/* + * The canonical non-remaped I/O and MEM addresses have these values + * subtracted out. This is arranged so that folks manipulating ISA + * devices can use their familiar numbers and have them map to bus 0. + */ + +#define TITAN_IO_BIAS TITAN_IO(0) +#define TITAN_MEM_BIAS TITAN_MEM(0) + +/* The IO address space is larger than 0xffff */ +#define TITAN_IO_SPACE (TITAN_CONF(0) - TITAN_IO(0)) + +/* TIG Space */ +#define TITAN_TIG_SPACE (TITAN_BASE + 0x100000000UL) + +/* Offset between ram physical addresses and pci64 DAC bus addresses. */ +/* ??? Just a guess. Ought to confirm it hasn't been moved. 
*/ +#define TITAN_DAC_OFFSET (1UL << 40) + +/* + * Data structure for handling TITAN machine checks: + */ +#define SCB_Q_SYSERR 0x620 +#define SCB_Q_PROCERR 0x630 +#define SCB_Q_SYSMCHK 0x660 +#define SCB_Q_PROCMCHK 0x670 +#define SCB_Q_SYSEVENT 0x680 /* environmental / system management */ +struct el_TITAN_sysdata_mcheck { + u64 summary; /* 0x00 */ + u64 c_dirx; /* 0x08 */ + u64 c_misc; /* 0x10 */ + u64 p0_serror; /* 0x18 */ + u64 p0_gperror; /* 0x20 */ + u64 p0_aperror; /* 0x28 */ + u64 p0_agperror;/* 0x30 */ + u64 p1_serror; /* 0x38 */ + u64 p1_gperror; /* 0x40 */ + u64 p1_aperror; /* 0x48 */ + u64 p1_agperror;/* 0x50 */ +}; + +/* + * System area for a privateer 680 environmental/system management mcheck + */ +struct el_PRIVATEER_envdata_mcheck { + u64 summary; /* 0x00 */ + u64 c_dirx; /* 0x08 */ + u64 smir; /* 0x10 */ + u64 cpuir; /* 0x18 */ + u64 psir; /* 0x20 */ + u64 fault; /* 0x28 */ + u64 sys_doors; /* 0x30 */ + u64 temp_warn; /* 0x38 */ + u64 fan_ctrl; /* 0x40 */ + u64 code; /* 0x48 */ + u64 reserved; /* 0x50 */ +}; + +#ifdef __KERNEL__ + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __IO_EXTERN_INLINE +#endif + +/* + * I/O functions: + * + * TITAN, a 21??? PCI/memory support chipset for the EV6 (21264) + * can only use linear accesses to get at PCI/AGP memory and I/O spaces. + */ + +/* + * Memory functions. all accesses are done through linear space. + */ +extern void __iomem *titan_ioportmap(unsigned long addr); +extern void __iomem *titan_ioremap(unsigned long addr, unsigned long size); +extern void titan_iounmap(volatile void __iomem *addr); + +__EXTERN_INLINE int titan_is_ioaddr(unsigned long addr) +{ + return addr >= TITAN_BASE; +} + +extern int titan_is_mmio(const volatile void __iomem *addr); + +#undef __IO_PREFIX +#define __IO_PREFIX titan +#define titan_trivial_rw_bw 1 +#define titan_trivial_rw_lq 1 +#define titan_trivial_io_bw 1 +#define titan_trivial_io_lq 1 +#define titan_trivial_iounmap 0 +#include + +#ifdef __IO_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_TITAN__H__ */ diff --git a/arch/alpha/include/asm/core_tsunami.h b/arch/alpha/include/asm/core_tsunami.h new file mode 100644 index 0000000000..3391e95754 --- /dev/null +++ b/arch/alpha/include/asm/core_tsunami.h @@ -0,0 +1,335 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_TSUNAMI__H__ +#define __ALPHA_TSUNAMI__H__ + +#include +#include + +/* + * TSUNAMI/TYPHOON are the internal names for the core logic chipset which + * provides memory controller and PCI access for the 21264 based systems. + * + * This file is based on: + * + * Tsunami System Programmers Manual + * Preliminary, Chapters 2-5 + * + */ + +/* XXX: Do we need to conditionalize on this? */ +#ifdef USE_48_BIT_KSEG +#define TS_BIAS 0x80000000000UL +#else +#define TS_BIAS 0x10000000000UL +#endif + +/* + * CChip, DChip, and PChip registers + */ + +typedef struct { + volatile unsigned long csr __attribute__((aligned(64))); +} tsunami_64; + +typedef struct { + tsunami_64 csc; + tsunami_64 mtr; + tsunami_64 misc; + tsunami_64 mpd; + tsunami_64 aar0; + tsunami_64 aar1; + tsunami_64 aar2; + tsunami_64 aar3; + tsunami_64 dim0; + tsunami_64 dim1; + tsunami_64 dir0; + tsunami_64 dir1; + tsunami_64 drir; + tsunami_64 prben; + tsunami_64 iic; /* a.k.a. iic0 */ + tsunami_64 wdr; /* a.k.a. 
iic1 */ + tsunami_64 mpr0; + tsunami_64 mpr1; + tsunami_64 mpr2; + tsunami_64 mpr3; + tsunami_64 mctl; + tsunami_64 __pad1; + tsunami_64 ttr; + tsunami_64 tdr; + tsunami_64 dim2; + tsunami_64 dim3; + tsunami_64 dir2; + tsunami_64 dir3; + tsunami_64 iic2; + tsunami_64 iic3; +} tsunami_cchip; + +typedef struct { + tsunami_64 dsc; + tsunami_64 str; + tsunami_64 drev; +} tsunami_dchip; + +typedef struct { + tsunami_64 wsba[4]; + tsunami_64 wsm[4]; + tsunami_64 tba[4]; + tsunami_64 pctl; + tsunami_64 plat; + tsunami_64 reserved; + tsunami_64 perror; + tsunami_64 perrmask; + tsunami_64 perrset; + tsunami_64 tlbiv; + tsunami_64 tlbia; + tsunami_64 pmonctl; + tsunami_64 pmoncnt; +} tsunami_pchip; + +#define TSUNAMI_cchip ((tsunami_cchip *)(IDENT_ADDR+TS_BIAS+0x1A0000000UL)) +#define TSUNAMI_dchip ((tsunami_dchip *)(IDENT_ADDR+TS_BIAS+0x1B0000800UL)) +#define TSUNAMI_pchip0 ((tsunami_pchip *)(IDENT_ADDR+TS_BIAS+0x180000000UL)) +#define TSUNAMI_pchip1 ((tsunami_pchip *)(IDENT_ADDR+TS_BIAS+0x380000000UL)) +extern int TSUNAMI_bootcpu; + +/* + * TSUNAMI Pchip Error register. + */ + +#define perror_m_lost 0x1 +#define perror_m_serr 0x2 +#define perror_m_perr 0x4 +#define perror_m_dcrto 0x8 +#define perror_m_sge 0x10 +#define perror_m_ape 0x20 +#define perror_m_ta 0x40 +#define perror_m_rdpe 0x80 +#define perror_m_nds 0x100 +#define perror_m_rto 0x200 +#define perror_m_uecc 0x400 +#define perror_m_cre 0x800 +#define perror_m_addrl 0xFFFFFFFF0000UL +#define perror_m_addrh 0x7000000000000UL +#define perror_m_cmd 0xF0000000000000UL +#define perror_m_syn 0xFF00000000000000UL +union TPchipPERROR { + struct { + unsigned int perror_v_lost : 1; + unsigned perror_v_serr : 1; + unsigned perror_v_perr : 1; + unsigned perror_v_dcrto : 1; + unsigned perror_v_sge : 1; + unsigned perror_v_ape : 1; + unsigned perror_v_ta : 1; + unsigned perror_v_rdpe : 1; + unsigned perror_v_nds : 1; + unsigned perror_v_rto : 1; + unsigned perror_v_uecc : 1; + unsigned perror_v_cre : 1; + unsigned perror_v_rsvd1 : 4; + unsigned perror_v_addrl : 32; + unsigned perror_v_addrh : 3; + unsigned perror_v_rsvd2 : 1; + unsigned perror_v_cmd : 4; + unsigned perror_v_syn : 8; + } perror_r_bits; + int perror_q_whole [2]; +}; + +/* + * TSUNAMI Pchip Window Space Base Address register. 
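+ * The wsba_m_* masks and union TPchipWSBA below give the layout: window
+ * enable <0>, scatter/gather <1>, ptp <2>, window base address <31:20>;
+ * wmask_k_sz1gb is the corresponding WSM value for a 1GB window.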
+ */ +#define wsba_m_ena 0x1 +#define wsba_m_sg 0x2 +#define wsba_m_ptp 0x4 +#define wsba_m_addr 0xFFF00000 +#define wmask_k_sz1gb 0x3FF00000 +union TPchipWSBA { + struct { + unsigned wsba_v_ena : 1; + unsigned wsba_v_sg : 1; + unsigned wsba_v_ptp : 1; + unsigned wsba_v_rsvd1 : 17; + unsigned wsba_v_addr : 12; + unsigned wsba_v_rsvd2 : 32; + } wsba_r_bits; + int wsba_q_whole [2]; +}; + +/* + * TSUNAMI Pchip Control Register + */ +#define pctl_m_fdsc 0x1 +#define pctl_m_fbtb 0x2 +#define pctl_m_thdis 0x4 +#define pctl_m_chaindis 0x8 +#define pctl_m_tgtlat 0x10 +#define pctl_m_hole 0x20 +#define pctl_m_mwin 0x40 +#define pctl_m_arbena 0x80 +#define pctl_m_prigrp 0x7F00 +#define pctl_m_ppri 0x8000 +#define pctl_m_rsvd1 0x30000 +#define pctl_m_eccen 0x40000 +#define pctl_m_padm 0x80000 +#define pctl_m_cdqmax 0xF00000 +#define pctl_m_rev 0xFF000000 +#define pctl_m_crqmax 0xF00000000UL +#define pctl_m_ptpmax 0xF000000000UL +#define pctl_m_pclkx 0x30000000000UL +#define pctl_m_fdsdis 0x40000000000UL +#define pctl_m_fdwdis 0x80000000000UL +#define pctl_m_ptevrfy 0x100000000000UL +#define pctl_m_rpp 0x200000000000UL +#define pctl_m_pid 0xC00000000000UL +#define pctl_m_rsvd2 0xFFFF000000000000UL + +union TPchipPCTL { + struct { + unsigned pctl_v_fdsc : 1; + unsigned pctl_v_fbtb : 1; + unsigned pctl_v_thdis : 1; + unsigned pctl_v_chaindis : 1; + unsigned pctl_v_tgtlat : 1; + unsigned pctl_v_hole : 1; + unsigned pctl_v_mwin : 1; + unsigned pctl_v_arbena : 1; + unsigned pctl_v_prigrp : 7; + unsigned pctl_v_ppri : 1; + unsigned pctl_v_rsvd1 : 2; + unsigned pctl_v_eccen : 1; + unsigned pctl_v_padm : 1; + unsigned pctl_v_cdqmax : 4; + unsigned pctl_v_rev : 8; + unsigned pctl_v_crqmax : 4; + unsigned pctl_v_ptpmax : 4; + unsigned pctl_v_pclkx : 2; + unsigned pctl_v_fdsdis : 1; + unsigned pctl_v_fdwdis : 1; + unsigned pctl_v_ptevrfy : 1; + unsigned pctl_v_rpp : 1; + unsigned pctl_v_pid : 2; + unsigned pctl_v_rsvd2 : 16; + } pctl_r_bits; + int pctl_q_whole [2]; +}; + +/* + * TSUNAMI Pchip Error Mask Register. + */ +#define perrmask_m_lost 0x1 +#define perrmask_m_serr 0x2 +#define perrmask_m_perr 0x4 +#define perrmask_m_dcrto 0x8 +#define perrmask_m_sge 0x10 +#define perrmask_m_ape 0x20 +#define perrmask_m_ta 0x40 +#define perrmask_m_rdpe 0x80 +#define perrmask_m_nds 0x100 +#define perrmask_m_rto 0x200 +#define perrmask_m_uecc 0x400 +#define perrmask_m_cre 0x800 +#define perrmask_m_rsvd 0xFFFFFFFFFFFFF000UL +union TPchipPERRMASK { + struct { + unsigned int perrmask_v_lost : 1; + unsigned perrmask_v_serr : 1; + unsigned perrmask_v_perr : 1; + unsigned perrmask_v_dcrto : 1; + unsigned perrmask_v_sge : 1; + unsigned perrmask_v_ape : 1; + unsigned perrmask_v_ta : 1; + unsigned perrmask_v_rdpe : 1; + unsigned perrmask_v_nds : 1; + unsigned perrmask_v_rto : 1; + unsigned perrmask_v_uecc : 1; + unsigned perrmask_v_cre : 1; + unsigned perrmask_v_rsvd1 : 20; + unsigned perrmask_v_rsvd2 : 32; + } perrmask_r_bits; + int perrmask_q_whole [2]; +}; + +/* + * Memory spaces: + */ +#define TSUNAMI_HOSE(h) (((unsigned long)(h)) << 33) +#define TSUNAMI_BASE (IDENT_ADDR + TS_BIAS) + +#define TSUNAMI_MEM(h) (TSUNAMI_BASE+TSUNAMI_HOSE(h) + 0x000000000UL) +#define _TSUNAMI_IACK_SC(h) (TSUNAMI_BASE+TSUNAMI_HOSE(h) + 0x1F8000000UL) +#define TSUNAMI_IO(h) (TSUNAMI_BASE+TSUNAMI_HOSE(h) + 0x1FC000000UL) +#define TSUNAMI_CONF(h) (TSUNAMI_BASE+TSUNAMI_HOSE(h) + 0x1FE000000UL) + +#define TSUNAMI_IACK_SC _TSUNAMI_IACK_SC(0) /* hack! */ + + +/* + * The canonical non-remaped I/O and MEM addresses have these values + * subtracted out. 
This is arranged so that folks manipulating ISA + * devices can use their familiar numbers and have them map to bus 0. + */ + +#define TSUNAMI_IO_BIAS TSUNAMI_IO(0) +#define TSUNAMI_MEM_BIAS TSUNAMI_MEM(0) + +/* The IO address space is larger than 0xffff */ +#define TSUNAMI_IO_SPACE (TSUNAMI_CONF(0) - TSUNAMI_IO(0)) + +/* Offset between ram physical addresses and pci64 DAC bus addresses. */ +#define TSUNAMI_DAC_OFFSET (1UL << 40) + +/* + * Data structure for handling TSUNAMI machine checks: + */ +struct el_TSUNAMI_sysdata_mcheck { +}; + + +#ifdef __KERNEL__ + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __IO_EXTERN_INLINE +#endif + +/* + * I/O functions: + * + * TSUNAMI, the 21??? PCI/memory support chipset for the EV6 (21264) + * can only use linear accesses to get at PCI memory and I/O spaces. + */ + +/* + * Memory functions. all accesses are done through linear space. + */ +extern void __iomem *tsunami_ioportmap(unsigned long addr); +extern void __iomem *tsunami_ioremap(unsigned long addr, unsigned long size); +__EXTERN_INLINE int tsunami_is_ioaddr(unsigned long addr) +{ + return addr >= TSUNAMI_BASE; +} + +__EXTERN_INLINE int tsunami_is_mmio(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + return (addr & 0x100000000UL) == 0; +} + +#undef __IO_PREFIX +#define __IO_PREFIX tsunami +#define tsunami_trivial_rw_bw 1 +#define tsunami_trivial_rw_lq 1 +#define tsunami_trivial_io_bw 1 +#define tsunami_trivial_io_lq 1 +#define tsunami_trivial_iounmap 1 +#include + +#ifdef __IO_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_TSUNAMI__H__ */ diff --git a/arch/alpha/include/asm/core_wildfire.h b/arch/alpha/include/asm/core_wildfire.h new file mode 100644 index 0000000000..5d1b37f412 --- /dev/null +++ b/arch/alpha/include/asm/core_wildfire.h @@ -0,0 +1,319 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_WILDFIRE__H__ +#define __ALPHA_WILDFIRE__H__ + +#include +#include + +#define WILDFIRE_MAX_QBB 8 /* more than 8 requires other mods */ +#define WILDFIRE_PCA_PER_QBB 4 +#define WILDFIRE_IRQ_PER_PCA 64 + +#define WILDFIRE_NR_IRQS \ + (WILDFIRE_MAX_QBB * WILDFIRE_PCA_PER_QBB * WILDFIRE_IRQ_PER_PCA) + +extern unsigned char wildfire_hard_qbb_map[WILDFIRE_MAX_QBB]; +extern unsigned char wildfire_soft_qbb_map[WILDFIRE_MAX_QBB]; +#define QBB_MAP_EMPTY 0xff + +extern unsigned long wildfire_hard_qbb_mask; +extern unsigned long wildfire_soft_qbb_mask; +extern unsigned long wildfire_gp_mask; +extern unsigned long wildfire_hs_mask; +extern unsigned long wildfire_iop_mask; +extern unsigned long wildfire_ior_mask; +extern unsigned long wildfire_pca_mask; +extern unsigned long wildfire_cpu_mask; +extern unsigned long wildfire_mem_mask; + +#define WILDFIRE_QBB_EXISTS(qbbno) (wildfire_soft_qbb_mask & (1 << (qbbno))) + +#define WILDFIRE_MEM_EXISTS(qbbno) (wildfire_mem_mask & (0xf << ((qbbno) << 2))) + +#define WILDFIRE_PCA_EXISTS(qbbno, pcano) \ + (wildfire_pca_mask & (1 << (((qbbno) << 2) + (pcano)))) + +typedef struct { + volatile unsigned long csr __attribute__((aligned(64))); +} wildfire_64; + +typedef struct { + volatile unsigned long csr __attribute__((aligned(256))); +} wildfire_256; + +typedef struct { + volatile unsigned long csr __attribute__((aligned(2048))); +} wildfire_2k; + +typedef struct { + wildfire_64 qsd_whami; + wildfire_64 qsd_rev; + wildfire_64 qsd_port_present; + wildfire_64 qsd_port_active; + wildfire_64 qsd_fault_ena; + wildfire_64 
qsd_cpu_int_ena; + wildfire_64 qsd_mem_config; + wildfire_64 qsd_err_sum; + wildfire_64 ce_sum[4]; + wildfire_64 dev_init[4]; + wildfire_64 it_int[4]; + wildfire_64 ip_int[4]; + wildfire_64 uce_sum[4]; + wildfire_64 se_sum__non_dev_int[4]; + wildfire_64 scratch[4]; + wildfire_64 qsd_timer; + wildfire_64 qsd_diag; +} wildfire_qsd; + +typedef struct { + wildfire_256 qsd_whami; + wildfire_256 __pad1; + wildfire_256 ce_sum; + wildfire_256 dev_init; + wildfire_256 it_int; + wildfire_256 ip_int; + wildfire_256 uce_sum; + wildfire_256 se_sum; +} wildfire_fast_qsd; + +typedef struct { + wildfire_2k qsa_qbb_id; + wildfire_2k __pad1; + wildfire_2k qsa_port_ena; + wildfire_2k qsa_scratch; + wildfire_2k qsa_config[5]; + wildfire_2k qsa_ref_int; + wildfire_2k qsa_qbb_pop[2]; + wildfire_2k qsa_dtag_fc; + wildfire_2k __pad2[3]; + wildfire_2k qsa_diag; + wildfire_2k qsa_diag_lock[4]; + wildfire_2k __pad3[11]; + wildfire_2k qsa_cpu_err_sum; + wildfire_2k qsa_misc_err_sum; + wildfire_2k qsa_tmo_err_sum; + wildfire_2k qsa_err_ena; + wildfire_2k qsa_tmo_config; + wildfire_2k qsa_ill_cmd_err_sum; + wildfire_2k __pad4[26]; + wildfire_2k qsa_busy_mask; + wildfire_2k qsa_arr_valid; + wildfire_2k __pad5[2]; + wildfire_2k qsa_port_map[4]; + wildfire_2k qsa_arr_addr[8]; + wildfire_2k qsa_arr_mask[8]; +} wildfire_qsa; + +typedef struct { + wildfire_64 ioa_config; + wildfire_64 iod_config; + wildfire_64 iop_switch_credits; + wildfire_64 __pad1; + wildfire_64 iop_hose_credits; + wildfire_64 __pad2[11]; + struct { + wildfire_64 __pad3; + wildfire_64 init; + } iop_hose[4]; + wildfire_64 ioa_hose_0_ctrl; + wildfire_64 iod_hose_0_ctrl; + wildfire_64 ioa_hose_1_ctrl; + wildfire_64 iod_hose_1_ctrl; + wildfire_64 ioa_hose_2_ctrl; + wildfire_64 iod_hose_2_ctrl; + wildfire_64 ioa_hose_3_ctrl; + wildfire_64 iod_hose_3_ctrl; + struct { + wildfire_64 target; + wildfire_64 __pad4; + } iop_dev_int[4]; + + wildfire_64 iop_err_int_target; + wildfire_64 __pad5[7]; + wildfire_64 iop_qbb_err_sum; + wildfire_64 __pad6; + wildfire_64 iop_qbb_se_sum; + wildfire_64 __pad7; + wildfire_64 ioa_err_sum; + wildfire_64 iod_err_sum; + wildfire_64 __pad8[4]; + wildfire_64 ioa_diag_force_err; + wildfire_64 iod_diag_force_err; + wildfire_64 __pad9[4]; + wildfire_64 iop_diag_send_err_int; + wildfire_64 __pad10[15]; + wildfire_64 ioa_scratch; + wildfire_64 iod_scratch; +} wildfire_iop; + +typedef struct { + wildfire_2k gpa_qbb_map[4]; + wildfire_2k gpa_mem_pop_map; + wildfire_2k gpa_scratch; + wildfire_2k gpa_diag; + wildfire_2k gpa_config_0; + wildfire_2k __pad1; + wildfire_2k gpa_init_id; + wildfire_2k gpa_config_2; + /* not complete */ +} wildfire_gp; + +typedef struct { + wildfire_64 pca_what_am_i; + wildfire_64 pca_err_sum; + wildfire_64 pca_diag_force_err; + wildfire_64 pca_diag_send_err_int; + wildfire_64 pca_hose_credits; + wildfire_64 pca_scratch; + wildfire_64 pca_micro_addr; + wildfire_64 pca_micro_data; + wildfire_64 pca_pend_int; + wildfire_64 pca_sent_int; + wildfire_64 __pad1; + wildfire_64 pca_stdio_edge_level; + wildfire_64 __pad2[52]; + struct { + wildfire_64 target; + wildfire_64 enable; + } pca_int[4]; + wildfire_64 __pad3[56]; + wildfire_64 pca_alt_sent_int[32]; +} wildfire_pca; + +typedef struct { + wildfire_64 ne_what_am_i; + /* not complete */ +} wildfire_ne; + +typedef struct { + wildfire_64 fe_what_am_i; + /* not complete */ +} wildfire_fe; + +typedef struct { + wildfire_64 pci_io_addr_ext; + wildfire_64 pci_ctrl; + wildfire_64 pci_err_sum; + wildfire_64 pci_err_addr; + wildfire_64 pci_stall_cnt; + wildfire_64 pci_iack_special; 
+ wildfire_64 __pad1[2]; + wildfire_64 pci_pend_int; + wildfire_64 pci_sent_int; + wildfire_64 __pad2[54]; + struct { + wildfire_64 wbase; + wildfire_64 wmask; + wildfire_64 tbase; + } pci_window[4]; + wildfire_64 pci_flush_tlb; + wildfire_64 pci_perf_mon; +} wildfire_pci; + +#define WILDFIRE_ENTITY_SHIFT 18 + +#define WILDFIRE_GP_ENTITY (0x10UL << WILDFIRE_ENTITY_SHIFT) +#define WILDFIRE_IOP_ENTITY (0x08UL << WILDFIRE_ENTITY_SHIFT) +#define WILDFIRE_QSA_ENTITY (0x04UL << WILDFIRE_ENTITY_SHIFT) +#define WILDFIRE_QSD_ENTITY_SLOW (0x05UL << WILDFIRE_ENTITY_SHIFT) +#define WILDFIRE_QSD_ENTITY_FAST (0x01UL << WILDFIRE_ENTITY_SHIFT) + +#define WILDFIRE_PCA_ENTITY(pca) ((0xc|(pca))<>1)|((((h)&1)|2)<<16)|(((1UL<<13)-1)<<23))) + +#define WILDFIRE_IO_BIAS WILDFIRE_IO(0,0) +#define WILDFIRE_MEM_BIAS WILDFIRE_MEM(0,0) /* ??? */ + +/* The IO address space is larger than 0xffff */ +#define WILDFIRE_IO_SPACE (8UL*1024*1024) + +#ifdef __KERNEL__ + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __IO_EXTERN_INLINE +#endif + +/* + * Memory functions. all accesses are done through linear space. + */ + +__EXTERN_INLINE void __iomem *wildfire_ioportmap(unsigned long addr) +{ + return (void __iomem *)(addr + WILDFIRE_IO_BIAS); +} + +__EXTERN_INLINE void __iomem *wildfire_ioremap(unsigned long addr, + unsigned long size) +{ + return (void __iomem *)(addr + WILDFIRE_MEM_BIAS); +} + +__EXTERN_INLINE int wildfire_is_ioaddr(unsigned long addr) +{ + return addr >= WILDFIRE_BASE; +} + +__EXTERN_INLINE int wildfire_is_mmio(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long)xaddr; + return (addr & 0x100000000UL) == 0; +} + +#undef __IO_PREFIX +#define __IO_PREFIX wildfire +#define wildfire_trivial_rw_bw 1 +#define wildfire_trivial_rw_lq 1 +#define wildfire_trivial_io_bw 1 +#define wildfire_trivial_io_lq 1 +#define wildfire_trivial_iounmap 1 +#include + +#ifdef __IO_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_WILDFIRE__H__ */ diff --git a/arch/alpha/include/asm/delay.h b/arch/alpha/include/asm/delay.h new file mode 100644 index 0000000000..868aa61aba --- /dev/null +++ b/arch/alpha/include/asm/delay.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_DELAY_H +#define __ALPHA_DELAY_H + +extern void __delay(int loops); +extern void udelay(unsigned long usecs); + +extern void ndelay(unsigned long nsecs); +#define ndelay ndelay + +#endif /* defined(__ALPHA_DELAY_H) */ diff --git a/arch/alpha/include/asm/device.h b/arch/alpha/include/asm/device.h new file mode 100644 index 0000000000..9ca75a7db2 --- /dev/null +++ b/arch/alpha/include/asm/device.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Arch specific extensions to struct device + */ +#include + diff --git a/arch/alpha/include/asm/div64.h b/arch/alpha/include/asm/div64.h new file mode 100644 index 0000000000..6cd978cefb --- /dev/null +++ b/arch/alpha/include/asm/div64.h @@ -0,0 +1 @@ +#include diff --git a/arch/alpha/include/asm/dma-mapping.h b/arch/alpha/include/asm/dma-mapping.h new file mode 100644 index 0000000000..0ee6a5c99b --- /dev/null +++ b/arch/alpha/include/asm/dma-mapping.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_DMA_MAPPING_H +#define _ALPHA_DMA_MAPPING_H + +extern const struct dma_map_ops alpha_pci_ops; + +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) +{ +#ifdef CONFIG_ALPHA_JENSEN + return NULL; +#else + return 
&alpha_pci_ops; +#endif +} + +#endif /* _ALPHA_DMA_MAPPING_H */ diff --git a/arch/alpha/include/asm/dma.h b/arch/alpha/include/asm/dma.h new file mode 100644 index 0000000000..28610ea778 --- /dev/null +++ b/arch/alpha/include/asm/dma.h @@ -0,0 +1,377 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * include/asm-alpha/dma.h + * + * This is essentially the same as the i386 DMA stuff, as the AlphaPCs + * use ISA-compatible dma. The only extension is support for high-page + * registers that allow to set the top 8 bits of a 32-bit DMA address. + * This register should be written last when setting up a DMA address + * as this will also enable DMA across 64 KB boundaries. + */ + +/* $Id: dma.h,v 1.7 1992/12/14 00:29:34 root Exp root $ + * linux/include/asm/dma.h: Defines for using and allocating dma channels. + * Written by Hennus Bergman, 1992. + * High DMA channel support & info by Hannu Savolainen + * and John Boyd, Nov. 1992. + */ + +#ifndef _ASM_DMA_H +#define _ASM_DMA_H + +#include +#include + +#define dma_outb outb +#define dma_inb inb + +/* + * NOTES about DMA transfers: + * + * controller 1: channels 0-3, byte operations, ports 00-1F + * controller 2: channels 4-7, word operations, ports C0-DF + * + * - ALL registers are 8 bits only, regardless of transfer size + * - channel 4 is not used - cascades 1 into 2. + * - channels 0-3 are byte - addresses/counts are for physical bytes + * - channels 5-7 are word - addresses/counts are for physical words + * - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries + * - transfer count loaded to registers is 1 less than actual count + * - controller 2 offsets are all even (2x offsets for controller 1) + * - page registers for 5-7 don't use data bit 0, represent 128K pages + * - page registers for 0-3 use bit 0, represent 64K pages + * + * DMA transfers are limited to the lower 16MB of _physical_ memory. + * Note that addresses loaded into registers must be _physical_ addresses, + * not logical addresses (which may differ if paging is active). + * + * Address mapping for channels 0-3: + * + * A23 ... A16 A15 ... A8 A7 ... A0 (Physical addresses) + * | ... | | ... | | ... | + * | ... | | ... | | ... | + * | ... | | ... | | ... | + * P7 ... P0 A7 ... A0 A7 ... A0 + * | Page | Addr MSB | Addr LSB | (DMA registers) + * + * Address mapping for channels 5-7: + * + * A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0 (Physical addresses) + * | ... | \ \ ... \ \ \ ... \ \ + * | ... | \ \ ... \ \ \ ... \ (not used) + * | ... | \ \ ... \ \ \ ... \ + * P7 ... P1 (0) A7 A6 ... A0 A7 A6 ... A0 + * | Page | Addr MSB | Addr LSB | (DMA registers) + * + * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses + * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at + * the hardware level, so odd-byte transfers aren't possible). + * + * Transfer count (_not # bytes_) is limited to 64K, represented as actual + * count - 1 : 64K => 0xFFFF, 1 => 0x0000. Thus, count is always 1 or more, + * and up to 128K bytes may be transferred on channels 5-7 in one operation. + * + */ + +#define MAX_DMA_CHANNELS 8 + +/* + ISA DMA limitations on Alpha platforms, + + These may be due to SIO (PCI<->ISA bridge) chipset limitation, or + just a wiring limit. +*/ + +/* The maximum address for ISA DMA transfer on Alpha XL, due to an + hardware SIO limitation, is 64MB. +*/ +#define ALPHA_XL_MAX_ISA_DMA_ADDRESS 0x04000000UL + +/* The maximum address for ISA DMA transfer on RUFFIAN, + due to an hardware SIO limitation, is 16MB. 
+*/ +#define ALPHA_RUFFIAN_MAX_ISA_DMA_ADDRESS 0x01000000UL + +/* The maximum address for ISA DMA transfer on SABLE, and some ALCORs, + due to an hardware SIO chip limitation, is 2GB. +*/ +#define ALPHA_SABLE_MAX_ISA_DMA_ADDRESS 0x80000000UL +#define ALPHA_ALCOR_MAX_ISA_DMA_ADDRESS 0x80000000UL + +/* + Maximum address for all the others is the complete 32-bit bus + address space. +*/ +#define ALPHA_MAX_ISA_DMA_ADDRESS 0x100000000UL + +#ifdef CONFIG_ALPHA_GENERIC +# define MAX_ISA_DMA_ADDRESS (alpha_mv.max_isa_dma_address) +#else +# if defined(CONFIG_ALPHA_XL) +# define MAX_ISA_DMA_ADDRESS ALPHA_XL_MAX_ISA_DMA_ADDRESS +# elif defined(CONFIG_ALPHA_RUFFIAN) +# define MAX_ISA_DMA_ADDRESS ALPHA_RUFFIAN_MAX_ISA_DMA_ADDRESS +# elif defined(CONFIG_ALPHA_SABLE) +# define MAX_ISA_DMA_ADDRESS ALPHA_SABLE_MAX_ISA_DMA_ADDRESS +# elif defined(CONFIG_ALPHA_ALCOR) +# define MAX_ISA_DMA_ADDRESS ALPHA_ALCOR_MAX_ISA_DMA_ADDRESS +# else +# define MAX_ISA_DMA_ADDRESS ALPHA_MAX_ISA_DMA_ADDRESS +# endif +#endif + +/* If we have the iommu, we don't have any address limitations on DMA. + Otherwise (Nautilus, RX164), we have to have 0-16 Mb DMA zone + like i386. */ +#define MAX_DMA_ADDRESS (alpha_mv.mv_pci_tbi ? \ + ~0UL : IDENT_ADDR + 0x01000000) + +/* 8237 DMA controllers */ +#define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */ +#define IO_DMA2_BASE 0xC0 /* 16 bit master DMA, ch 4(=slave input)..7 */ + +/* DMA controller registers */ +#define DMA1_CMD_REG 0x08 /* command register (w) */ +#define DMA1_STAT_REG 0x08 /* status register (r) */ +#define DMA1_REQ_REG 0x09 /* request register (w) */ +#define DMA1_MASK_REG 0x0A /* single-channel mask (w) */ +#define DMA1_MODE_REG 0x0B /* mode register (w) */ +#define DMA1_CLEAR_FF_REG 0x0C /* clear pointer flip-flop (w) */ +#define DMA1_TEMP_REG 0x0D /* Temporary Register (r) */ +#define DMA1_RESET_REG 0x0D /* Master Clear (w) */ +#define DMA1_CLR_MASK_REG 0x0E /* Clear Mask */ +#define DMA1_MASK_ALL_REG 0x0F /* all-channels mask (w) */ +#define DMA1_EXT_MODE_REG (0x400 | DMA1_MODE_REG) + +#define DMA2_CMD_REG 0xD0 /* command register (w) */ +#define DMA2_STAT_REG 0xD0 /* status register (r) */ +#define DMA2_REQ_REG 0xD2 /* request register (w) */ +#define DMA2_MASK_REG 0xD4 /* single-channel mask (w) */ +#define DMA2_MODE_REG 0xD6 /* mode register (w) */ +#define DMA2_CLEAR_FF_REG 0xD8 /* clear pointer flip-flop (w) */ +#define DMA2_TEMP_REG 0xDA /* Temporary Register (r) */ +#define DMA2_RESET_REG 0xDA /* Master Clear (w) */ +#define DMA2_CLR_MASK_REG 0xDC /* Clear Mask */ +#define DMA2_MASK_ALL_REG 0xDE /* all-channels mask (w) */ +#define DMA2_EXT_MODE_REG (0x400 | DMA2_MODE_REG) + +#define DMA_ADDR_0 0x00 /* DMA address registers */ +#define DMA_ADDR_1 0x02 +#define DMA_ADDR_2 0x04 +#define DMA_ADDR_3 0x06 +#define DMA_ADDR_4 0xC0 +#define DMA_ADDR_5 0xC4 +#define DMA_ADDR_6 0xC8 +#define DMA_ADDR_7 0xCC + +#define DMA_CNT_0 0x01 /* DMA count registers */ +#define DMA_CNT_1 0x03 +#define DMA_CNT_2 0x05 +#define DMA_CNT_3 0x07 +#define DMA_CNT_4 0xC2 +#define DMA_CNT_5 0xC6 +#define DMA_CNT_6 0xCA +#define DMA_CNT_7 0xCE + +#define DMA_PAGE_0 0x87 /* DMA page registers */ +#define DMA_PAGE_1 0x83 +#define DMA_PAGE_2 0x81 +#define DMA_PAGE_3 0x82 +#define DMA_PAGE_5 0x8B +#define DMA_PAGE_6 0x89 +#define DMA_PAGE_7 0x8A + +#define DMA_HIPAGE_0 (0x400 | DMA_PAGE_0) +#define DMA_HIPAGE_1 (0x400 | DMA_PAGE_1) +#define DMA_HIPAGE_2 (0x400 | DMA_PAGE_2) +#define DMA_HIPAGE_3 (0x400 | DMA_PAGE_3) +#define DMA_HIPAGE_4 (0x400 | DMA_PAGE_4) +#define DMA_HIPAGE_5 
(0x400 | DMA_PAGE_5) +#define DMA_HIPAGE_6 (0x400 | DMA_PAGE_6) +#define DMA_HIPAGE_7 (0x400 | DMA_PAGE_7) + +#define DMA_MODE_READ 0x44 /* I/O to memory, no autoinit, increment, single mode */ +#define DMA_MODE_WRITE 0x48 /* memory to I/O, no autoinit, increment, single mode */ +#define DMA_MODE_CASCADE 0xC0 /* pass thru DREQ->HRQ, DACK<-HLDA only */ + +#define DMA_AUTOINIT 0x10 + +extern spinlock_t dma_spin_lock; + +static __inline__ unsigned long claim_dma_lock(void) +{ + unsigned long flags; + spin_lock_irqsave(&dma_spin_lock, flags); + return flags; +} + +static __inline__ void release_dma_lock(unsigned long flags) +{ + spin_unlock_irqrestore(&dma_spin_lock, flags); +} + +/* enable/disable a specific DMA channel */ +static __inline__ void enable_dma(unsigned int dmanr) +{ + if (dmanr<=3) + dma_outb(dmanr, DMA1_MASK_REG); + else + dma_outb(dmanr & 3, DMA2_MASK_REG); +} + +static __inline__ void disable_dma(unsigned int dmanr) +{ + if (dmanr<=3) + dma_outb(dmanr | 4, DMA1_MASK_REG); + else + dma_outb((dmanr & 3) | 4, DMA2_MASK_REG); +} + +/* Clear the 'DMA Pointer Flip Flop'. + * Write 0 for LSB/MSB, 1 for MSB/LSB access. + * Use this once to initialize the FF to a known state. + * After that, keep track of it. :-) + * --- In order to do that, the DMA routines below should --- + * --- only be used while interrupts are disabled! --- + */ +static __inline__ void clear_dma_ff(unsigned int dmanr) +{ + if (dmanr<=3) + dma_outb(0, DMA1_CLEAR_FF_REG); + else + dma_outb(0, DMA2_CLEAR_FF_REG); +} + +/* set mode (above) for a specific DMA channel */ +static __inline__ void set_dma_mode(unsigned int dmanr, char mode) +{ + if (dmanr<=3) + dma_outb(mode | dmanr, DMA1_MODE_REG); + else + dma_outb(mode | (dmanr&3), DMA2_MODE_REG); +} + +/* set extended mode for a specific DMA channel */ +static __inline__ void set_dma_ext_mode(unsigned int dmanr, char ext_mode) +{ + if (dmanr<=3) + dma_outb(ext_mode | dmanr, DMA1_EXT_MODE_REG); + else + dma_outb(ext_mode | (dmanr&3), DMA2_EXT_MODE_REG); +} + +/* Set only the page register bits of the transfer address. + * This is used for successive transfers when we know the contents of + * the lower 16 bits of the DMA current address register. + */ +static __inline__ void set_dma_page(unsigned int dmanr, unsigned int pagenr) +{ + switch(dmanr) { + case 0: + dma_outb(pagenr, DMA_PAGE_0); + dma_outb((pagenr >> 8), DMA_HIPAGE_0); + break; + case 1: + dma_outb(pagenr, DMA_PAGE_1); + dma_outb((pagenr >> 8), DMA_HIPAGE_1); + break; + case 2: + dma_outb(pagenr, DMA_PAGE_2); + dma_outb((pagenr >> 8), DMA_HIPAGE_2); + break; + case 3: + dma_outb(pagenr, DMA_PAGE_3); + dma_outb((pagenr >> 8), DMA_HIPAGE_3); + break; + case 5: + dma_outb(pagenr & 0xfe, DMA_PAGE_5); + dma_outb((pagenr >> 8), DMA_HIPAGE_5); + break; + case 6: + dma_outb(pagenr & 0xfe, DMA_PAGE_6); + dma_outb((pagenr >> 8), DMA_HIPAGE_6); + break; + case 7: + dma_outb(pagenr & 0xfe, DMA_PAGE_7); + dma_outb((pagenr >> 8), DMA_HIPAGE_7); + break; + } +} + + +/* Set transfer address & page bits for specific DMA channel. + * Assumes dma flipflop is clear. 
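+ *
+ * Illustrative worked example (not part of the original comment): for
+ * channel 2 and bus address 0x123456, the code below writes 0x56 and
+ * then 0x34 to the 8237 address register at ((2&3)<<1)+IO_DMA1_BASE,
+ * and set_dma_page(2, 0x12) writes 0x12 to DMA_PAGE_2 and 0 to
+ * DMA_HIPAGE_2, with the high-page register written last, which also
+ * enables transfers across 64 KB boundaries.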
+ */ +static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a) +{ + if (dmanr <= 3) { + dma_outb( a & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE ); + dma_outb( (a>>8) & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE ); + } else { + dma_outb( (a>>1) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE ); + dma_outb( (a>>9) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE ); + } + set_dma_page(dmanr, a>>16); /* set hipage last to enable 32-bit mode */ +} + + +/* Set transfer size (max 64k for DMA1..3, 128k for DMA5..7) for + * a specific DMA channel. + * You must ensure the parameters are valid. + * NOTE: from a manual: "the number of transfers is one more + * than the initial word count"! This is taken into account. + * Assumes dma flip-flop is clear. + * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7. + */ +static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count) +{ + count--; + if (dmanr <= 3) { + dma_outb( count & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE ); + dma_outb( (count>>8) & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE ); + } else { + dma_outb( (count>>1) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE ); + dma_outb( (count>>9) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE ); + } +} + + +/* Get DMA residue count. After a DMA transfer, this + * should return zero. Reading this while a DMA transfer is + * still in progress will return unpredictable results. + * If called before the channel has been used, it may return 1. + * Otherwise, it returns the number of _bytes_ left to transfer. + * + * Assumes DMA flip-flop is clear. + */ +static __inline__ int get_dma_residue(unsigned int dmanr) +{ + unsigned int io_port = (dmanr<=3)? ((dmanr&3)<<1) + 1 + IO_DMA1_BASE + : ((dmanr&3)<<2) + 2 + IO_DMA2_BASE; + + /* using short to get 16-bit wrap around */ + unsigned short count; + + count = 1 + dma_inb(io_port); + count += dma_inb(io_port) << 8; + + return (dmanr<=3)? count : (count<<1); +} + + +/* These are in kernel/dma.c: */ +extern int request_dma(unsigned int dmanr, const char * device_id); /* reserve a DMA channel */ +extern void free_dma(unsigned int dmanr); /* release it again */ +#define KERNEL_HAVE_CHECK_DMA +extern int check_dma(unsigned int dmanr); + +/* From PCI */ + +#ifdef CONFIG_PCI +extern int isa_dma_bridge_buggy; +#else +#define isa_dma_bridge_buggy (0) +#endif + + +#endif /* _ASM_DMA_H */ diff --git a/arch/alpha/include/asm/elf.h b/arch/alpha/include/asm/elf.h new file mode 100644 index 0000000000..8049997fa3 --- /dev/null +++ b/arch/alpha/include/asm/elf.h @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_ALPHA_ELF_H +#define __ASM_ALPHA_ELF_H + +#include +#include + +/* Special values for the st_other field in the symbol table. 
*/ + +#define STO_ALPHA_NOPV 0x80 +#define STO_ALPHA_STD_GPLOAD 0x88 + +/* + * Alpha ELF relocation types + */ +#define R_ALPHA_NONE 0 /* No reloc */ +#define R_ALPHA_REFLONG 1 /* Direct 32 bit */ +#define R_ALPHA_REFQUAD 2 /* Direct 64 bit */ +#define R_ALPHA_GPREL32 3 /* GP relative 32 bit */ +#define R_ALPHA_LITERAL 4 /* GP relative 16 bit w/optimization */ +#define R_ALPHA_LITUSE 5 /* Optimization hint for LITERAL */ +#define R_ALPHA_GPDISP 6 /* Add displacement to GP */ +#define R_ALPHA_BRADDR 7 /* PC+4 relative 23 bit shifted */ +#define R_ALPHA_HINT 8 /* PC+4 relative 16 bit shifted */ +#define R_ALPHA_SREL16 9 /* PC relative 16 bit */ +#define R_ALPHA_SREL32 10 /* PC relative 32 bit */ +#define R_ALPHA_SREL64 11 /* PC relative 64 bit */ +#define R_ALPHA_GPRELHIGH 17 /* GP relative 32 bit, high 16 bits */ +#define R_ALPHA_GPRELLOW 18 /* GP relative 32 bit, low 16 bits */ +#define R_ALPHA_GPREL16 19 /* GP relative 16 bit */ +#define R_ALPHA_COPY 24 /* Copy symbol at runtime */ +#define R_ALPHA_GLOB_DAT 25 /* Create GOT entry */ +#define R_ALPHA_JMP_SLOT 26 /* Create PLT entry */ +#define R_ALPHA_RELATIVE 27 /* Adjust by program base */ +#define R_ALPHA_BRSGP 28 +#define R_ALPHA_TLSGD 29 +#define R_ALPHA_TLS_LDM 30 +#define R_ALPHA_DTPMOD64 31 +#define R_ALPHA_GOTDTPREL 32 +#define R_ALPHA_DTPREL64 33 +#define R_ALPHA_DTPRELHI 34 +#define R_ALPHA_DTPRELLO 35 +#define R_ALPHA_DTPREL16 36 +#define R_ALPHA_GOTTPREL 37 +#define R_ALPHA_TPREL64 38 +#define R_ALPHA_TPRELHI 39 +#define R_ALPHA_TPRELLO 40 +#define R_ALPHA_TPREL16 41 + +#define SHF_ALPHA_GPREL 0x10000000 + +/* Legal values for e_flags field of Elf64_Ehdr. */ + +#define EF_ALPHA_32BIT 1 /* All addresses are below 2GB */ + +/* + * ELF register definitions.. + */ + +/* + * The OSF/1 version of makes gregset_t 46 entries long. + * I have no idea why that is so. For now, we just leave it at 33 + * (32 general regs + processor status word). + */ +#define ELF_NGREG 33 +#define ELF_NFPREG 32 + +typedef unsigned long elf_greg_t; +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +typedef double elf_fpreg_t; +typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) ((x)->e_machine == EM_ALPHA) + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_CLASS ELFCLASS64 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_ALPHA + +#define ELF_EXEC_PAGESIZE 8192 + +/* This is the location that an ET_DYN program is loaded if exec'ed. Typical + use of this is to invoke "./ld.so someprog" to test out a new version of + the loader. We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. */ + +#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) + +/* $0 is set by ld.so to a pointer to a function which might be + registered using atexit. This provides a mean for the dynamic + linker to call DT_FINI functions for shared libraries that have + been loaded before the code runs. + + So that we can use the same startup file with static executables, + we start programs with a value of 0 to indicate that there is no + such function. */ + +#define ELF_PLAT_INIT(_r, load_addr) _r->r0 = 0 + +/* The registers are laid out in pt_regs for PAL and syscall + convenience. Re-order them for the linear elf_gregset_t. 
*/ + +struct pt_regs; +struct thread_info; +struct task_struct; +extern void dump_elf_thread(elf_greg_t *dest, struct pt_regs *pt, + struct thread_info *ti); +#define ELF_CORE_COPY_REGS(DEST, REGS) \ + dump_elf_thread(DEST, REGS, current_thread_info()); + +/* Similar, but for a thread other than current. */ + +extern int dump_elf_task(elf_greg_t *dest, struct task_struct *task); +#define ELF_CORE_COPY_TASK_REGS(TASK, DEST) \ + dump_elf_task(*(DEST), TASK) + +/* Similar, but for the FP registers. */ + +extern int dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task); +#define ELF_CORE_COPY_FPREGS(TASK, DEST) \ + dump_elf_task_fp(*(DEST), TASK) + +/* This yields a mask that user programs can use to figure out what + instruction set this CPU supports. This is trivial on Alpha, + but not so on other machines. */ + +#define ELF_HWCAP (~amask(-1)) + +/* This yields a string that ld.so will use to load implementation + specific libraries for optimization. This is more specific in + intent than poking at uname or /proc/cpuinfo. */ + +#define ELF_PLATFORM \ +({ \ + enum implver_enum i_ = implver(); \ + ( i_ == IMPLVER_EV4 ? "ev4" \ + : i_ == IMPLVER_EV5 \ + ? (amask(AMASK_BWX) ? "ev5" : "ev56") \ + : amask (AMASK_CIX) ? "ev6" : "ev67"); \ +}) + +#define SET_PERSONALITY(EX) \ + set_personality(((EX).e_flags & EF_ALPHA_32BIT) \ + ? PER_LINUX_32BIT : PER_LINUX) + +extern int alpha_l1i_cacheshape; +extern int alpha_l1d_cacheshape; +extern int alpha_l2_cacheshape; +extern int alpha_l3_cacheshape; + +/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ +#define ARCH_DLINFO \ + do { \ + NEW_AUX_ENT(AT_L1I_CACHESHAPE, alpha_l1i_cacheshape); \ + NEW_AUX_ENT(AT_L1D_CACHESHAPE, alpha_l1d_cacheshape); \ + NEW_AUX_ENT(AT_L2_CACHESHAPE, alpha_l2_cacheshape); \ + NEW_AUX_ENT(AT_L3_CACHESHAPE, alpha_l3_cacheshape); \ + } while (0) + +#endif /* __ASM_ALPHA_ELF_H */ diff --git a/arch/alpha/include/asm/emergency-restart.h b/arch/alpha/include/asm/emergency-restart.h new file mode 100644 index 0000000000..108d8c48e4 --- /dev/null +++ b/arch/alpha/include/asm/emergency-restart.h @@ -0,0 +1,6 @@ +#ifndef _ASM_EMERGENCY_RESTART_H +#define _ASM_EMERGENCY_RESTART_H + +#include + +#endif /* _ASM_EMERGENCY_RESTART_H */ diff --git a/arch/alpha/include/asm/err_common.h b/arch/alpha/include/asm/err_common.h new file mode 100644 index 0000000000..610e01fe73 --- /dev/null +++ b/arch/alpha/include/asm/err_common.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * linux/include/asm-alpha/err_common.h + * + * Copyright (C) 2000 Jeff Wiedemeier (Compaq Computer Corporation) + * + * Contains declarations and macros to support Alpha error handling + * implementations. 
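+ *
+ * (Note: the el_subpacket header defined below carries a class/type
+ * pair that determines which member of its by_type union holds the
+ * payload; the EL_CLASS__ and EL_TYPE__ constants enumerate those
+ * pairs.)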
+ */ + +#ifndef __ALPHA_ERR_COMMON_H +#define __ALPHA_ERR_COMMON_H 1 + +/* + * SCB Vector definitions + */ +#define SCB_Q_SYSERR 0x620 +#define SCB_Q_PROCERR 0x630 +#define SCB_Q_SYSMCHK 0x660 +#define SCB_Q_PROCMCHK 0x670 +#define SCB_Q_SYSEVENT 0x680 + +/* + * Disposition definitions for logout frame parser + */ +#define MCHK_DISPOSITION_UNKNOWN_ERROR 0x00 +#define MCHK_DISPOSITION_REPORT 0x01 +#define MCHK_DISPOSITION_DISMISS 0x02 + +/* + * Error Log definitions + */ +/* + * Types + */ + +#define EL_CLASS__TERMINATION (0) +# define EL_TYPE__TERMINATION__TERMINATION (0) +#define EL_CLASS__HEADER (5) +# define EL_TYPE__HEADER__SYSTEM_ERROR_FRAME (1) +# define EL_TYPE__HEADER__SYSTEM_EVENT_FRAME (2) +# define EL_TYPE__HEADER__HALT_FRAME (3) +# define EL_TYPE__HEADER__LOGOUT_FRAME (19) +#define EL_CLASS__GENERAL_NOTIFICATION (9) +#define EL_CLASS__PCI_ERROR_FRAME (11) +#define EL_CLASS__REGATTA_FAMILY (12) +# define EL_TYPE__REGATTA__PROCESSOR_ERROR_FRAME (1) +# define EL_TYPE__REGATTA__SYSTEM_ERROR_FRAME (2) +# define EL_TYPE__REGATTA__ENVIRONMENTAL_FRAME (3) +# define EL_TYPE__REGATTA__TITAN_PCHIP0_EXTENDED (8) +# define EL_TYPE__REGATTA__TITAN_PCHIP1_EXTENDED (9) +# define EL_TYPE__REGATTA__TITAN_MEMORY_EXTENDED (10) +# define EL_TYPE__REGATTA__PROCESSOR_DBL_ERROR_HALT (11) +# define EL_TYPE__REGATTA__SYSTEM_DBL_ERROR_HALT (12) +#define EL_CLASS__PAL (14) +# define EL_TYPE__PAL__LOGOUT_FRAME (1) +# define EL_TYPE__PAL__EV7_PROCESSOR (4) +# define EL_TYPE__PAL__EV7_ZBOX (5) +# define EL_TYPE__PAL__EV7_RBOX (6) +# define EL_TYPE__PAL__EV7_IO (7) +# define EL_TYPE__PAL__ENV__AMBIENT_TEMPERATURE (10) +# define EL_TYPE__PAL__ENV__AIRMOVER_FAN (11) +# define EL_TYPE__PAL__ENV__VOLTAGE (12) +# define EL_TYPE__PAL__ENV__INTRUSION (13) +# define EL_TYPE__PAL__ENV__POWER_SUPPLY (14) +# define EL_TYPE__PAL__ENV__LAN (15) +# define EL_TYPE__PAL__ENV__HOT_PLUG (16) + +union el_timestamp { + struct { + u8 second; + u8 minute; + u8 hour; + u8 day; + u8 month; + u8 year; + } b; + u64 as_int; +}; + +struct el_subpacket { + u16 length; /* length of header (in bytes) */ + u16 class; /* header class and type... */ + u16 type; /* ...determine content */ + u16 revision; /* header revision */ + union { + struct { /* Class 5, Type 1 - System Error */ + u32 frame_length; + u32 frame_packet_count; + } sys_err; + struct { /* Class 5, Type 2 - System Event */ + union el_timestamp timestamp; + u32 frame_length; + u32 frame_packet_count; + } sys_event; + struct { /* Class 5, Type 3 - Double Error Halt */ + u16 halt_code; + u16 reserved; + union el_timestamp timestamp; + u32 frame_length; + u32 frame_packet_count; + } err_halt; + struct { /* Clasee 5, Type 19 - Logout Frame Header */ + u32 frame_length; + u32 frame_flags; + u32 cpu_offset; + u32 system_offset; + } logout_header; + struct { /* Class 12 - Regatta */ + u64 cpuid; + u64 data_start[1]; + } regatta_frame; + struct { /* Raw */ + u64 data_start[1]; + } raw; + } by_type; +}; + +#endif /* __ALPHA_ERR_COMMON_H */ diff --git a/arch/alpha/include/asm/err_ev6.h b/arch/alpha/include/asm/err_ev6.h new file mode 100644 index 0000000000..ea637791e4 --- /dev/null +++ b/arch/alpha/include/asm/err_ev6.h @@ -0,0 +1,6 @@ +#ifndef __ALPHA_ERR_EV6_H +#define __ALPHA_ERR_EV6_H 1 + +/* Dummy include for now. 
*/ + +#endif /* __ALPHA_ERR_EV6_H */ diff --git a/arch/alpha/include/asm/err_ev7.h b/arch/alpha/include/asm/err_ev7.h new file mode 100644 index 0000000000..ab17e63337 --- /dev/null +++ b/arch/alpha/include/asm/err_ev7.h @@ -0,0 +1,203 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_ERR_EV7_H +#define __ALPHA_ERR_EV7_H 1 + +/* + * Data for el packet class PAL (14), type LOGOUT_FRAME (1) + */ +struct ev7_pal_logout_subpacket { + u32 mchk_code; + u32 subpacket_count; + u64 whami; + u64 rbox_whami; + u64 rbox_int; + u64 exc_addr; + union el_timestamp timestamp; + u64 halt_code; + u64 reserved; +}; + +/* + * Data for el packet class PAL (14), type EV7_PROCESSOR (4) + */ +struct ev7_pal_processor_subpacket { + u64 i_stat; + u64 dc_stat; + u64 c_addr; + u64 c_syndrome_1; + u64 c_syndrome_0; + u64 c_stat; + u64 c_sts; + u64 mm_stat; + u64 exc_addr; + u64 ier_cm; + u64 isum; + u64 pal_base; + u64 i_ctl; + u64 process_context; + u64 cbox_ctl; + u64 cbox_stp_ctl; + u64 cbox_acc_ctl; + u64 cbox_lcl_set; + u64 cbox_gbl_set; + u64 bbox_ctl; + u64 bbox_err_sts; + u64 bbox_err_idx; + u64 cbox_ddp_err_sts; + u64 bbox_dat_rmp; + u64 reserved[2]; +}; + +/* + * Data for el packet class PAL (14), type EV7_ZBOX (5) + */ +struct ev7_pal_zbox_subpacket { + u32 zbox0_dram_err_status_1; + u32 zbox0_dram_err_status_2; + u32 zbox0_dram_err_status_3; + u32 zbox0_dram_err_ctl; + u32 zbox0_dram_err_adr; + u32 zbox0_dift_timeout; + u32 zbox0_dram_mapper_ctl; + u32 zbox0_frc_err_adr; + u32 zbox0_dift_err_status; + u32 reserved1; + u32 zbox1_dram_err_status_1; + u32 zbox1_dram_err_status_2; + u32 zbox1_dram_err_status_3; + u32 zbox1_dram_err_ctl; + u32 zbox1_dram_err_adr; + u32 zbox1_dift_timeout; + u32 zbox1_dram_mapper_ctl; + u32 zbox1_frc_err_adr; + u32 zbox1_dift_err_status; + u32 reserved2; + u64 cbox_ctl; + u64 cbox_stp_ctl; + u64 zbox0_error_pa; + u64 zbox1_error_pa; + u64 zbox0_ored_syndrome; + u64 zbox1_ored_syndrome; + u64 reserved3[2]; +}; + +/* + * Data for el packet class PAL (14), type EV7_RBOX (6) + */ +struct ev7_pal_rbox_subpacket { + u64 rbox_cfg; + u64 rbox_n_cfg; + u64 rbox_s_cfg; + u64 rbox_e_cfg; + u64 rbox_w_cfg; + u64 rbox_n_err; + u64 rbox_s_err; + u64 rbox_e_err; + u64 rbox_w_err; + u64 rbox_io_cfg; + u64 rbox_io_err; + u64 rbox_l_err; + u64 rbox_whoami; + u64 rbox_imask; + u64 rbox_intq; + u64 rbox_int; + u64 reserved[2]; +}; + +/* + * Data for el packet class PAL (14), type EV7_IO (7) + */ +struct ev7_pal_io_one_port { + u64 pox_err_sum; + u64 pox_tlb_err; + u64 pox_spl_cmplt; + u64 pox_trans_sum; + u64 pox_first_err; + u64 pox_mult_err; + u64 pox_dm_source; + u64 pox_dm_dest; + u64 pox_dm_size; + u64 pox_dm_ctrl; + u64 reserved; +}; + +struct ev7_pal_io_subpacket { + u64 io_asic_rev; + u64 io_sys_rev; + u64 io7_uph; + u64 hpi_ctl; + u64 crd_ctl; + u64 hei_ctl; + u64 po7_error_sum; + u64 po7_uncrr_sym; + u64 po7_crrct_sym; + u64 po7_ugbge_sym; + u64 po7_err_pkt0; + u64 po7_err_pkt1; + u64 reserved[2]; + struct ev7_pal_io_one_port ports[4]; +}; + +/* + * Environmental subpacket. 
Data used for el packets: + * class PAL (14), type AMBIENT_TEMPERATURE (10) + * class PAL (14), type AIRMOVER_FAN (11) + * class PAL (14), type VOLTAGE (12) + * class PAL (14), type INTRUSION (13) + * class PAL (14), type POWER_SUPPLY (14) + * class PAL (14), type LAN (15) + * class PAL (14), type HOT_PLUG (16) + */ +struct ev7_pal_environmental_subpacket { + u16 cabinet; + u16 drawer; + u16 reserved1[2]; + u8 module_type; + u8 unit_id; /* unit reporting condition */ + u8 reserved2; + u8 condition; /* condition reported */ +}; + +/* + * Convert environmental type to index + */ +static inline int ev7_lf_env_index(int type) +{ + BUG_ON((type < EL_TYPE__PAL__ENV__AMBIENT_TEMPERATURE) + || (type > EL_TYPE__PAL__ENV__HOT_PLUG)); + + return type - EL_TYPE__PAL__ENV__AMBIENT_TEMPERATURE; +} + +/* + * Data for generic el packet class PAL. + */ +struct ev7_pal_subpacket { + union { + struct ev7_pal_logout_subpacket logout; /* Type 1 */ + struct ev7_pal_processor_subpacket ev7; /* Type 4 */ + struct ev7_pal_zbox_subpacket zbox; /* Type 5 */ + struct ev7_pal_rbox_subpacket rbox; /* Type 6 */ + struct ev7_pal_io_subpacket io; /* Type 7 */ + struct ev7_pal_environmental_subpacket env; /* Type 10-16 */ + u64 as_quad[1]; /* Raw u64 */ + } by_type; +}; + +/* + * Struct to contain collected logout from subpackets. + */ +struct ev7_lf_subpackets { + struct ev7_pal_logout_subpacket *logout; /* Type 1 */ + struct ev7_pal_processor_subpacket *ev7; /* Type 4 */ + struct ev7_pal_zbox_subpacket *zbox; /* Type 5 */ + struct ev7_pal_rbox_subpacket *rbox; /* Type 6 */ + struct ev7_pal_io_subpacket *io; /* Type 7 */ + struct ev7_pal_environmental_subpacket *env[7]; /* Type 10-16 */ + + unsigned int io_pid; +}; + +#endif /* __ALPHA_ERR_EV7_H */ + + diff --git a/arch/alpha/include/asm/extable.h b/arch/alpha/include/asm/extable.h new file mode 100644 index 0000000000..e42592390e --- /dev/null +++ b/arch/alpha/include/asm/extable.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_EXTABLE_H +#define _ASM_EXTABLE_H + +/* + * About the exception table: + * + * - insn is a 32-bit pc-relative offset from the faulting insn. + * - nextinsn is a 16-bit offset off of the faulting instruction + * (not off of the *next* instruction as branches are). + * - errreg is the register in which to place -EFAULT. + * - valreg is the final target register for the load sequence + * and will be zeroed. + * + * Either errreg or valreg may be $31, in which case nothing happens. + * + * The exception fixup information "just so happens" to be arranged + * as in a MEM format instruction. 
This lets us emit our three + * values like so: + * + * lda valreg, nextinsn(errreg) + * + */ + +struct exception_table_entry +{ + signed int insn; + union exception_fixup { + unsigned unit; + struct { + signed int nextinsn : 16; + unsigned int errreg : 5; + unsigned int valreg : 5; + } bits; + } fixup; +}; + +/* Returns the new pc */ +#define fixup_exception(map_reg, _fixup, pc) \ +({ \ + if ((_fixup)->fixup.bits.valreg != 31) \ + map_reg((_fixup)->fixup.bits.valreg) = 0; \ + if ((_fixup)->fixup.bits.errreg != 31) \ + map_reg((_fixup)->fixup.bits.errreg) = -EFAULT; \ + (pc) + (_fixup)->fixup.bits.nextinsn; \ +}) + +#define ARCH_HAS_RELATIVE_EXTABLE + +#define swap_ex_entry_fixup(a, b, tmp, delta) \ + do { \ + (a)->fixup.unit = (b)->fixup.unit; \ + (b)->fixup.unit = (tmp).fixup.unit; \ + } while (0) + +#endif diff --git a/arch/alpha/include/asm/floppy.h b/arch/alpha/include/asm/floppy.h new file mode 100644 index 0000000000..8dfdb3aa1d --- /dev/null +++ b/arch/alpha/include/asm/floppy.h @@ -0,0 +1,113 @@ +/* + * Architecture specific parts of the Floppy driver + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1995 + */ +#ifndef __ASM_ALPHA_FLOPPY_H +#define __ASM_ALPHA_FLOPPY_H + + +#define fd_inb(base, reg) inb_p((base) + (reg)) +#define fd_outb(value, base, reg) outb_p(value, (base) + (reg)) + +#define fd_enable_dma() enable_dma(FLOPPY_DMA) +#define fd_disable_dma() disable_dma(FLOPPY_DMA) +#define fd_request_dma() request_dma(FLOPPY_DMA,"floppy") +#define fd_free_dma() free_dma(FLOPPY_DMA) +#define fd_clear_dma_ff() clear_dma_ff(FLOPPY_DMA) +#define fd_set_dma_mode(mode) set_dma_mode(FLOPPY_DMA,mode) +#define fd_set_dma_addr(addr) set_dma_addr(FLOPPY_DMA,virt_to_bus(addr)) +#define fd_set_dma_count(count) set_dma_count(FLOPPY_DMA,count) +#define fd_enable_irq() enable_irq(FLOPPY_IRQ) +#define fd_disable_irq() disable_irq(FLOPPY_IRQ) +#define fd_request_irq() request_irq(FLOPPY_IRQ, floppy_interrupt,\ + 0, "floppy", NULL) +#define fd_free_irq() free_irq(FLOPPY_IRQ, NULL) + +#ifdef CONFIG_PCI + +#include + +#define fd_dma_setup(addr,size,mode,io) alpha_fd_dma_setup(addr,size,mode,io) + +static __inline__ int +alpha_fd_dma_setup(char *addr, unsigned long size, int mode, int io) +{ + static unsigned long prev_size; + static dma_addr_t bus_addr = 0; + static char *prev_addr; + static int prev_dir; + int dir; + + dir = (mode != DMA_MODE_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE; + + if (bus_addr + && (addr != prev_addr || size != prev_size || dir != prev_dir)) { + /* different from last time -- unmap prev */ + pci_unmap_single(isa_bridge, bus_addr, prev_size, prev_dir); + bus_addr = 0; + } + + if (!bus_addr) /* need to map it */ + bus_addr = pci_map_single(isa_bridge, addr, size, dir); + + /* remember this one as prev */ + prev_addr = addr; + prev_size = size; + prev_dir = dir; + + fd_clear_dma_ff(); + fd_set_dma_mode(mode); + set_dma_addr(FLOPPY_DMA, bus_addr); + fd_set_dma_count(size); + virtual_dma_port = io; + fd_enable_dma(); + + return 0; +} + +#endif /* CONFIG_PCI */ + +__inline__ void virtual_dma_init(void) +{ + /* Nothing to do on an Alpha */ +} + +static int FDC1 = 0x3f0; +static int FDC2 = -1; + +/* + * Again, the CMOS information doesn't work on the alpha.. 
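+ *
+ * (FLOPPY0_TYPE below is therefore hard-wired; type 6 is presumably
+ * the 2.88MB ED entry of the usual PC drive-type table.)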
+ */ +#define FLOPPY0_TYPE 6 +#define FLOPPY1_TYPE 0 + +#define N_FDC 2 +#define N_DRIVE 8 + +/* + * Most Alphas have no problems with floppy DMA crossing 64k borders, + * except for certain ones, like XL and RUFFIAN. + * + * However, the test is simple and fast, and this *is* floppy, after all, + * so we do it for all platforms, just to make sure. + * + * This is advantageous in other circumstances as well, as in moving + * about the PCI DMA windows and forcing the floppy to start doing + * scatter-gather when it never had before, and there *is* a problem + * on that platform... ;-} + */ + +static inline unsigned long CROSS_64KB(void *a, unsigned long s) +{ + unsigned long p = (unsigned long)a; + return ((p + s - 1) ^ p) & ~0xffffUL; +} + +#define EXTRA_FLOPPY_PARAMS + +#endif /* __ASM_ALPHA_FLOPPY_H */ diff --git a/arch/alpha/include/asm/fpu.h b/arch/alpha/include/asm/fpu.h new file mode 100644 index 0000000000..b9691405e5 --- /dev/null +++ b/arch/alpha/include/asm/fpu.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_ALPHA_FPU_H +#define __ASM_ALPHA_FPU_H + +#include +#include + +/* The following two functions don't need trapb/excb instructions + around the mf_fpcr/mt_fpcr instructions because (a) the kernel + never generates arithmetic faults and (b) call_pal instructions + are implied trap barriers. */ + +static inline unsigned long +rdfpcr(void) +{ + unsigned long tmp, ret; + +#if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67) + __asm__ __volatile__ ( + "ftoit $f0,%0\n\t" + "mf_fpcr $f0\n\t" + "ftoit $f0,%1\n\t" + "itoft %0,$f0" + : "=r"(tmp), "=r"(ret)); +#else + __asm__ __volatile__ ( + "stt $f0,%0\n\t" + "mf_fpcr $f0\n\t" + "stt $f0,%1\n\t" + "ldt $f0,%0" + : "=m"(tmp), "=m"(ret)); +#endif + + return ret; +} + +static inline void +wrfpcr(unsigned long val) +{ + unsigned long tmp; + +#if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67) + __asm__ __volatile__ ( + "ftoit $f0,%0\n\t" + "itoft %1,$f0\n\t" + "mt_fpcr $f0\n\t" + "itoft %0,$f0" + : "=&r"(tmp) : "r"(val)); +#else + __asm__ __volatile__ ( + "stt $f0,%0\n\t" + "ldt $f0,%1\n\t" + "mt_fpcr $f0\n\t" + "ldt $f0,%0" + : "=m"(tmp) : "m"(val)); +#endif +} + +static inline unsigned long +swcr_update_status(unsigned long swcr, unsigned long fpcr) +{ + /* EV6 implements most of the bits in hardware. Collect + the acrued exception bits from the real fpcr. 
*/ + if (implver() == IMPLVER_EV6) { + swcr &= ~IEEE_STATUS_MASK; + swcr |= (fpcr >> 35) & IEEE_STATUS_MASK; + } + return swcr; +} + +extern unsigned long alpha_read_fp_reg (unsigned long reg); +extern void alpha_write_fp_reg (unsigned long reg, unsigned long val); +extern unsigned long alpha_read_fp_reg_s (unsigned long reg); +extern void alpha_write_fp_reg_s (unsigned long reg, unsigned long val); + +#endif /* __ASM_ALPHA_FPU_H */ diff --git a/arch/alpha/include/asm/ftrace.h b/arch/alpha/include/asm/ftrace.h new file mode 100644 index 0000000000..40a8c178f1 --- /dev/null +++ b/arch/alpha/include/asm/ftrace.h @@ -0,0 +1 @@ +/* empty */ diff --git a/arch/alpha/include/asm/futex.h b/arch/alpha/include/asm/futex.h new file mode 100644 index 0000000000..da67afd578 --- /dev/null +++ b/arch/alpha/include/asm/futex.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ALPHA_FUTEX_H +#define _ASM_ALPHA_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include +#include + +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ + __asm__ __volatile__( \ + __ASM_SMP_MB \ + "1: ldl_l %0,0(%2)\n" \ + insn \ + "2: stl_c %1,0(%2)\n" \ + " beq %1,4f\n" \ + " mov $31,%1\n" \ + "3: .subsection 2\n" \ + "4: br 1b\n" \ + " .previous\n" \ + EXC(1b,3b,$31,%1) \ + EXC(2b,3b,$31,%1) \ + : "=&r" (oldval), "=&r"(ret) \ + : "r" (uaddr), "r"(oparg) \ + : "memory") + +static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, + u32 __user *uaddr) +{ + int oldval = 0, ret; + + if (!access_ok(uaddr, sizeof(u32))) + return -EFAULT; + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op("mov %3,%1\n", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op("addl %0,%3,%1\n", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op("or %0,%3,%1\n", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op("andnot %0,%3,%1\n", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op("xor %0,%3,%1\n", ret, oldval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } + + if (!ret) + *oval = oldval; + + return ret; +} + +static inline int +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + int ret = 0, cmp; + u32 prev; + + if (!access_ok(uaddr, sizeof(u32))) + return -EFAULT; + + __asm__ __volatile__ ( + __ASM_SMP_MB + "1: ldl_l %1,0(%3)\n" + " cmpeq %1,%4,%2\n" + " beq %2,3f\n" + " mov %5,%2\n" + "2: stl_c %2,0(%3)\n" + " beq %2,4f\n" + "3: .subsection 2\n" + "4: br 1b\n" + " .previous\n" + EXC(1b,3b,$31,%0) + EXC(2b,3b,$31,%0) + : "+r"(ret), "=&r"(prev), "=&r"(cmp) + : "r"(uaddr), "r"((long)(int)oldval), "r"(newval) + : "memory"); + + *uval = prev; + return ret; +} + +#endif /* __KERNEL__ */ +#endif /* _ASM_ALPHA_FUTEX_H */ diff --git a/arch/alpha/include/asm/gct.h b/arch/alpha/include/asm/gct.h new file mode 100644 index 0000000000..2847449fb1 --- /dev/null +++ b/arch/alpha/include/asm/gct.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_GCT_H +#define __ALPHA_GCT_H + +typedef u64 gct_id; +typedef u64 gct6_handle; + +typedef struct __gct6_node { + u8 type; + u8 subtype; + u16 size; + u32 hd_extension; + gct6_handle owner; + gct6_handle active_user; + gct_id id; + u64 flags; + u16 rev; + u16 change_counter; + u16 max_child; + u16 reserved1; + gct6_handle saved_owner; + gct6_handle affinity; + gct6_handle parent; + gct6_handle next; + gct6_handle prev; + gct6_handle child; + u64 fw_flags; + u64 os_usage; + u64 fru_id; + u32 
checksum; + u32 magic; /* 'GLXY' */ +} gct6_node; + +typedef struct { + u8 type; + u8 subtype; + void (*callout)(gct6_node *); +} gct6_search_struct; + +#define GCT_NODE_MAGIC 0x59584c47 /* 'GLXY' */ + +/* + * node types + */ +#define GCT_TYPE_HOSE 0x0E + +/* + * node subtypes + */ +#define GCT_SUBTYPE_IO_PORT_MODULE 0x2C + +#define GCT_NODE_PTR(off) ((gct6_node *)((char *)hwrpb + \ + hwrpb->frut_offset + \ + (gct6_handle)(off))) \ + +int gct6_find_nodes(gct6_node *, gct6_search_struct *); + +#endif /* __ALPHA_GCT_H */ + diff --git a/arch/alpha/include/asm/hardirq.h b/arch/alpha/include/asm/hardirq.h new file mode 100644 index 0000000000..5ce5b34e8a --- /dev/null +++ b/arch/alpha/include/asm/hardirq.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_HARDIRQ_H +#define _ALPHA_HARDIRQ_H + +void ack_bad_irq(unsigned int irq); +#define ack_bad_irq ack_bad_irq + +#include + +#endif /* _ALPHA_HARDIRQ_H */ diff --git a/arch/alpha/include/asm/hw_irq.h b/arch/alpha/include/asm/hw_irq.h new file mode 100644 index 0000000000..e2d81ac0d9 --- /dev/null +++ b/arch/alpha/include/asm/hw_irq.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_HW_IRQ_H +#define _ALPHA_HW_IRQ_H + + +extern volatile unsigned long irq_err_count; +DECLARE_PER_CPU(unsigned long, irq_pmi_count); + +#ifdef CONFIG_ALPHA_GENERIC +#define ACTUAL_NR_IRQS alpha_mv.nr_irqs +#else +#define ACTUAL_NR_IRQS NR_IRQS +#endif + +#endif diff --git a/arch/alpha/include/asm/hwrpb.h b/arch/alpha/include/asm/hwrpb.h new file mode 100644 index 0000000000..d8180e527a --- /dev/null +++ b/arch/alpha/include/asm/hwrpb.h @@ -0,0 +1,221 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_HWRPB_H +#define __ALPHA_HWRPB_H + +#define INIT_HWRPB ((struct hwrpb_struct *) 0x10000000) + +/* + * DEC processor types for Alpha systems. Found in HWRPB. + * These values are architected. + */ + +#define EV3_CPU 1 /* EV3 */ +#define EV4_CPU 2 /* EV4 (21064) */ +#define LCA4_CPU 4 /* LCA4 (21066/21068) */ +#define EV5_CPU 5 /* EV5 (21164) */ +#define EV45_CPU 6 /* EV4.5 (21064/xxx) */ +#define EV56_CPU 7 /* EV5.6 (21164) */ +#define EV6_CPU 8 /* EV6 (21264) */ +#define PCA56_CPU 9 /* PCA56 (21164PC) */ +#define PCA57_CPU 10 /* PCA57 (notyet) */ +#define EV67_CPU 11 /* EV67 (21264A) */ +#define EV68CB_CPU 12 /* EV68CB (21264C) */ +#define EV68AL_CPU 13 /* EV68AL (21264B) */ +#define EV68CX_CPU 14 /* EV68CX (21264D) */ +#define EV7_CPU 15 /* EV7 (21364) */ +#define EV79_CPU 16 /* EV79 (21364??) */ +#define EV69_CPU 17 /* EV69 (21264/EV69A) */ + +/* + * DEC system types for Alpha systems. Found in HWRPB. + * These values are architected. 
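+ *
+ * (The sys_type field of struct hwrpb_struct below holds one of these
+ * values; generic kernels presumably key their platform selection off
+ * it at boot time.)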
+ */ + +#define ST_ADU 1 /* Alpha ADU systype */ +#define ST_DEC_4000 2 /* Cobra systype */ +#define ST_DEC_7000 3 /* Ruby systype */ +#define ST_DEC_3000_500 4 /* Flamingo systype */ +#define ST_DEC_2000_300 6 /* Jensen systype */ +#define ST_DEC_3000_300 7 /* Pelican systype */ +#define ST_DEC_2100_A500 9 /* Sable systype */ +#define ST_DEC_AXPVME_64 10 /* AXPvme system type */ +#define ST_DEC_AXPPCI_33 11 /* NoName system type */ +#define ST_DEC_TLASER 12 /* Turbolaser systype */ +#define ST_DEC_2100_A50 13 /* Avanti systype */ +#define ST_DEC_MUSTANG 14 /* Mustang systype */ +#define ST_DEC_ALCOR 15 /* Alcor (EV5) systype */ +#define ST_DEC_1000 17 /* Mikasa systype */ +#define ST_DEC_EB64 18 /* EB64 systype */ +#define ST_DEC_EB66 19 /* EB66 systype */ +#define ST_DEC_EB64P 20 /* EB64+ systype */ +#define ST_DEC_BURNS 21 /* laptop systype */ +#define ST_DEC_RAWHIDE 22 /* Rawhide systype */ +#define ST_DEC_K2 23 /* K2 systype */ +#define ST_DEC_LYNX 24 /* Lynx systype */ +#define ST_DEC_XL 25 /* Alpha XL systype */ +#define ST_DEC_EB164 26 /* EB164 systype */ +#define ST_DEC_NORITAKE 27 /* Noritake systype */ +#define ST_DEC_CORTEX 28 /* Cortex systype */ +#define ST_DEC_MIATA 30 /* Miata systype */ +#define ST_DEC_XXM 31 /* XXM systype */ +#define ST_DEC_TAKARA 32 /* Takara systype */ +#define ST_DEC_YUKON 33 /* Yukon systype */ +#define ST_DEC_TSUNAMI 34 /* Tsunami systype */ +#define ST_DEC_WILDFIRE 35 /* Wildfire systype */ +#define ST_DEC_CUSCO 36 /* CUSCO systype */ +#define ST_DEC_EIGER 37 /* Eiger systype */ +#define ST_DEC_TITAN 38 /* Titan systype */ +#define ST_DEC_MARVEL 39 /* Marvel systype */ + +/* UNOFFICIAL!!! */ +#define ST_UNOFFICIAL_BIAS 100 +#define ST_DTI_RUFFIAN 101 /* RUFFIAN systype */ + +/* Alpha Processor, Inc. systems */ +#define ST_API_BIAS 200 +#define ST_API_NAUTILUS 201 /* UP1000 systype */ + +struct pcb_struct { + unsigned long ksp; + unsigned long usp; + unsigned long ptbr; + unsigned int pcc; + unsigned int asn; + unsigned long unique; + unsigned long flags; + unsigned long res1, res2; +}; + +struct percpu_struct { + unsigned long hwpcb[16]; + unsigned long flags; + unsigned long pal_mem_size; + unsigned long pal_scratch_size; + unsigned long pal_mem_pa; + unsigned long pal_scratch_pa; + unsigned long pal_revision; + unsigned long type; + unsigned long variation; + unsigned long revision; + unsigned long serial_no[2]; + unsigned long logout_area_pa; + unsigned long logout_area_len; + unsigned long halt_PCBB; + unsigned long halt_PC; + unsigned long halt_PS; + unsigned long halt_arg; + unsigned long halt_ra; + unsigned long halt_pv; + unsigned long halt_reason; + unsigned long res; + unsigned long ipc_buffer[21]; + unsigned long palcode_avail[16]; + unsigned long compatibility; + unsigned long console_data_log_pa; + unsigned long console_data_log_length; + unsigned long bcache_info; +}; + +struct procdesc_struct { + unsigned long weird_vms_stuff; + unsigned long address; +}; + +struct vf_map_struct { + unsigned long va; + unsigned long pa; + unsigned long count; +}; + +struct crb_struct { + struct procdesc_struct * dispatch_va; + struct procdesc_struct * dispatch_pa; + struct procdesc_struct * fixup_va; + struct procdesc_struct * fixup_pa; + /* virtual->physical map */ + unsigned long map_entries; + unsigned long map_pages; + struct vf_map_struct map[1]; +}; + +struct memclust_struct { + unsigned long start_pfn; + unsigned long numpages; + unsigned long numtested; + unsigned long bitmap_va; + unsigned long bitmap_pa; + unsigned long bitmap_chksum; + 
unsigned long usage; +}; + +struct memdesc_struct { + unsigned long chksum; + unsigned long optional_pa; + unsigned long numclusters; + struct memclust_struct cluster[0]; +}; + +struct dsr_struct { + long smm; /* SMM nubber used by LMF */ + unsigned long lurt_off; /* offset to LURT table */ + unsigned long sysname_off; /* offset to sysname char count */ +}; + +struct hwrpb_struct { + unsigned long phys_addr; /* check: physical address of the hwrpb */ + unsigned long id; /* check: "HWRPB\0\0\0" */ + unsigned long revision; + unsigned long size; /* size of hwrpb */ + unsigned long cpuid; + unsigned long pagesize; /* 8192, I hope */ + unsigned long pa_bits; /* number of physical address bits */ + unsigned long max_asn; + unsigned char ssn[16]; /* system serial number: big bother is watching */ + unsigned long sys_type; + unsigned long sys_variation; + unsigned long sys_revision; + unsigned long intr_freq; /* interval clock frequency * 4096 */ + unsigned long cycle_freq; /* cycle counter frequency */ + unsigned long vptb; /* Virtual Page Table Base address */ + unsigned long res1; + unsigned long tbhb_offset; /* Translation Buffer Hint Block */ + unsigned long nr_processors; + unsigned long processor_size; + unsigned long processor_offset; + unsigned long ctb_nr; + unsigned long ctb_size; /* console terminal block size */ + unsigned long ctbt_offset; /* console terminal block table offset */ + unsigned long crb_offset; /* console callback routine block */ + unsigned long mddt_offset; /* memory data descriptor table */ + unsigned long cdb_offset; /* configuration data block (or NULL) */ + unsigned long frut_offset; /* FRU table (or NULL) */ + void (*save_terminal)(unsigned long); + unsigned long save_terminal_data; + void (*restore_terminal)(unsigned long); + unsigned long restore_terminal_data; + void (*CPU_restart)(unsigned long); + unsigned long CPU_restart_data; + unsigned long res2; + unsigned long res3; + unsigned long chksum; + unsigned long rxrdy; + unsigned long txrdy; + unsigned long dsr_offset; /* "Dynamic System Recognition Data Block Table" */ +}; + +#ifdef __KERNEL__ + +extern struct hwrpb_struct *hwrpb; + +static inline void +hwrpb_update_checksum(struct hwrpb_struct *h) +{ + unsigned long sum = 0, *l; + for (l = (unsigned long *) h; l < (unsigned long *) &h->chksum; ++l) + sum += *l; + h->chksum = sum; +} + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_HWRPB_H */ diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h new file mode 100644 index 0000000000..c9cb554fbe --- /dev/null +++ b/arch/alpha/include/asm/io.h @@ -0,0 +1,607 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_IO_H +#define __ALPHA_IO_H + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include + +/* The generic header contains only prototypes. Including it ensures that + the implementation we have here matches that interface. */ +#include + +/* We don't use IO slowdowns on the Alpha, but.. */ +#define __SLOW_DOWN_IO do { } while (0) +#define SLOW_DOWN_IO do { } while (0) + +/* + * Virtual -> physical identity mapping starts at this offset + */ +#ifdef USE_48_BIT_KSEG +#define IDENT_ADDR 0xffff800000000000UL +#else +#define IDENT_ADDR 0xfffffc0000000000UL +#endif + +/* + * We try to avoid hae updates (thus the cache), but when we + * do need to update the hae, we need to do it atomically, so + * that any interrupts wouldn't get confused with the hae + * register not being up-to-date with respect to the hardware + * value. 
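+ *
+ * (set_hae() below compares the new value against the cached one
+ * first, so the common no-change case skips the IPL raise and the
+ * MMIO write done by __set_hae().)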
+ */ +extern inline void __set_hae(unsigned long new_hae) +{ + unsigned long flags = swpipl(IPL_MAX); + + barrier(); + + alpha_mv.hae_cache = new_hae; + *alpha_mv.hae_register = new_hae; + mb(); + /* Re-read to make sure it was written. */ + new_hae = *alpha_mv.hae_register; + + setipl(flags); + barrier(); +} + +extern inline void set_hae(unsigned long new_hae) +{ + if (new_hae != alpha_mv.hae_cache) + __set_hae(new_hae); +} + +/* + * Change virtual addresses to physical addresses and vv. + */ +#ifdef USE_48_BIT_KSEG +static inline unsigned long virt_to_phys(volatile void *address) +{ + return (unsigned long)address - IDENT_ADDR; +} + +static inline void * phys_to_virt(unsigned long address) +{ + return (void *) (address + IDENT_ADDR); +} +#else +static inline unsigned long virt_to_phys(volatile void *address) +{ + unsigned long phys = (unsigned long)address; + + /* Sign-extend from bit 41. */ + phys <<= (64 - 41); + phys = (long)phys >> (64 - 41); + + /* Crop to the physical address width of the processor. */ + phys &= (1ul << hwrpb->pa_bits) - 1; + + return phys; +} + +static inline void * phys_to_virt(unsigned long address) +{ + return (void *)(IDENT_ADDR + (address & ((1ul << 41) - 1))); +} +#endif + +#define page_to_phys(page) page_to_pa(page) + +/* Maximum PIO space address supported? */ +#define IO_SPACE_LIMIT 0xffff + +/* + * Change addresses as seen by the kernel (virtual) to addresses as + * seen by a device (bus), and vice versa. + * + * Note that this only works for a limited range of kernel addresses, + * and very well may not span all memory. Consider this interface + * deprecated in favour of the DMA-mapping API. + */ +extern unsigned long __direct_map_base; +extern unsigned long __direct_map_size; + +static inline unsigned long __deprecated virt_to_bus(volatile void *address) +{ + unsigned long phys = virt_to_phys(address); + unsigned long bus = phys + __direct_map_base; + return phys <= __direct_map_size ? bus : 0; +} +#define isa_virt_to_bus virt_to_bus + +static inline void * __deprecated bus_to_virt(unsigned long address) +{ + void *virt; + + /* This check is a sanity check but also ensures that bus address 0 + maps to virtual address 0 which is useful to detect null pointers + (the NCR driver is much simpler if NULL pointers are preserved). */ + address -= __direct_map_base; + virt = phys_to_virt(address); + return (long)address <= 0 ? NULL : virt; +} +#define isa_bus_to_virt bus_to_virt + +/* + * There are different chipsets to interface the Alpha CPUs to the world. + */ + +#define IO_CONCAT(a,b) _IO_CONCAT(a,b) +#define _IO_CONCAT(a,b) a ## _ ## b + +#ifdef CONFIG_ALPHA_GENERIC + +/* In a generic kernel, we always go through the machine vector. 
*/ + +#define REMAP1(TYPE, NAME, QUAL) \ +static inline TYPE generic_##NAME(QUAL void __iomem *addr) \ +{ \ + return alpha_mv.mv_##NAME(addr); \ +} + +#define REMAP2(TYPE, NAME, QUAL) \ +static inline void generic_##NAME(TYPE b, QUAL void __iomem *addr) \ +{ \ + alpha_mv.mv_##NAME(b, addr); \ +} + +REMAP1(unsigned int, ioread8, const) +REMAP1(unsigned int, ioread16, const) +REMAP1(unsigned int, ioread32, const) +REMAP1(u8, readb, const volatile) +REMAP1(u16, readw, const volatile) +REMAP1(u32, readl, const volatile) +REMAP1(u64, readq, const volatile) + +REMAP2(u8, iowrite8, /**/) +REMAP2(u16, iowrite16, /**/) +REMAP2(u32, iowrite32, /**/) +REMAP2(u8, writeb, volatile) +REMAP2(u16, writew, volatile) +REMAP2(u32, writel, volatile) +REMAP2(u64, writeq, volatile) + +#undef REMAP1 +#undef REMAP2 + +extern inline void __iomem *generic_ioportmap(unsigned long a) +{ + return alpha_mv.mv_ioportmap(a); +} + +static inline void __iomem *generic_ioremap(unsigned long a, unsigned long s) +{ + return alpha_mv.mv_ioremap(a, s); +} + +static inline void generic_iounmap(volatile void __iomem *a) +{ + return alpha_mv.mv_iounmap(a); +} + +static inline int generic_is_ioaddr(unsigned long a) +{ + return alpha_mv.mv_is_ioaddr(a); +} + +static inline int generic_is_mmio(const volatile void __iomem *a) +{ + return alpha_mv.mv_is_mmio(a); +} + +#define __IO_PREFIX generic +#define generic_trivial_rw_bw 0 +#define generic_trivial_rw_lq 0 +#define generic_trivial_io_bw 0 +#define generic_trivial_io_lq 0 +#define generic_trivial_iounmap 0 + +#else + +#if defined(CONFIG_ALPHA_APECS) +# include +#elif defined(CONFIG_ALPHA_CIA) +# include +#elif defined(CONFIG_ALPHA_IRONGATE) +# include +#elif defined(CONFIG_ALPHA_JENSEN) +# include +#elif defined(CONFIG_ALPHA_LCA) +# include +#elif defined(CONFIG_ALPHA_MARVEL) +# include +#elif defined(CONFIG_ALPHA_MCPCIA) +# include +#elif defined(CONFIG_ALPHA_POLARIS) +# include +#elif defined(CONFIG_ALPHA_T2) +# include +#elif defined(CONFIG_ALPHA_TSUNAMI) +# include +#elif defined(CONFIG_ALPHA_TITAN) +# include +#elif defined(CONFIG_ALPHA_WILDFIRE) +# include +#else +#error "What system is this?" +#endif + +#endif /* GENERIC */ + +/* + * We always have external versions of these routines. + */ +extern u8 inb(unsigned long port); +extern u16 inw(unsigned long port); +extern u32 inl(unsigned long port); +extern void outb(u8 b, unsigned long port); +extern void outw(u16 b, unsigned long port); +extern void outl(u32 b, unsigned long port); + +extern u8 readb(const volatile void __iomem *addr); +extern u16 readw(const volatile void __iomem *addr); +extern u32 readl(const volatile void __iomem *addr); +extern u64 readq(const volatile void __iomem *addr); +extern void writeb(u8 b, volatile void __iomem *addr); +extern void writew(u16 b, volatile void __iomem *addr); +extern void writel(u32 b, volatile void __iomem *addr); +extern void writeq(u64 b, volatile void __iomem *addr); + +extern u8 __raw_readb(const volatile void __iomem *addr); +extern u16 __raw_readw(const volatile void __iomem *addr); +extern u32 __raw_readl(const volatile void __iomem *addr); +extern u64 __raw_readq(const volatile void __iomem *addr); +extern void __raw_writeb(u8 b, volatile void __iomem *addr); +extern void __raw_writew(u16 b, volatile void __iomem *addr); +extern void __raw_writel(u32 b, volatile void __iomem *addr); +extern void __raw_writeq(u64 b, volatile void __iomem *addr); + +/* + * Mapping from port numbers to __iomem space is pretty easy. 
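+ *
+ * (ioport_map() hands the port number straight to the chipset's
+ * ioportmap routine; on Wildfire, for instance, that is just an
+ * addition of WILDFIRE_IO_BIAS, as shown earlier in this patch.)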
+ */ + +/* These two have to be extern inline because of the extern prototype from + . It is not legal to mix "extern" and "static" for + the same declaration. */ +extern inline void __iomem *ioport_map(unsigned long port, unsigned int size) +{ + return IO_CONCAT(__IO_PREFIX,ioportmap) (port); +} + +extern inline void ioport_unmap(void __iomem *addr) +{ +} + +static inline void __iomem *ioremap(unsigned long port, unsigned long size) +{ + return IO_CONCAT(__IO_PREFIX,ioremap) (port, size); +} + +#define ioremap_wc ioremap +#define ioremap_uc ioremap + +static inline void iounmap(volatile void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,iounmap)(addr); +} + +static inline int __is_ioaddr(unsigned long addr) +{ + return IO_CONCAT(__IO_PREFIX,is_ioaddr)(addr); +} +#define __is_ioaddr(a) __is_ioaddr((unsigned long)(a)) + +static inline int __is_mmio(const volatile void __iomem *addr) +{ + return IO_CONCAT(__IO_PREFIX,is_mmio)(addr); +} + + +/* + * If the actual I/O bits are sufficiently trivial, then expand inline. + */ + +#if IO_CONCAT(__IO_PREFIX,trivial_io_bw) +extern inline unsigned int ioread8(const void __iomem *addr) +{ + unsigned int ret; + mb(); + ret = IO_CONCAT(__IO_PREFIX,ioread8)(addr); + mb(); + return ret; +} + +extern inline unsigned int ioread16(const void __iomem *addr) +{ + unsigned int ret; + mb(); + ret = IO_CONCAT(__IO_PREFIX,ioread16)(addr); + mb(); + return ret; +} + +extern inline void iowrite8(u8 b, void __iomem *addr) +{ + mb(); + IO_CONCAT(__IO_PREFIX, iowrite8)(b, addr); +} + +extern inline void iowrite16(u16 b, void __iomem *addr) +{ + mb(); + IO_CONCAT(__IO_PREFIX, iowrite16)(b, addr); +} + +extern inline u8 inb(unsigned long port) +{ + return ioread8(ioport_map(port, 1)); +} + +extern inline u16 inw(unsigned long port) +{ + return ioread16(ioport_map(port, 2)); +} + +extern inline void outb(u8 b, unsigned long port) +{ + iowrite8(b, ioport_map(port, 1)); +} + +extern inline void outw(u16 b, unsigned long port) +{ + iowrite16(b, ioport_map(port, 2)); +} +#endif + +#if IO_CONCAT(__IO_PREFIX,trivial_io_lq) +extern inline unsigned int ioread32(const void __iomem *addr) +{ + unsigned int ret; + mb(); + ret = IO_CONCAT(__IO_PREFIX,ioread32)(addr); + mb(); + return ret; +} + +extern inline void iowrite32(u32 b, void __iomem *addr) +{ + mb(); + IO_CONCAT(__IO_PREFIX, iowrite32)(b, addr); +} + +extern inline u32 inl(unsigned long port) +{ + return ioread32(ioport_map(port, 4)); +} + +extern inline void outl(u32 b, unsigned long port) +{ + iowrite32(b, ioport_map(port, 4)); +} +#endif + +#if IO_CONCAT(__IO_PREFIX,trivial_rw_bw) == 1 +extern inline u8 __raw_readb(const volatile void __iomem *addr) +{ + return IO_CONCAT(__IO_PREFIX,readb)(addr); +} + +extern inline u16 __raw_readw(const volatile void __iomem *addr) +{ + return IO_CONCAT(__IO_PREFIX,readw)(addr); +} + +extern inline void __raw_writeb(u8 b, volatile void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,writeb)(b, addr); +} + +extern inline void __raw_writew(u16 b, volatile void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,writew)(b, addr); +} + +extern inline u8 readb(const volatile void __iomem *addr) +{ + u8 ret; + mb(); + ret = __raw_readb(addr); + mb(); + return ret; +} + +extern inline u16 readw(const volatile void __iomem *addr) +{ + u16 ret; + mb(); + ret = __raw_readw(addr); + mb(); + return ret; +} + +extern inline void writeb(u8 b, volatile void __iomem *addr) +{ + mb(); + __raw_writeb(b, addr); +} + +extern inline void writew(u16 b, volatile void __iomem *addr) +{ + mb(); + __raw_writew(b, addr); +} +#endif + 
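[Editor's aside -- not part of the patch] The IO_CONCAT()/_IO_CONCAT() pair used throughout this header is an ordinary two-level token paste: the extra level of indirection is what lets __IO_PREFIX be macro-expanded (to generic, jensen, and so on) before ## glues the pieces together. A minimal, self-contained user-space sketch of the same dispatch follows; the stand-in jensen_ioread8() body and the fake_reg variable are invented purely for illustration.

#include <stdio.h>

#define IO_CONCAT(a,b)  _IO_CONCAT(a,b)
#define _IO_CONCAT(a,b) a ## _ ## b

/* Pretend this kernel was configured for the Jensen machine vector. */
#define __IO_PREFIX     jensen

/* Stand-in for the real sparse-space access routine. */
static unsigned int jensen_ioread8(const void *addr)
{
        return *(const unsigned char *)addr;
}

int main(void)
{
        unsigned char fake_reg = 0x5a;

        /* Expands to jensen_ioread8(&fake_reg) during preprocessing. */
        printf("%#x\n", IO_CONCAT(__IO_PREFIX, ioread8)(&fake_reg));
        return 0;
}

Had the paste been done in a single macro, the operand next to ## would not be macro-expanded and the call would name the undefined identifier __IO_PREFIX_ioread8; that is the whole reason for the indirection.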
+#if IO_CONCAT(__IO_PREFIX,trivial_rw_lq) == 1 +extern inline u32 __raw_readl(const volatile void __iomem *addr) +{ + return IO_CONCAT(__IO_PREFIX,readl)(addr); +} + +extern inline u64 __raw_readq(const volatile void __iomem *addr) +{ + return IO_CONCAT(__IO_PREFIX,readq)(addr); +} + +extern inline void __raw_writel(u32 b, volatile void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,writel)(b, addr); +} + +extern inline void __raw_writeq(u64 b, volatile void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,writeq)(b, addr); +} + +extern inline u32 readl(const volatile void __iomem *addr) +{ + u32 ret; + mb(); + ret = __raw_readl(addr); + mb(); + return ret; +} + +extern inline u64 readq(const volatile void __iomem *addr) +{ + u64 ret; + mb(); + ret = __raw_readq(addr); + mb(); + return ret; +} + +extern inline void writel(u32 b, volatile void __iomem *addr) +{ + mb(); + __raw_writel(b, addr); +} + +extern inline void writeq(u64 b, volatile void __iomem *addr) +{ + mb(); + __raw_writeq(b, addr); +} +#endif + +#define ioread16be(p) swab16(ioread16(p)) +#define ioread32be(p) swab32(ioread32(p)) +#define iowrite16be(v,p) iowrite16(swab16(v), (p)) +#define iowrite32be(v,p) iowrite32(swab32(v), (p)) + +#define inb_p inb +#define inw_p inw +#define inl_p inl +#define outb_p outb +#define outw_p outw +#define outl_p outl + +extern u8 readb_relaxed(const volatile void __iomem *addr); +extern u16 readw_relaxed(const volatile void __iomem *addr); +extern u32 readl_relaxed(const volatile void __iomem *addr); +extern u64 readq_relaxed(const volatile void __iomem *addr); + +#if IO_CONCAT(__IO_PREFIX,trivial_io_bw) +extern inline u8 readb_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readb(addr); +} + +extern inline u16 readw_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readw(addr); +} +#endif + +#if IO_CONCAT(__IO_PREFIX,trivial_io_lq) +extern inline u32 readl_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readl(addr); +} + +extern inline u64 readq_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readq(addr); +} +#endif + +#define writeb_relaxed writeb +#define writew_relaxed writew +#define writel_relaxed writel +#define writeq_relaxed writeq + +/* + * String version of IO memory access ops: + */ +extern void memcpy_fromio(void *, const volatile void __iomem *, long); +extern void memcpy_toio(volatile void __iomem *, const void *, long); +extern void _memset_c_io(volatile void __iomem *, unsigned long, long); + +static inline void memset_io(volatile void __iomem *addr, u8 c, long len) +{ + _memset_c_io(addr, 0x0101010101010101UL * c, len); +} + +#define __HAVE_ARCH_MEMSETW_IO +static inline void memsetw_io(volatile void __iomem *addr, u16 c, long len) +{ + _memset_c_io(addr, 0x0001000100010001UL * c, len); +} + +/* + * String versions of in/out ops: + */ +extern void insb (unsigned long port, void *dst, unsigned long count); +extern void insw (unsigned long port, void *dst, unsigned long count); +extern void insl (unsigned long port, void *dst, unsigned long count); +extern void outsb (unsigned long port, const void *src, unsigned long count); +extern void outsw (unsigned long port, const void *src, unsigned long count); +extern void outsl (unsigned long port, const void *src, unsigned long count); + +/* + * The Alpha Jensen hardware for some rather strange reason puts + * the RTC clock at 0x170 instead of 0x70. Probably due to some + * misguided idea about using 0x70 for NMI stuff. 
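 *
 * [Editor's note, illustrative only] With the RTC_PORT() definitions that
 * follow, the standard MC146818 index/data pair (0x70/0x71 on a PC) comes
 * out at 0x170/0x171 on a Jensen, so a CMOS register read effectively does
 *
 *	outb(reg, RTC_PORT(0));		write the index, port 0x170
 *	val = inb(RTC_PORT(1));		read the data,  port 0x171
 *
 * which is the sequence the CMOS_READ() macro in asm/mc146818rtc.h expands
 * to (outb_p/inb_p are aliased to outb/inb just above).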
+ * + * These defines will override the defaults when doing RTC queries + */ + +#ifdef CONFIG_ALPHA_GENERIC +# define RTC_PORT(x) ((x) + alpha_mv.rtc_port) +#else +# ifdef CONFIG_ALPHA_JENSEN +# define RTC_PORT(x) (0x170+(x)) +# else +# define RTC_PORT(x) (0x70 + (x)) +# endif +#endif +#define RTC_ALWAYS_BCD 0 + +/* + * Some mucking forons use if[n]def writeq to check if platform has it. + * It's a bloody bad idea and we probably want ARCH_HAS_WRITEQ for them + * to play with; for now just use cpp anti-recursion logics and make sure + * that damn thing is defined and expands to itself. + */ + +#define writeq writeq +#define readq readq + +/* + * Convert a physical pointer to a virtual kernel pointer for /dev/mem + * access + */ +#define xlate_dev_mem_ptr(p) __va(p) + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_IO_H */ diff --git a/arch/alpha/include/asm/io_trivial.h b/arch/alpha/include/asm/io_trivial.h new file mode 100644 index 0000000000..a1a29cbe02 --- /dev/null +++ b/arch/alpha/include/asm/io_trivial.h @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Trivial implementations of basic i/o routines. Assumes that all + of the hard work has been done by ioremap and ioportmap, and that + access to i/o space is linear. */ + +/* This file may be included multiple times. */ + +#if IO_CONCAT(__IO_PREFIX,trivial_io_bw) +__EXTERN_INLINE unsigned int +IO_CONCAT(__IO_PREFIX,ioread8)(const void __iomem *a) +{ + return __kernel_ldbu(*(const volatile u8 __force *)a); +} + +__EXTERN_INLINE unsigned int +IO_CONCAT(__IO_PREFIX,ioread16)(const void __iomem *a) +{ + return __kernel_ldwu(*(const volatile u16 __force *)a); +} + +__EXTERN_INLINE void +IO_CONCAT(__IO_PREFIX,iowrite8)(u8 b, void __iomem *a) +{ + __kernel_stb(b, *(volatile u8 __force *)a); +} + +__EXTERN_INLINE void +IO_CONCAT(__IO_PREFIX,iowrite16)(u16 b, void __iomem *a) +{ + __kernel_stw(b, *(volatile u16 __force *)a); +} +#endif + +#if IO_CONCAT(__IO_PREFIX,trivial_io_lq) +__EXTERN_INLINE unsigned int +IO_CONCAT(__IO_PREFIX,ioread32)(const void __iomem *a) +{ + return *(const volatile u32 __force *)a; +} + +__EXTERN_INLINE void +IO_CONCAT(__IO_PREFIX,iowrite32)(u32 b, void __iomem *a) +{ + *(volatile u32 __force *)a = b; +} +#endif + +#if IO_CONCAT(__IO_PREFIX,trivial_rw_bw) == 1 +__EXTERN_INLINE u8 +IO_CONCAT(__IO_PREFIX,readb)(const volatile void __iomem *a) +{ + return __kernel_ldbu(*(const volatile u8 __force *)a); +} + +__EXTERN_INLINE u16 +IO_CONCAT(__IO_PREFIX,readw)(const volatile void __iomem *a) +{ + return __kernel_ldwu(*(const volatile u16 __force *)a); +} + +__EXTERN_INLINE void +IO_CONCAT(__IO_PREFIX,writeb)(u8 b, volatile void __iomem *a) +{ + __kernel_stb(b, *(volatile u8 __force *)a); +} + +__EXTERN_INLINE void +IO_CONCAT(__IO_PREFIX,writew)(u16 b, volatile void __iomem *a) +{ + __kernel_stw(b, *(volatile u16 __force *)a); +} +#elif IO_CONCAT(__IO_PREFIX,trivial_rw_bw) == 2 +__EXTERN_INLINE u8 +IO_CONCAT(__IO_PREFIX,readb)(const volatile void __iomem *a) +{ + const void __iomem *addr = (const void __iomem *)a; + return IO_CONCAT(__IO_PREFIX,ioread8)(addr); +} + +__EXTERN_INLINE u16 +IO_CONCAT(__IO_PREFIX,readw)(const volatile void __iomem *a) +{ + const void __iomem *addr = (const void __iomem *)a; + return IO_CONCAT(__IO_PREFIX,ioread16)(addr); +} + +__EXTERN_INLINE void +IO_CONCAT(__IO_PREFIX,writeb)(u8 b, volatile void __iomem *a) +{ + void __iomem *addr = (void __iomem *)a; + IO_CONCAT(__IO_PREFIX,iowrite8)(b, addr); +} + +__EXTERN_INLINE void +IO_CONCAT(__IO_PREFIX,writew)(u16 b, volatile void __iomem *a) 
+{ + void __iomem *addr = (void __iomem *)a; + IO_CONCAT(__IO_PREFIX,iowrite16)(b, addr); +} +#endif + +#if IO_CONCAT(__IO_PREFIX,trivial_rw_lq) == 1 +__EXTERN_INLINE u32 +IO_CONCAT(__IO_PREFIX,readl)(const volatile void __iomem *a) +{ + return *(const volatile u32 __force *)a; +} + +__EXTERN_INLINE u64 +IO_CONCAT(__IO_PREFIX,readq)(const volatile void __iomem *a) +{ + return *(const volatile u64 __force *)a; +} + +__EXTERN_INLINE void +IO_CONCAT(__IO_PREFIX,writel)(u32 b, volatile void __iomem *a) +{ + *(volatile u32 __force *)a = b; +} + +__EXTERN_INLINE void +IO_CONCAT(__IO_PREFIX,writeq)(u64 b, volatile void __iomem *a) +{ + *(volatile u64 __force *)a = b; +} +#endif + +#if IO_CONCAT(__IO_PREFIX,trivial_iounmap) +__EXTERN_INLINE void IO_CONCAT(__IO_PREFIX,iounmap)(volatile void __iomem *a) +{ +} +#endif diff --git a/arch/alpha/include/asm/irq.h b/arch/alpha/include/asm/irq.h new file mode 100644 index 0000000000..432402c8e4 --- /dev/null +++ b/arch/alpha/include/asm/irq.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_IRQ_H +#define _ALPHA_IRQ_H + +/* + * linux/include/alpha/irq.h + * + * (C) 1994 Linus Torvalds + */ + +#include + +#if defined(CONFIG_ALPHA_GENERIC) + +/* Here NR_IRQS is not exact, but rather an upper bound. This is used + many places throughout the kernel to size static arrays. That's ok, + we'll use alpha_mv.nr_irqs when we want the real thing. */ + +/* When LEGACY_START_ADDRESS is selected, we leave out: + TITAN + WILDFIRE + MARVEL + + This helps keep the kernel object size reasonable for the majority + of machines. +*/ + +# if defined(CONFIG_ALPHA_LEGACY_START_ADDRESS) +# define NR_IRQS (128) /* max is RAWHIDE/TAKARA */ +# else +# define NR_IRQS (32768 + 16) /* marvel - 32 pids */ +# endif + +#elif defined(CONFIG_ALPHA_CABRIOLET) || \ + defined(CONFIG_ALPHA_EB66P) || \ + defined(CONFIG_ALPHA_EB164) || \ + defined(CONFIG_ALPHA_PC164) || \ + defined(CONFIG_ALPHA_LX164) +# define NR_IRQS 35 + +#elif defined(CONFIG_ALPHA_EB66) || \ + defined(CONFIG_ALPHA_EB64P) || \ + defined(CONFIG_ALPHA_MIKASA) +# define NR_IRQS 32 + +#elif defined(CONFIG_ALPHA_ALCOR) || \ + defined(CONFIG_ALPHA_MIATA) || \ + defined(CONFIG_ALPHA_RUFFIAN) || \ + defined(CONFIG_ALPHA_RX164) || \ + defined(CONFIG_ALPHA_NORITAKE) +# define NR_IRQS 48 + +#elif defined(CONFIG_ALPHA_SABLE) || \ + defined(CONFIG_ALPHA_SX164) +# define NR_IRQS 40 + +#elif defined(CONFIG_ALPHA_DP264) || \ + defined(CONFIG_ALPHA_LYNX) || \ + defined(CONFIG_ALPHA_SHARK) +# define NR_IRQS 64 + +#elif defined(CONFIG_ALPHA_TITAN) +#define NR_IRQS 80 + +#elif defined(CONFIG_ALPHA_RAWHIDE) || \ + defined(CONFIG_ALPHA_TAKARA) || \ + defined(CONFIG_ALPHA_EIGER) +# define NR_IRQS 128 + +#elif defined(CONFIG_ALPHA_WILDFIRE) +# define NR_IRQS 2048 /* enuff for 8 QBBs */ + +#elif defined(CONFIG_ALPHA_MARVEL) +# define NR_IRQS (32768 + 16) /* marvel - 32 pids*/ + +#else /* everyone else */ +# define NR_IRQS 16 +#endif + +static __inline__ int irq_canonicalize(int irq) +{ + /* + * XXX is this true for all Alpha's? The old serial driver + * did it this way for years without any complaints, so.... + */ + return ((irq == 2) ? 
9 : irq); +} + +struct pt_regs; +extern void (*perf_irq)(unsigned long, struct pt_regs *); + +#endif /* _ALPHA_IRQ_H */ diff --git a/arch/alpha/include/asm/irq_regs.h b/arch/alpha/include/asm/irq_regs.h new file mode 100644 index 0000000000..3dd9c0b702 --- /dev/null +++ b/arch/alpha/include/asm/irq_regs.h @@ -0,0 +1 @@ +#include diff --git a/arch/alpha/include/asm/irqflags.h b/arch/alpha/include/asm/irqflags.h new file mode 100644 index 0000000000..9f25d4e0d3 --- /dev/null +++ b/arch/alpha/include/asm/irqflags.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_IRQFLAGS_H +#define __ALPHA_IRQFLAGS_H + +#include + +#define IPL_MIN 0 +#define IPL_SW0 1 +#define IPL_SW1 2 +#define IPL_DEV0 3 +#define IPL_DEV1 4 +#define IPL_TIMER 5 +#define IPL_PERF 6 +#define IPL_POWERFAIL 6 +#define IPL_MCHECK 7 +#define IPL_MAX 7 + +#ifdef CONFIG_ALPHA_BROKEN_IRQ_MASK +#undef IPL_MIN +#define IPL_MIN __min_ipl +extern int __min_ipl; +#endif + +#define getipl() (rdps() & 7) +#define setipl(ipl) ((void) swpipl(ipl)) + +static inline unsigned long arch_local_save_flags(void) +{ + return rdps(); +} + +static inline void arch_local_irq_disable(void) +{ + setipl(IPL_MAX); + barrier(); +} + +static inline unsigned long arch_local_irq_save(void) +{ + unsigned long flags = swpipl(IPL_MAX); + barrier(); + return flags; +} + +static inline void arch_local_irq_enable(void) +{ + barrier(); + setipl(IPL_MIN); +} + +static inline void arch_local_irq_restore(unsigned long flags) +{ + barrier(); + setipl(flags); + barrier(); +} + +static inline bool arch_irqs_disabled_flags(unsigned long flags) +{ + return flags == IPL_MAX; +} + +static inline bool arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(getipl()); +} + +#endif /* __ALPHA_IRQFLAGS_H */ diff --git a/arch/alpha/include/asm/jensen.h b/arch/alpha/include/asm/jensen.h new file mode 100644 index 0000000000..1c4131453d --- /dev/null +++ b/arch/alpha/include/asm/jensen.h @@ -0,0 +1,347 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_JENSEN_H +#define __ALPHA_JENSEN_H + +#include + +/* + * Defines for the AlphaPC EISA IO and memory address space. + */ + +/* + * NOTE! The memory operations do not set any memory barriers, as it's + * not needed for cases like a frame buffer that is essentially memory-like. + * You need to do them by hand if the operations depend on ordering. + * + * Similarly, the port IO operations do a "mb" only after a write operation: + * if an mb is needed before (as in the case of doing memory mapped IO + * first, and then a port IO operation to the same device), it needs to be + * done by hand. + * + * After the above has bitten me 100 times, I'll give up and just do the + * mb all the time, but right now I'm hoping this will work out. Avoiding + * mb's may potentially be a noticeable speed improvement, but I can't + * honestly say I've tested it. + * + * Handling interrupts that need to do mb's to synchronize to non-interrupts + * is another fun race area. Don't do it (because if you do, I'll have to + * do *everything* with interrupts disabled, ugh). 
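 *
 * [Editor's note, illustrative only] "Doing them by hand" means the caller
 * places the barrier where the ordering actually matters.  For example, if
 * a driver must make its descriptor stores visible before ringing a
 * doorbell register (the register and variable names here are
 * hypothetical), the raw accessors carry no ordering of their own and the
 * mb() is explicit:
 *
 *	__raw_writel(desc_lo, ring_base + DESC_LO);
 *	__raw_writel(desc_hi, ring_base + DESC_HI);
 *	mb();			stores above reach the device first
 *	__raw_writel(go, ring_base + DOORBELL);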
+ */ + +/* + * EISA Interrupt Acknowledge address + */ +#define EISA_INTA (IDENT_ADDR + 0x100000000UL) + +/* + * FEPROM addresses + */ +#define EISA_FEPROM0 (IDENT_ADDR + 0x180000000UL) +#define EISA_FEPROM1 (IDENT_ADDR + 0x1A0000000UL) + +/* + * VL82C106 base address + */ +#define EISA_VL82C106 (IDENT_ADDR + 0x1C0000000UL) + +/* + * EISA "Host Address Extension" address (bits 25-31 of the EISA address) + */ +#define EISA_HAE (IDENT_ADDR + 0x1D0000000UL) + +/* + * "SYSCTL" register address + */ +#define EISA_SYSCTL (IDENT_ADDR + 0x1E0000000UL) + +/* + * "spare" register address + */ +#define EISA_SPARE (IDENT_ADDR + 0x1F0000000UL) + +/* + * EISA memory address offset + */ +#define EISA_MEM (IDENT_ADDR + 0x200000000UL) + +/* + * EISA IO address offset + */ +#define EISA_IO (IDENT_ADDR + 0x300000000UL) + + +#ifdef __KERNEL__ + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __IO_EXTERN_INLINE +#endif + +/* + * Handle the "host address register". This needs to be set + * to the high 7 bits of the EISA address. This is also needed + * for EISA IO addresses, which are only 16 bits wide (the + * hae needs to be set to 0). + * + * HAE isn't needed for the local IO operations, though. + */ + +#define JENSEN_HAE_ADDRESS EISA_HAE +#define JENSEN_HAE_MASK 0x1ffffff + +__EXTERN_INLINE void jensen_set_hae(unsigned long addr) +{ + /* hae on the Jensen is bits 31:25 shifted right */ + addr >>= 25; + if (addr != alpha_mv.hae_cache) + set_hae(addr); +} + +#define vuip volatile unsigned int * + +/* + * IO functions + * + * The "local" functions are those that don't go out to the EISA bus, + * but instead act on the VL82C106 chip directly.. This is mainly the + * keyboard, RTC, printer and first two serial lines.. + * + * The local stuff makes for some complications, but it seems to be + * gone in the PCI version. I hope I can get DEC suckered^H^H^H^H^H^H^H^H + * convinced that I need one of the newer machines. + */ + +__EXTERN_INLINE unsigned int jensen_local_inb(unsigned long addr) +{ + return 0xff & *(vuip)((addr << 9) + EISA_VL82C106); +} + +__EXTERN_INLINE void jensen_local_outb(u8 b, unsigned long addr) +{ + *(vuip)((addr << 9) + EISA_VL82C106) = b; + mb(); +} + +__EXTERN_INLINE unsigned int jensen_bus_inb(unsigned long addr) +{ + long result; + + jensen_set_hae(0); + result = *(volatile int *)((addr << 7) + EISA_IO + 0x00); + return __kernel_extbl(result, addr & 3); +} + +__EXTERN_INLINE void jensen_bus_outb(u8 b, unsigned long addr) +{ + jensen_set_hae(0); + *(vuip)((addr << 7) + EISA_IO + 0x00) = b * 0x01010101; + mb(); +} + +/* + * It seems gcc is not very good at optimizing away logical + * operations that result in operations across inline functions. + * Which is why this is a macro. 
+ */ + +#define jensen_is_local(addr) ( \ +/* keyboard */ (addr == 0x60 || addr == 0x64) || \ +/* RTC */ (addr == 0x170 || addr == 0x171) || \ +/* mb COM2 */ (addr >= 0x2f8 && addr <= 0x2ff) || \ +/* mb LPT1 */ (addr >= 0x3bc && addr <= 0x3be) || \ +/* mb COM2 */ (addr >= 0x3f8 && addr <= 0x3ff)) + +__EXTERN_INLINE u8 jensen_inb(unsigned long addr) +{ + if (jensen_is_local(addr)) + return jensen_local_inb(addr); + else + return jensen_bus_inb(addr); +} + +__EXTERN_INLINE void jensen_outb(u8 b, unsigned long addr) +{ + if (jensen_is_local(addr)) + jensen_local_outb(b, addr); + else + jensen_bus_outb(b, addr); +} + +__EXTERN_INLINE u16 jensen_inw(unsigned long addr) +{ + long result; + + jensen_set_hae(0); + result = *(volatile int *) ((addr << 7) + EISA_IO + 0x20); + result >>= (addr & 3) * 8; + return 0xffffUL & result; +} + +__EXTERN_INLINE u32 jensen_inl(unsigned long addr) +{ + jensen_set_hae(0); + return *(vuip) ((addr << 7) + EISA_IO + 0x60); +} + +__EXTERN_INLINE void jensen_outw(u16 b, unsigned long addr) +{ + jensen_set_hae(0); + *(vuip) ((addr << 7) + EISA_IO + 0x20) = b * 0x00010001; + mb(); +} + +__EXTERN_INLINE void jensen_outl(u32 b, unsigned long addr) +{ + jensen_set_hae(0); + *(vuip) ((addr << 7) + EISA_IO + 0x60) = b; + mb(); +} + +/* + * Memory functions. + */ + +__EXTERN_INLINE u8 jensen_readb(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + long result; + + jensen_set_hae(addr); + addr &= JENSEN_HAE_MASK; + result = *(volatile int *) ((addr << 7) + EISA_MEM + 0x00); + result >>= (addr & 3) * 8; + return 0xffUL & result; +} + +__EXTERN_INLINE u16 jensen_readw(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + long result; + + jensen_set_hae(addr); + addr &= JENSEN_HAE_MASK; + result = *(volatile int *) ((addr << 7) + EISA_MEM + 0x20); + result >>= (addr & 3) * 8; + return 0xffffUL & result; +} + +__EXTERN_INLINE u32 jensen_readl(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + jensen_set_hae(addr); + addr &= JENSEN_HAE_MASK; + return *(vuip) ((addr << 7) + EISA_MEM + 0x60); +} + +__EXTERN_INLINE u64 jensen_readq(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + unsigned long r0, r1; + + jensen_set_hae(addr); + addr &= JENSEN_HAE_MASK; + addr = (addr << 7) + EISA_MEM + 0x60; + r0 = *(vuip) (addr); + r1 = *(vuip) (addr + (4 << 7)); + return r1 << 32 | r0; +} + +__EXTERN_INLINE void jensen_writeb(u8 b, volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + jensen_set_hae(addr); + addr &= JENSEN_HAE_MASK; + *(vuip) ((addr << 7) + EISA_MEM + 0x00) = b * 0x01010101; +} + +__EXTERN_INLINE void jensen_writew(u16 b, volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + jensen_set_hae(addr); + addr &= JENSEN_HAE_MASK; + *(vuip) ((addr << 7) + EISA_MEM + 0x20) = b * 0x00010001; +} + +__EXTERN_INLINE void jensen_writel(u32 b, volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + jensen_set_hae(addr); + addr &= JENSEN_HAE_MASK; + *(vuip) ((addr << 7) + EISA_MEM + 0x60) = b; +} + +__EXTERN_INLINE void jensen_writeq(u64 b, volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + jensen_set_hae(addr); + addr &= JENSEN_HAE_MASK; + addr = (addr << 7) + EISA_MEM + 0x60; + *(vuip) (addr) = b; + *(vuip) (addr + (4 << 7)) = b >> 32; +} + +__EXTERN_INLINE void __iomem *jensen_ioportmap(unsigned long addr) +{ + return (void __iomem *)addr; +} + 
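[Editor's aside -- not part of the patch] The Jensen "sparse space" accessors above encode the transfer width in the address: the target I/O or memory address is spread out by shifting it left 7 bits, the small offset added afterwards selects the width (0x00 byte, 0x20 word, 0x60 longword), the byte lane is recovered from addr & 3, and byte/word stores replicate the datum across all lanes (b * 0x01010101, b * 0x00010001) so the addressed lane always carries the right value. A small host-side sketch of just that lane arithmetic follows; the helper names are invented, and it models only the shifting and masking done by jensen_bus_inb()/jensen_bus_outb(), not the EISA hardware itself.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* What jensen_bus_outb() stores into the 32-bit sparse window. */
static uint32_t sparse_store_byte(uint8_t b)
{
        return (uint32_t)b * 0x01010101u;       /* replicate across the four lanes */
}

/* What jensen_bus_inb() does with the value it loads back:
 * __kernel_extbl(result, addr & 3) == (result >> ((addr & 3) * 8)) & 0xff. */
static uint8_t sparse_extract_byte(uint32_t lanes, unsigned long addr)
{
        return (uint8_t)(lanes >> ((addr & 3) * 8));
}

int main(void)
{
        unsigned long port = 0x3f9;             /* lane = port & 3 = 1 */
        uint32_t window = sparse_store_byte(0xc7);

        assert(sparse_extract_byte(window, port) == 0xc7);
        printf("lane %lu -> %#x\n", port & 3, sparse_extract_byte(window, port));
        return 0;
}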
+__EXTERN_INLINE void __iomem *jensen_ioremap(unsigned long addr, + unsigned long size) +{ + return (void __iomem *)(addr + 0x100000000ul); +} + +__EXTERN_INLINE int jensen_is_ioaddr(unsigned long addr) +{ + return (long)addr >= 0; +} + +__EXTERN_INLINE int jensen_is_mmio(const volatile void __iomem *addr) +{ + return (unsigned long)addr >= 0x100000000ul; +} + +/* New-style ioread interface. All the routines are so ugly for Jensen + that it doesn't make sense to merge them. */ + +#define IOPORT(OS, NS) \ +__EXTERN_INLINE unsigned int jensen_ioread##NS(const void __iomem *xaddr) \ +{ \ + if (jensen_is_mmio(xaddr)) \ + return jensen_read##OS(xaddr - 0x100000000ul); \ + else \ + return jensen_in##OS((unsigned long)xaddr); \ +} \ +__EXTERN_INLINE void jensen_iowrite##NS(u##NS b, void __iomem *xaddr) \ +{ \ + if (jensen_is_mmio(xaddr)) \ + jensen_write##OS(b, xaddr - 0x100000000ul); \ + else \ + jensen_out##OS(b, (unsigned long)xaddr); \ +} + +IOPORT(b, 8) +IOPORT(w, 16) +IOPORT(l, 32) + +#undef IOPORT + +#undef vuip + +#undef __IO_PREFIX +#define __IO_PREFIX jensen +#define jensen_trivial_rw_bw 0 +#define jensen_trivial_rw_lq 0 +#define jensen_trivial_io_bw 0 +#define jensen_trivial_io_lq 0 +#define jensen_trivial_iounmap 1 +#include <asm/io_trivial.h> + +#ifdef __IO_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __IO_EXTERN_INLINE +#endif + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_JENSEN_H */ diff --git a/arch/alpha/include/asm/kdebug.h b/arch/alpha/include/asm/kdebug.h new file mode 100644 index 0000000000..6ece1b0376 --- /dev/null +++ b/arch/alpha/include/asm/kdebug.h @@ -0,0 +1 @@ +#include <asm-generic/kdebug.h> diff --git a/arch/alpha/include/asm/kmap_types.h b/arch/alpha/include/asm/kmap_types.h new file mode 100644 index 0000000000..651714b457 --- /dev/null +++ b/arch/alpha/include/asm/kmap_types.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_KMAP_TYPES_H +#define _ASM_KMAP_TYPES_H + +/* Dummy header just to define km_type.
*/ + +#ifdef CONFIG_DEBUG_HIGHMEM +#define __WITH_KM_FENCE +#endif + +#include + +#undef __WITH_KM_FENCE + +#endif diff --git a/arch/alpha/include/asm/linkage.h b/arch/alpha/include/asm/linkage.h new file mode 100644 index 0000000000..aa8661fa60 --- /dev/null +++ b/arch/alpha/include/asm/linkage.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_LINKAGE_H +#define __ASM_LINKAGE_H + +#define cond_syscall(x) asm(".weak\t" #x "\n" #x " = sys_ni_syscall") +#define SYSCALL_ALIAS(alias, name) \ + asm ( #alias " = " #name "\n\t.globl " #alias) + +#endif diff --git a/arch/alpha/include/asm/local.h b/arch/alpha/include/asm/local.h new file mode 100644 index 0000000000..fab26a1c93 --- /dev/null +++ b/arch/alpha/include/asm/local.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_LOCAL_H +#define _ALPHA_LOCAL_H + +#include +#include + +typedef struct +{ + atomic_long_t a; +} local_t; + +#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } +#define local_read(l) atomic_long_read(&(l)->a) +#define local_set(l,i) atomic_long_set(&(l)->a, (i)) +#define local_inc(l) atomic_long_inc(&(l)->a) +#define local_dec(l) atomic_long_dec(&(l)->a) +#define local_add(i,l) atomic_long_add((i),(&(l)->a)) +#define local_sub(i,l) atomic_long_sub((i),(&(l)->a)) + +static __inline__ long local_add_return(long i, local_t * l) +{ + long temp, result; + __asm__ __volatile__( + "1: ldq_l %0,%1\n" + " addq %0,%3,%2\n" + " addq %0,%3,%0\n" + " stq_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (l->a.counter), "=&r" (result) + :"Ir" (i), "m" (l->a.counter) : "memory"); + return result; +} + +static __inline__ long local_sub_return(long i, local_t * l) +{ + long temp, result; + __asm__ __volatile__( + "1: ldq_l %0,%1\n" + " subq %0,%3,%2\n" + " subq %0,%3,%0\n" + " stq_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (l->a.counter), "=&r" (result) + :"Ir" (i), "m" (l->a.counter) : "memory"); + return result; +} + +#define local_cmpxchg(l, o, n) \ + (cmpxchg_local(&((l)->a.counter), (o), (n))) +#define local_xchg(l, n) (xchg_local(&((l)->a.counter), (n))) + +/** + * local_add_unless - add unless the number is a given value + * @l: pointer of type local_t + * @a: the amount to add to l... + * @u: ...unless l is equal to u. + * + * Atomically adds @a to @l, so long as it was not @u. + * Returns non-zero if @l was not @u, and zero otherwise. 
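 *
 * [Editor's note, illustrative only] The usual consumer is the
 * local_inc_not_zero() wrapper defined below, which only takes a new
 * reference while the count is still live; with a made-up per-cpu
 * structure it reads like:
 *
 *	if (!local_inc_not_zero(&stats->refs))
 *		return -ENOENT;		already torn down on this CPU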
+ */ +#define local_add_unless(l, a, u) \ +({ \ + long c, old; \ + c = local_read(l); \ + for (;;) { \ + if (unlikely(c == (u))) \ + break; \ + old = local_cmpxchg((l), c, c + (a)); \ + if (likely(old == c)) \ + break; \ + c = old; \ + } \ + c != (u); \ +}) +#define local_inc_not_zero(l) local_add_unless((l), 1, 0) + +#define local_add_negative(a, l) (local_add_return((a), (l)) < 0) + +#define local_dec_return(l) local_sub_return(1,(l)) + +#define local_inc_return(l) local_add_return(1,(l)) + +#define local_sub_and_test(i,l) (local_sub_return((i), (l)) == 0) + +#define local_inc_and_test(l) (local_add_return(1, (l)) == 0) + +#define local_dec_and_test(l) (local_sub_return(1, (l)) == 0) + +/* Verify if faster than atomic ops */ +#define __local_inc(l) ((l)->a.counter++) +#define __local_dec(l) ((l)->a.counter++) +#define __local_add(i,l) ((l)->a.counter+=(i)) +#define __local_sub(i,l) ((l)->a.counter-=(i)) + +#endif /* _ALPHA_LOCAL_H */ diff --git a/arch/alpha/include/asm/machvec.h b/arch/alpha/include/asm/machvec.h new file mode 100644 index 0000000000..e49fabce7b --- /dev/null +++ b/arch/alpha/include/asm/machvec.h @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_MACHVEC_H +#define __ALPHA_MACHVEC_H 1 + +#include + +/* + * This file gets pulled in by asm/io.h from user space. We don't + * want most of this escaping. + */ + +#ifdef __KERNEL__ + +/* The following structure vectors all of the I/O and IRQ manipulation + from the generic kernel to the hardware specific backend. */ + +struct task_struct; +struct mm_struct; +struct vm_area_struct; +struct linux_hose_info; +struct pci_dev; +struct pci_ops; +struct pci_controller; +struct _alpha_agp_info; +struct rtc_time; + +struct alpha_machine_vector +{ + /* This "belongs" down below with the rest of the runtime + variables, but it is convenient for entry.S if these + two slots are at the beginning of the struct. 
*/ + unsigned long hae_cache; + unsigned long *hae_register; + + int nr_irqs; + int rtc_port; + int rtc_boot_cpu_only; + unsigned int max_asn; + unsigned long max_isa_dma_address; + unsigned long irq_probe_mask; + unsigned long iack_sc; + unsigned long min_io_address; + unsigned long min_mem_address; + unsigned long pci_dac_offset; + + void (*mv_pci_tbi)(struct pci_controller *hose, + dma_addr_t start, dma_addr_t end); + + unsigned int (*mv_ioread8)(const void __iomem *); + unsigned int (*mv_ioread16)(const void __iomem *); + unsigned int (*mv_ioread32)(const void __iomem *); + + void (*mv_iowrite8)(u8, void __iomem *); + void (*mv_iowrite16)(u16, void __iomem *); + void (*mv_iowrite32)(u32, void __iomem *); + + u8 (*mv_readb)(const volatile void __iomem *); + u16 (*mv_readw)(const volatile void __iomem *); + u32 (*mv_readl)(const volatile void __iomem *); + u64 (*mv_readq)(const volatile void __iomem *); + + void (*mv_writeb)(u8, volatile void __iomem *); + void (*mv_writew)(u16, volatile void __iomem *); + void (*mv_writel)(u32, volatile void __iomem *); + void (*mv_writeq)(u64, volatile void __iomem *); + + void __iomem *(*mv_ioportmap)(unsigned long); + void __iomem *(*mv_ioremap)(unsigned long, unsigned long); + void (*mv_iounmap)(volatile void __iomem *); + int (*mv_is_ioaddr)(unsigned long); + int (*mv_is_mmio)(const volatile void __iomem *); + + void (*mv_switch_mm)(struct mm_struct *, struct mm_struct *, + struct task_struct *); + void (*mv_activate_mm)(struct mm_struct *, struct mm_struct *); + + void (*mv_flush_tlb_current)(struct mm_struct *); + void (*mv_flush_tlb_current_page)(struct mm_struct * mm, + struct vm_area_struct *vma, + unsigned long addr); + + void (*update_irq_hw)(unsigned long, unsigned long, int); + void (*ack_irq)(unsigned long); + void (*device_interrupt)(unsigned long vector); + void (*machine_check)(unsigned long vector, unsigned long la); + + void (*smp_callin)(void); + void (*init_arch)(void); + void (*init_irq)(void); + void (*init_rtc)(void); + void (*init_pci)(void); + void (*kill_arch)(int); + + u8 (*pci_swizzle)(struct pci_dev *, u8 *); + int (*pci_map_irq)(const struct pci_dev *, u8, u8); + struct pci_ops *pci_ops; + + struct _alpha_agp_info *(*agp_info)(void); + + const char *vector_name; + + /* System specific parameters. */ + union { + struct { + unsigned long gru_int_req_bits; + } cia; + + struct { + unsigned long gamma_bias; + } t2; + + struct { + unsigned int route_tab; + } sio; + } sys; +}; + +extern struct alpha_machine_vector alpha_mv; + +#ifdef CONFIG_ALPHA_GENERIC +extern int alpha_using_srm; +extern int alpha_using_qemu; +#else +# ifdef CONFIG_ALPHA_SRM +# define alpha_using_srm 1 +# else +# define alpha_using_srm 0 +# endif +# ifdef CONFIG_ALPHA_QEMU +# define alpha_using_qemu 1 +# else +# define alpha_using_qemu 0 +# endif +#endif /* GENERIC */ + +#endif /* __KERNEL__ */ +#endif /* __ALPHA_MACHVEC_H */ diff --git a/arch/alpha/include/asm/mc146818rtc.h b/arch/alpha/include/asm/mc146818rtc.h new file mode 100644 index 0000000000..2154fd4f78 --- /dev/null +++ b/arch/alpha/include/asm/mc146818rtc.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Machine dependent access functions for RTC registers. 
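 *
 * [Editor's note, illustrative only] The CMOS_READ()/CMOS_WRITE() macros
 * below implement the usual MC146818 two-step access: write the register
 * index to RTC_PORT(0), then touch the data byte at RTC_PORT(1).  Reading
 * the chip's seconds register (index 0 in the MC146818 register map) is
 * simply:
 *
 *	unsigned char sec = CMOS_READ(0);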
+ */ +#ifndef __ASM_ALPHA_MC146818RTC_H +#define __ASM_ALPHA_MC146818RTC_H + +#include + +#ifndef RTC_PORT +#define RTC_PORT(x) (0x70 + (x)) +#define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */ +#endif + +/* + * The yet supported machines all access the RTC index register via + * an ISA port access but the way to access the date register differs ... + */ +#define CMOS_READ(addr) ({ \ +outb_p((addr),RTC_PORT(0)); \ +inb_p(RTC_PORT(1)); \ +}) +#define CMOS_WRITE(val, addr) ({ \ +outb_p((addr),RTC_PORT(0)); \ +outb_p((val),RTC_PORT(1)); \ +}) + +#endif /* __ASM_ALPHA_MC146818RTC_H */ diff --git a/arch/alpha/include/asm/mce.h b/arch/alpha/include/asm/mce.h new file mode 100644 index 0000000000..200e04e98a --- /dev/null +++ b/arch/alpha/include/asm/mce.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_MCE_H +#define __ALPHA_MCE_H + +/* + * This is the logout header that should be common to all platforms + * (assuming they are running OSF/1 PALcode, I guess). + */ +struct el_common { + unsigned int size; /* size in bytes of logout area */ + unsigned int sbz1 : 30; /* should be zero */ + unsigned int err2 : 1; /* second error */ + unsigned int retry : 1; /* retry flag */ + unsigned int proc_offset; /* processor-specific offset */ + unsigned int sys_offset; /* system-specific offset */ + unsigned int code; /* machine check code */ + unsigned int frame_rev; /* frame revision */ +}; + +/* Machine Check Frame for uncorrectable errors (Large format) + * --- This is used to log uncorrectable errors such as + * double bit ECC errors. + * --- These errors are detected by both processor and systems. + */ +struct el_common_EV5_uncorrectable_mcheck { + unsigned long shadow[8]; /* Shadow reg. 8-14, 25 */ + unsigned long paltemp[24]; /* PAL TEMP REGS. */ + unsigned long exc_addr; /* Address of excepting instruction*/ + unsigned long exc_sum; /* Summary of arithmetic traps. */ + unsigned long exc_mask; /* Exception mask (from exc_sum). */ + unsigned long pal_base; /* Base address for PALcode. */ + unsigned long isr; /* Interrupt Status Reg. */ + unsigned long icsr; /* CURRENT SETUP OF EV5 IBOX */ + unsigned long ic_perr_stat; /* I-CACHE Reg. <11> set Data parity + <12> set TAG parity*/ + unsigned long dc_perr_stat; /* D-CACHE error Reg. Bits set to 1: + <2> Data error in bank 0 + <3> Data error in bank 1 + <4> Tag error in bank 0 + <5> Tag error in bank 1 */ + unsigned long va; /* Effective VA of fault or miss. */ + unsigned long mm_stat; /* Holds the reason for D-stream + fault or D-cache parity errors */ + unsigned long sc_addr; /* Address that was being accessed + when EV5 detected Secondary cache + failure. */ + unsigned long sc_stat; /* Helps determine if the error was + TAG/Data parity(Secondary Cache)*/ + unsigned long bc_tag_addr; /* Contents of EV5 BC_TAG_ADDR */ + unsigned long ei_addr; /* Physical address of any transfer + that is logged in EV5 EI_STAT */ + unsigned long fill_syndrome; /* For correcting ECC errors. */ + unsigned long ei_stat; /* Helps identify reason of any + processor uncorrectable error + at its external interface. 
*/ + unsigned long ld_lock; /* Contents of EV5 LD_LOCK register*/ +}; + +struct el_common_EV6_mcheck { + unsigned int FrameSize; /* Bytes, including this field */ + unsigned int FrameFlags; /* <31> = Retry, <30> = Second Error */ + unsigned int CpuOffset; /* Offset to CPU-specific info */ + unsigned int SystemOffset; /* Offset to system-specific info */ + unsigned int MCHK_Code; + unsigned int MCHK_Frame_Rev; + unsigned long I_STAT; /* EV6 Internal Processor Registers */ + unsigned long DC_STAT; /* (See the 21264 Spec) */ + unsigned long C_ADDR; + unsigned long DC1_SYNDROME; + unsigned long DC0_SYNDROME; + unsigned long C_STAT; + unsigned long C_STS; + unsigned long MM_STAT; + unsigned long EXC_ADDR; + unsigned long IER_CM; + unsigned long ISUM; + unsigned long RESERVED0; + unsigned long PAL_BASE; + unsigned long I_CTL; + unsigned long PCTX; +}; + + +#endif /* __ALPHA_MCE_H */ diff --git a/arch/alpha/include/asm/mmu.h b/arch/alpha/include/asm/mmu.h new file mode 100644 index 0000000000..dadfc1b6e3 --- /dev/null +++ b/arch/alpha/include/asm/mmu.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_MMU_H +#define __ALPHA_MMU_H + +/* The alpha MMU context is one "unsigned long" bitmap per CPU */ +typedef unsigned long mm_context_t[NR_CPUS]; + +#endif diff --git a/arch/alpha/include/asm/mmu_context.h b/arch/alpha/include/asm/mmu_context.h new file mode 100644 index 0000000000..4eea7c6169 --- /dev/null +++ b/arch/alpha/include/asm/mmu_context.h @@ -0,0 +1,259 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_MMU_CONTEXT_H +#define __ALPHA_MMU_CONTEXT_H + +/* + * get a new mmu context.. + * + * Copyright (C) 1996, Linus Torvalds + */ + +#include +#include + +#include +#include +#include + +/* + * Force a context reload. This is needed when we change the page + * table pointer or when we update the ASN of the current process. + */ + +/* Don't get into trouble with dueling __EXTERN_INLINEs. */ +#ifndef __EXTERN_INLINE +#include +#endif + + +static inline unsigned long +__reload_thread(struct pcb_struct *pcb) +{ + register unsigned long a0 __asm__("$16"); + register unsigned long v0 __asm__("$0"); + + a0 = virt_to_phys(pcb); + __asm__ __volatile__( + "call_pal %2 #__reload_thread" + : "=r"(v0), "=r"(a0) + : "i"(PAL_swpctx), "r"(a0) + : "$1", "$22", "$23", "$24", "$25"); + + return v0; +} + + +/* + * The maximum ASN's the processor supports. On the EV4 this is 63 + * but the PAL-code doesn't actually use this information. On the + * EV5 this is 127, and EV6 has 255. + * + * On the EV4, the ASNs are more-or-less useless anyway, as they are + * only used as an icache tag, not for TB entries. On the EV5 and EV6, + * ASN's also validate the TB entries, and thus make a lot more sense. + * + * The EV4 ASN's don't even match the architecture manual, ugh. And + * I quote: "If a processor implements address space numbers (ASNs), + * and the old PTE has the Address Space Match (ASM) bit clear (ASNs + * in use) and the Valid bit set, then entries can also effectively be + * made coherent by assigning a new, unused ASN to the currently + * running process and not reusing the previous ASN before calling the + * appropriate PALcode routine to invalidate the translation buffer (TB)". + * + * In short, the EV4 has a "kind of" ASN capability, but it doesn't actually + * work correctly and can thus not be used (explaining the lack of PAL-code + * support). 
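 *
 * [Editor's note, illustrative worked example] With the EV5 numbers defined
 * below (MAX_ASN = 127, WIDTH_HARDWARE_ASN = 8, ASN_FIRST_VERSION = 0x100),
 * cpu_last_asn keeps a version number in the high bits and the hardware ASN
 * in the low 8 bits.  Suppose a CPU has handed out ASNs up to 0x17f
 * (version 1, hardware ASN 127).  The next __get_new_mm_context() call sees
 * (0x17f & 0xff) >= 127, flushes the TLB with tbiap(), and starts a fresh
 * version:
 *
 *	next = (0x17f & ~0xff) + 0x100 = 0x200	version 2, hardware ASN 0
 *
 * Every mm still holding a version-1 context value is thereby stale and
 * will be assigned a new ASN the next time it is switched in.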
+ */ +#define EV4_MAX_ASN 63 +#define EV5_MAX_ASN 127 +#define EV6_MAX_ASN 255 + +#ifdef CONFIG_ALPHA_GENERIC +# define MAX_ASN (alpha_mv.max_asn) +#else +# ifdef CONFIG_ALPHA_EV4 +# define MAX_ASN EV4_MAX_ASN +# elif defined(CONFIG_ALPHA_EV5) +# define MAX_ASN EV5_MAX_ASN +# else +# define MAX_ASN EV6_MAX_ASN +# endif +#endif + +/* + * cpu_last_asn(processor): + * 63 0 + * +-------------+----------------+--------------+ + * | asn version | this processor | hardware asn | + * +-------------+----------------+--------------+ + */ + +#include +#ifdef CONFIG_SMP +#define cpu_last_asn(cpuid) (cpu_data[cpuid].last_asn) +#else +extern unsigned long last_asn; +#define cpu_last_asn(cpuid) last_asn +#endif /* CONFIG_SMP */ + +#define WIDTH_HARDWARE_ASN 8 +#define ASN_FIRST_VERSION (1UL << WIDTH_HARDWARE_ASN) +#define HARDWARE_ASN_MASK ((1UL << WIDTH_HARDWARE_ASN) - 1) + +/* + * NOTE! The way this is set up, the high bits of the "asn_cache" (and + * the "mm->context") are the ASN _version_ code. A version of 0 is + * always considered invalid, so to invalidate another process you only + * need to do "p->mm->context = 0". + * + * If we need more ASN's than the processor has, we invalidate the old + * user TLB's (tbiap()) and start a new ASN version. That will automatically + * force a new asn for any other processes the next time they want to + * run. + */ + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __MMU_EXTERN_INLINE +#endif + +extern inline unsigned long +__get_new_mm_context(struct mm_struct *mm, long cpu) +{ + unsigned long asn = cpu_last_asn(cpu); + unsigned long next = asn + 1; + + if ((asn & HARDWARE_ASN_MASK) >= MAX_ASN) { + tbiap(); + imb(); + next = (asn & ~HARDWARE_ASN_MASK) + ASN_FIRST_VERSION; + } + cpu_last_asn(cpu) = next; + return next; +} + +__EXTERN_INLINE void +ev5_switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm, + struct task_struct *next) +{ + /* Check if our ASN is of an older version, and thus invalid. */ + unsigned long asn; + unsigned long mmc; + long cpu = smp_processor_id(); + +#ifdef CONFIG_SMP + cpu_data[cpu].asn_lock = 1; + barrier(); +#endif + asn = cpu_last_asn(cpu); + mmc = next_mm->context[cpu]; + if ((mmc ^ asn) & ~HARDWARE_ASN_MASK) { + mmc = __get_new_mm_context(next_mm, cpu); + next_mm->context[cpu] = mmc; + } +#ifdef CONFIG_SMP + else + cpu_data[cpu].need_new_asn = 1; +#endif + + /* Always update the PCB ASN. Another thread may have allocated + a new mm->context (via flush_tlb_mm) without the ASN serial + number wrapping. We have no way to detect when this is needed. */ + task_thread_info(next)->pcb.asn = mmc & HARDWARE_ASN_MASK; +} + +__EXTERN_INLINE void +ev4_switch_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm, + struct task_struct *next) +{ + /* As described, ASN's are broken for TLB usage. But we can + optimize for switching between threads -- if the mm is + unchanged from current we needn't flush. */ + /* ??? May not be needed because EV4 PALcode recognizes that + ASN's are broken and does a tbiap itself on swpctx, under + the "Must set ASN or flush" rule. At least this is true + for a 1992 SRM, reports Joseph Martin (jmartin@hlo.dec.com). + I'm going to leave this here anyway, just to Be Sure. -- r~ */ + if (prev_mm != next_mm) + tbiap(); + + /* Do continue to allocate ASNs, because we can still use them + to avoid flushing the icache. 
*/ + ev5_switch_mm(prev_mm, next_mm, next); +} + +extern void __load_new_mm_context(struct mm_struct *); + +#ifdef CONFIG_SMP +#define check_mmu_context() \ +do { \ + int cpu = smp_processor_id(); \ + cpu_data[cpu].asn_lock = 0; \ + barrier(); \ + if (cpu_data[cpu].need_new_asn) { \ + struct mm_struct * mm = current->active_mm; \ + cpu_data[cpu].need_new_asn = 0; \ + if (!mm->context[cpu]) \ + __load_new_mm_context(mm); \ + } \ +} while(0) +#else +#define check_mmu_context() do { } while(0) +#endif + +__EXTERN_INLINE void +ev5_activate_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm) +{ + __load_new_mm_context(next_mm); +} + +__EXTERN_INLINE void +ev4_activate_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm) +{ + __load_new_mm_context(next_mm); + tbiap(); +} + +#ifdef CONFIG_ALPHA_GENERIC +# define switch_mm(a,b,c) alpha_mv.mv_switch_mm((a),(b),(c)) +# define activate_mm(x,y) alpha_mv.mv_activate_mm((x),(y)) +#else +# ifdef CONFIG_ALPHA_EV4 +# define switch_mm(a,b,c) ev4_switch_mm((a),(b),(c)) +# define activate_mm(x,y) ev4_activate_mm((x),(y)) +# else +# define switch_mm(a,b,c) ev5_switch_mm((a),(b),(c)) +# define activate_mm(x,y) ev5_activate_mm((x),(y)) +# endif +#endif + +#define init_new_context init_new_context +static inline int +init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + int i; + + for_each_online_cpu(i) + mm->context[i] = 0; + if (tsk != current) + task_thread_info(tsk)->pcb.ptbr + = ((unsigned long)mm->pgd - IDENT_ADDR) >> PAGE_SHIFT; + return 0; +} + +#define enter_lazy_tlb enter_lazy_tlb +static inline void +enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +{ + task_thread_info(tsk)->pcb.ptbr + = ((unsigned long)mm->pgd - IDENT_ADDR) >> PAGE_SHIFT; +} + +#include + +#ifdef __MMU_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __MMU_EXTERN_INLINE +#endif + +#endif /* __ALPHA_MMU_CONTEXT_H */ diff --git a/arch/alpha/include/asm/mmzone.h b/arch/alpha/include/asm/mmzone.h new file mode 100644 index 0000000000..9b521c8574 --- /dev/null +++ b/arch/alpha/include/asm/mmzone.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Written by Kanoj Sarcar (kanoj@sgi.com) Aug 99 + * Adapted for the alpha wildfire architecture Jan 2001. + */ +#ifndef _ASM_MMZONE_H_ +#define _ASM_MMZONE_H_ + +#include + +/* + * Following are macros that are specific to this numa platform. + */ + +extern pg_data_t node_data[]; + +#define alpha_pa_to_nid(pa) \ + (alpha_mv.pa_to_nid \ + ? alpha_mv.pa_to_nid(pa) \ + : (0)) +#define node_mem_start(nid) \ + (alpha_mv.node_mem_start \ + ? alpha_mv.node_mem_start(nid) \ + : (0UL)) +#define node_mem_size(nid) \ + (alpha_mv.node_mem_size \ + ? alpha_mv.node_mem_size(nid) \ + : ((nid) ? (0UL) : (~0UL))) + +#define pa_to_nid(pa) alpha_pa_to_nid(pa) +#define NODE_DATA(nid) (&node_data[(nid)]) + +#define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn) + +#if 1 +#define PLAT_NODE_DATA_LOCALNR(p, n) \ + (((p) >> PAGE_SHIFT) - PLAT_NODE_DATA(n)->gendata.node_start_pfn) +#else +static inline unsigned long +PLAT_NODE_DATA_LOCALNR(unsigned long p, int n) +{ + unsigned long temp; + temp = p >> PAGE_SHIFT; + return temp - PLAT_NODE_DATA(n)->gendata.node_start_pfn; +} +#endif + +#ifdef CONFIG_DISCONTIGMEM + +/* + * Following are macros that each numa implementation must define. + */ + +/* + * Given a kernel address, find the home node of the underlying memory. 
+ */ +#define kvaddr_to_nid(kaddr) pa_to_nid(__pa(kaddr)) + +/* + * Given a kaddr, LOCAL_BASE_ADDR finds the owning node of the memory + * and returns the kaddr corresponding to first physical page in the + * node's mem_map. + */ +#define LOCAL_BASE_ADDR(kaddr) \ + ((unsigned long)__va(NODE_DATA(kvaddr_to_nid(kaddr))->node_start_pfn \ + << PAGE_SHIFT)) + +/* XXX: FIXME -- nyc */ +#define kern_addr_valid(kaddr) (0) + +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) + +#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> 32)) +#define pte_pfn(pte) (pte_val(pte) >> 32) + +#define mk_pte(page, pgprot) \ +({ \ + pte_t pte; \ + unsigned long pfn; \ + \ + pfn = page_to_pfn(page) << 32; \ + pte_val(pte) = pfn | pgprot_val(pgprot); \ + \ + pte; \ +}) + +#define pte_page(x) \ +({ \ + unsigned long kvirt; \ + struct page * __xx; \ + \ + kvirt = (unsigned long)__va(pte_val(x) >> (32-PAGE_SHIFT)); \ + __xx = virt_to_page(kvirt); \ + \ + __xx; \ +}) + +#define page_to_pa(page) \ + (page_to_pfn(page) << PAGE_SHIFT) + +#define pfn_to_nid(pfn) pa_to_nid(((u64)(pfn) << PAGE_SHIFT)) +#define pfn_valid(pfn) \ + (((pfn) - node_start_pfn(pfn_to_nid(pfn))) < \ + node_spanned_pages(pfn_to_nid(pfn))) \ + +#define virt_addr_valid(kaddr) pfn_valid((__pa(kaddr) >> PAGE_SHIFT)) + +#endif /* CONFIG_DISCONTIGMEM */ + +#endif /* _ASM_MMZONE_H_ */ diff --git a/arch/alpha/include/asm/module.h b/arch/alpha/include/asm/module.h new file mode 100644 index 0000000000..1e96e42f5d --- /dev/null +++ b/arch/alpha/include/asm/module.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_MODULE_H +#define _ALPHA_MODULE_H + +#include + +struct mod_arch_specific +{ + unsigned int gotsecindex; +}; + +#define ARCH_SHF_SMALL SHF_ALPHA_GPREL + +#ifdef MODULE +asm(".section .got,\"aws\",@progbits; .align 3; .previous"); +#endif + +#endif /*_ALPHA_MODULE_H*/ diff --git a/arch/alpha/include/asm/page.h b/arch/alpha/include/asm/page.h new file mode 100644 index 0000000000..18f48a6f2f --- /dev/null +++ b/arch/alpha/include/asm/page.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_PAGE_H +#define _ALPHA_PAGE_H + +#include +#include + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 13 +#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#ifndef __ASSEMBLY__ + +#define STRICT_MM_TYPECHECKS + +extern void clear_page(void *page); +#define clear_user_page(page, vaddr, pg) clear_page(page) + +#define alloc_zeroed_user_highpage_movable(vma, vaddr) \ + alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vmaddr) +#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE + +extern void copy_page(void * _to, void * _from); +#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) + +#ifdef STRICT_MM_TYPECHECKS +/* + * These are used to make use of C type-checking.. + */ +typedef struct { unsigned long pte; } pte_t; +typedef struct { unsigned long pmd; } pmd_t; +typedef struct { unsigned long pgd; } pgd_t; +typedef struct { unsigned long pgprot; } pgprot_t; + +#define pte_val(x) ((x).pte) +#define pmd_val(x) ((x).pmd) +#define pgd_val(x) ((x).pgd) +#define pgprot_val(x) ((x).pgprot) + +#define __pte(x) ((pte_t) { (x) } ) +#define __pmd(x) ((pmd_t) { (x) } ) +#define __pgd(x) ((pgd_t) { (x) } ) +#define __pgprot(x) ((pgprot_t) { (x) } ) + +#else +/* + * .. 
while these make it easier on the compiler + */ +typedef unsigned long pte_t; +typedef unsigned long pmd_t; +typedef unsigned long pgd_t; +typedef unsigned long pgprot_t; + +#define pte_val(x) (x) +#define pmd_val(x) (x) +#define pgd_val(x) (x) +#define pgprot_val(x) (x) + +#define __pte(x) (x) +#define __pgd(x) (x) +#define __pgprot(x) (x) + +#endif /* STRICT_MM_TYPECHECKS */ + +typedef struct page *pgtable_t; + +#ifdef USE_48_BIT_KSEG +#define PAGE_OFFSET 0xffff800000000000UL +#else +#define PAGE_OFFSET 0xfffffc0000000000UL +#endif + +#else + +#ifdef USE_48_BIT_KSEG +#define PAGE_OFFSET 0xffff800000000000 +#else +#define PAGE_OFFSET 0xfffffc0000000000 +#endif + +#endif /* !__ASSEMBLY__ */ + +#define __pa(x) ((unsigned long) (x) - PAGE_OFFSET) +#define __va(x) ((void *)((unsigned long) (x) + PAGE_OFFSET)) + +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) +#define virt_addr_valid(kaddr) pfn_valid((__pa(kaddr) >> PAGE_SHIFT)) + +#ifdef CONFIG_FLATMEM +#define pfn_valid(pfn) ((pfn) < max_mapnr) +#endif /* CONFIG_FLATMEM */ + +#include +#include + +#endif /* _ALPHA_PAGE_H */ diff --git a/arch/alpha/include/asm/pal.h b/arch/alpha/include/asm/pal.h new file mode 100644 index 0000000000..db2b3b18b3 --- /dev/null +++ b/arch/alpha/include/asm/pal.h @@ -0,0 +1,187 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_PAL_H +#define __ALPHA_PAL_H + +#include + +#ifndef __ASSEMBLY__ + +extern void halt(void) __attribute__((noreturn)); +#define __halt() __asm__ __volatile__ ("call_pal %0 #halt" : : "i" (PAL_halt)) + +#define imb() \ +__asm__ __volatile__ ("call_pal %0 #imb" : : "i" (PAL_imb) : "memory") + +#define draina() \ +__asm__ __volatile__ ("call_pal %0 #draina" : : "i" (PAL_draina) : "memory") + +#define __CALL_PAL_R0(NAME, TYPE) \ +extern inline TYPE NAME(void) \ +{ \ + register TYPE __r0 __asm__("$0"); \ + __asm__ __volatile__( \ + "call_pal %1 # " #NAME \ + :"=r" (__r0) \ + :"i" (PAL_ ## NAME) \ + :"$1", "$16", "$22", "$23", "$24", "$25"); \ + return __r0; \ +} + +#define __CALL_PAL_W1(NAME, TYPE0) \ +extern inline void NAME(TYPE0 arg0) \ +{ \ + register TYPE0 __r16 __asm__("$16") = arg0; \ + __asm__ __volatile__( \ + "call_pal %1 # "#NAME \ + : "=r"(__r16) \ + : "i"(PAL_ ## NAME), "0"(__r16) \ + : "$1", "$22", "$23", "$24", "$25"); \ +} + +#define __CALL_PAL_W2(NAME, TYPE0, TYPE1) \ +extern inline void NAME(TYPE0 arg0, TYPE1 arg1) \ +{ \ + register TYPE0 __r16 __asm__("$16") = arg0; \ + register TYPE1 __r17 __asm__("$17") = arg1; \ + __asm__ __volatile__( \ + "call_pal %2 # "#NAME \ + : "=r"(__r16), "=r"(__r17) \ + : "i"(PAL_ ## NAME), "0"(__r16), "1"(__r17) \ + : "$1", "$22", "$23", "$24", "$25"); \ +} + +#define __CALL_PAL_RW1(NAME, RTYPE, TYPE0) \ +extern inline RTYPE NAME(TYPE0 arg0) \ +{ \ + register RTYPE __r0 __asm__("$0"); \ + register TYPE0 __r16 __asm__("$16") = arg0; \ + __asm__ __volatile__( \ + "call_pal %2 # "#NAME \ + : "=r"(__r16), "=r"(__r0) \ + : "i"(PAL_ ## NAME), "0"(__r16) \ + : "$1", "$22", "$23", "$24", "$25"); \ + return __r0; \ +} + +#define __CALL_PAL_RW2(NAME, RTYPE, TYPE0, TYPE1) \ +extern inline RTYPE NAME(TYPE0 arg0, TYPE1 arg1) \ +{ \ + register RTYPE __r0 __asm__("$0"); \ + register TYPE0 __r16 __asm__("$16") = arg0; \ + register TYPE1 __r17 __asm__("$17") = arg1; \ + __asm__ __volatile__( \ + "call_pal %3 # "#NAME \ + : "=r"(__r16), "=r"(__r17), "=r"(__r0) \ + : "i"(PAL_ ## NAME), "0"(__r16), "1"(__r17) \ + : "$1", "$22", "$23", "$24", "$25"); \ + return __r0; \ +} + +__CALL_PAL_W1(cflush, unsigned long); +__CALL_PAL_R0(rdmces, 
unsigned long); +__CALL_PAL_R0(rdps, unsigned long); +__CALL_PAL_R0(rdusp, unsigned long); +__CALL_PAL_RW1(swpipl, unsigned long, unsigned long); +__CALL_PAL_R0(whami, unsigned long); +__CALL_PAL_W2(wrent, void*, unsigned long); +__CALL_PAL_W1(wripir, unsigned long); +__CALL_PAL_W1(wrkgp, unsigned long); +__CALL_PAL_W1(wrmces, unsigned long); +__CALL_PAL_RW2(wrperfmon, unsigned long, unsigned long, unsigned long); +__CALL_PAL_W1(wrusp, unsigned long); +__CALL_PAL_W1(wrvptptr, unsigned long); +__CALL_PAL_RW1(wtint, unsigned long, unsigned long); + +/* + * TB routines.. + */ +#define __tbi(nr,arg,arg1...) \ +({ \ + register unsigned long __r16 __asm__("$16") = (nr); \ + register unsigned long __r17 __asm__("$17"); arg; \ + __asm__ __volatile__( \ + "call_pal %3 #__tbi" \ + :"=r" (__r16),"=r" (__r17) \ + :"0" (__r16),"i" (PAL_tbi) ,##arg1 \ + :"$0", "$1", "$22", "$23", "$24", "$25"); \ +}) + +#define tbi(x,y) __tbi(x,__r17=(y),"1" (__r17)) +#define tbisi(x) __tbi(1,__r17=(x),"1" (__r17)) +#define tbisd(x) __tbi(2,__r17=(x),"1" (__r17)) +#define tbis(x) __tbi(3,__r17=(x),"1" (__r17)) +#define tbiap() __tbi(-1, /* no second argument */) +#define tbia() __tbi(-2, /* no second argument */) + +/* + * QEMU Cserv routines.. + */ + +static inline unsigned long +qemu_get_walltime(void) +{ + register unsigned long v0 __asm__("$0"); + register unsigned long a0 __asm__("$16") = 3; + + asm("call_pal %2 # cserve get_time" + : "=r"(v0), "+r"(a0) + : "i"(PAL_cserve) + : "$17", "$18", "$19", "$20", "$21"); + + return v0; +} + +static inline unsigned long +qemu_get_alarm(void) +{ + register unsigned long v0 __asm__("$0"); + register unsigned long a0 __asm__("$16") = 4; + + asm("call_pal %2 # cserve get_alarm" + : "=r"(v0), "+r"(a0) + : "i"(PAL_cserve) + : "$17", "$18", "$19", "$20", "$21"); + + return v0; +} + +static inline void +qemu_set_alarm_rel(unsigned long expire) +{ + register unsigned long a0 __asm__("$16") = 5; + register unsigned long a1 __asm__("$17") = expire; + + asm volatile("call_pal %2 # cserve set_alarm_rel" + : "+r"(a0), "+r"(a1) + : "i"(PAL_cserve) + : "$0", "$18", "$19", "$20", "$21"); +} + +static inline void +qemu_set_alarm_abs(unsigned long expire) +{ + register unsigned long a0 __asm__("$16") = 6; + register unsigned long a1 __asm__("$17") = expire; + + asm volatile("call_pal %2 # cserve set_alarm_abs" + : "+r"(a0), "+r"(a1) + : "i"(PAL_cserve) + : "$0", "$18", "$19", "$20", "$21"); +} + +static inline unsigned long +qemu_get_vmtime(void) +{ + register unsigned long v0 __asm__("$0"); + register unsigned long a0 __asm__("$16") = 7; + + asm("call_pal %2 # cserve get_time" + : "=r"(v0), "+r"(a0) + : "i"(PAL_cserve) + : "$17", "$18", "$19", "$20", "$21"); + + return v0; +} + +#endif /* !__ASSEMBLY__ */ +#endif /* __ALPHA_PAL_H */ diff --git a/arch/alpha/include/asm/param.h b/arch/alpha/include/asm/param.h new file mode 100644 index 0000000000..cfe947ce94 --- /dev/null +++ b/arch/alpha/include/asm/param.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ALPHA_PARAM_H +#define _ASM_ALPHA_PARAM_H + +#include + +# undef HZ +# define HZ CONFIG_HZ +# define USER_HZ 1024 +# define CLOCKS_PER_SEC USER_HZ /* frequency at which times() counts */ + +#endif /* _ASM_ALPHA_PARAM_H */ diff --git a/arch/alpha/include/asm/parport.h b/arch/alpha/include/asm/parport.h new file mode 100644 index 0000000000..0519a51e61 --- /dev/null +++ b/arch/alpha/include/asm/parport.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * parport.h: platform-specific PC-style parport 
initialisation + * + * Copyright (C) 1999, 2000 Tim Waugh + * + * This file should only be included by drivers/parport/parport_pc.c. + */ + +#ifndef _ASM_AXP_PARPORT_H +#define _ASM_AXP_PARPORT_H 1 + +static int parport_pc_find_isa_ports (int autoirq, int autodma); +static int parport_pc_find_nonpci_ports (int autoirq, int autodma) +{ + return parport_pc_find_isa_ports (autoirq, autodma); +} + +#endif /* !(_ASM_AXP_PARPORT_H) */ diff --git a/arch/alpha/include/asm/pci.h b/arch/alpha/include/asm/pci.h new file mode 100644 index 0000000000..cf6bc1e64d --- /dev/null +++ b/arch/alpha/include/asm/pci.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_PCI_H +#define __ALPHA_PCI_H + +#ifdef __KERNEL__ + +#include +#include +#include +#include + +/* + * The following structure is used to manage multiple PCI busses. + */ + +struct pci_iommu_arena; +struct page; + +/* A controller. Used to manage multiple PCI busses. */ + +struct pci_controller { + struct pci_controller *next; + struct pci_bus *bus; + struct resource *io_space; + struct resource *mem_space; + + /* The following are for reporting to userland. The invariant is + that if we report a BWX-capable dense memory, we do not report + a sparse memory at all, even if it exists. */ + unsigned long sparse_mem_base; + unsigned long dense_mem_base; + unsigned long sparse_io_base; + unsigned long dense_io_base; + + /* This one's for the kernel only. It's in KSEG somewhere. */ + unsigned long config_space_base; + + unsigned int index; + /* For compatibility with current (as of July 2003) pciutils + and XFree86. Eventually will be removed. */ + unsigned int need_domain_info; + + struct pci_iommu_arena *sg_pci; + struct pci_iommu_arena *sg_isa; + + void *sysdata; +}; + +/* Override the logic in pci_scan_bus for skipping already-configured + bus numbers. */ + +#define pcibios_assign_all_busses() 1 + +#define PCIBIOS_MIN_IO alpha_mv.min_io_address +#define PCIBIOS_MIN_MEM alpha_mv.min_mem_address + +/* IOMMU controls. */ + +/* TODO: integrate with include/asm-generic/pci.h ? */ +static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) +{ + return channel ? 15 : 14; +} + +#define pci_domain_nr(bus) ((struct pci_controller *)(bus)->sysdata)->index + +static inline int pci_proc_domain(struct pci_bus *bus) +{ + struct pci_controller *hose = bus->sysdata; + return hose->need_domain_info; +} + +#endif /* __KERNEL__ */ + +/* Values for the `which' argument to sys_pciconfig_iobase. 
*/ +#define IOBASE_HOSE 0 +#define IOBASE_SPARSE_MEM 1 +#define IOBASE_DENSE_MEM 2 +#define IOBASE_SPARSE_IO 3 +#define IOBASE_DENSE_IO 4 +#define IOBASE_ROOT_BUS 5 +#define IOBASE_FROM_HOSE 0x10000 + +extern struct pci_dev *isa_bridge; + +extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, + size_t count); +extern int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, + size_t count); +extern int pci_mmap_legacy_page_range(struct pci_bus *bus, + struct vm_area_struct *vma, + enum pci_mmap_state mmap_state); +extern void pci_adjust_legacy_attr(struct pci_bus *bus, + enum pci_mmap_state mmap_type); +#define HAVE_PCI_LEGACY 1 + +extern int pci_create_resource_files(struct pci_dev *dev); +extern void pci_remove_resource_files(struct pci_dev *dev); + +#endif /* __ALPHA_PCI_H */ diff --git a/arch/alpha/include/asm/percpu.h b/arch/alpha/include/asm/percpu.h new file mode 100644 index 0000000000..6923249f2d --- /dev/null +++ b/arch/alpha/include/asm/percpu.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_PERCPU_H +#define __ALPHA_PERCPU_H + +/* + * To calculate addresses of locally defined variables, GCC uses + * 32-bit displacement from the GP. Which doesn't work for per cpu + * variables in modules, as an offset to the kernel per cpu area is + * way above 4G. + * + * Always use weak definitions for percpu variables in modules. + */ +#if defined(MODULE) && defined(CONFIG_SMP) +#define ARCH_NEEDS_WEAK_PER_CPU +#endif + +#include + +#endif /* __ALPHA_PERCPU_H */ diff --git a/arch/alpha/include/asm/perf_event.h b/arch/alpha/include/asm/perf_event.h new file mode 100644 index 0000000000..5996e7a675 --- /dev/null +++ b/arch/alpha/include/asm/perf_event.h @@ -0,0 +1,4 @@ +#ifndef __ASM_ALPHA_PERF_EVENT_H +#define __ASM_ALPHA_PERF_EVENT_H + +#endif /* __ASM_ALPHA_PERF_EVENT_H */ diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h new file mode 100644 index 0000000000..68be7adbfe --- /dev/null +++ b/arch/alpha/include/asm/pgalloc.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_PGALLOC_H +#define _ALPHA_PGALLOC_H + +#include +#include + +#include + +/* + * Allocate and free page tables. The xxx_kernel() versions are + * used to allocate a kernel page table - this turns on ASN bits + * if any. + */ + +static inline void +pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) +{ + pmd_set(pmd, (pte_t *)(page_to_pa(pte) + PAGE_OFFSET)); +} + +static inline void +pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) +{ + pmd_set(pmd, pte); +} + +static inline void +pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + pud_set(pud, pmd); +} + +extern pgd_t *pgd_alloc(struct mm_struct *mm); + +#endif /* _ALPHA_PGALLOC_H */ diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h new file mode 100644 index 0000000000..02f0429f10 --- /dev/null +++ b/arch/alpha/include/asm/pgtable.h @@ -0,0 +1,346 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_PGTABLE_H +#define _ALPHA_PGTABLE_H + +#include + +/* + * This file contains the functions and defines necessary to modify and use + * the Alpha page table tree. + * + * This hopefully works with any standard Alpha page-size, as defined + * in (currently 8192). + */ +#include + +#include +#include /* For TASK_SIZE */ +#include +#include + +struct mm_struct; +struct vm_area_struct; + +/* Certain architectures need to do special things when PTEs + * within a page table are directly modified. 
Thus, the following + * hook is made available. + */ +#define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval)) +#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) + +/* PMD_SHIFT determines the size of the area a second-level page table can map */ +#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3)) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) + +/* PGDIR_SHIFT determines what a third-level page table entry can map */ +#define PGDIR_SHIFT (PAGE_SHIFT + 2*(PAGE_SHIFT-3)) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +/* + * Entries per page directory level: the Alpha is three-level, with + * all levels having a one-page page table. + */ +#define PTRS_PER_PTE (1UL << (PAGE_SHIFT-3)) +#define PTRS_PER_PMD (1UL << (PAGE_SHIFT-3)) +#define PTRS_PER_PGD (1UL << (PAGE_SHIFT-3)) +#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) + +/* Number of pointers that fit on a page: this will go away. */ +#define PTRS_PER_PAGE (1UL << (PAGE_SHIFT-3)) + +#ifdef CONFIG_ALPHA_LARGE_VMALLOC +#define VMALLOC_START 0xfffffe0000000000 +#else +#define VMALLOC_START (-2*PGDIR_SIZE) +#endif +#define VMALLOC_END (-PGDIR_SIZE) + +/* + * OSF/1 PAL-code-imposed page table bits + */ +#define _PAGE_VALID 0x0001 +#define _PAGE_FOR 0x0002 /* used for page protection (fault on read) */ +#define _PAGE_FOW 0x0004 /* used for page protection (fault on write) */ +#define _PAGE_FOE 0x0008 /* used for page protection (fault on exec) */ +#define _PAGE_ASM 0x0010 +#define _PAGE_KRE 0x0100 /* xxx - see below on the "accessed" bit */ +#define _PAGE_URE 0x0200 /* xxx */ +#define _PAGE_KWE 0x1000 /* used to do the dirty bit in software */ +#define _PAGE_UWE 0x2000 /* used to do the dirty bit in software */ + +/* .. and these are ours ... */ +#define _PAGE_DIRTY 0x20000 +#define _PAGE_ACCESSED 0x40000 + +/* + * NOTE! The "accessed" bit isn't necessarily exact: it can be kept exactly + * by software (use the KRE/URE/KWE/UWE bits appropriately), but I'll fake it. + * Under Linux/AXP, the "accessed" bit just means "read", and I'll just use + * the KRE/URE bits to watch for it. That way we don't need to overload the + * KWE/UWE bits with both handling dirty and accessed. + * + * Note that the kernel uses the accessed bit just to check whether to page + * out a page or not, so it doesn't have to be exact anyway. + */ + +#define __DIRTY_BITS (_PAGE_DIRTY | _PAGE_KWE | _PAGE_UWE) +#define __ACCESS_BITS (_PAGE_ACCESSED | _PAGE_KRE | _PAGE_URE) + +#define _PFN_MASK 0xFFFFFFFF00000000UL + +#define _PAGE_TABLE (_PAGE_VALID | __DIRTY_BITS | __ACCESS_BITS) +#define _PAGE_CHG_MASK (_PFN_MASK | __DIRTY_BITS | __ACCESS_BITS) + +/* + * All the normal masks have the "page accessed" bits on, as any time they are used, + * the page is accessed. 
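As a quick sanity check of the PMD_SHIFT/PGDIR_SHIFT/PTRS_PER_* definitions above, the following stand-alone userspace sketch (assuming the usual 8 KB Alpha page, PAGE_SHIFT = 13; it is an illustration only, not part of this patch) works out what each level of the three-level tree maps:

/* Illustrative only: derive the layout implied by the macros above for
 * PAGE_SHIFT = 13 (8 KB pages).  Expected output: 1024 entries per table,
 * 8 MB per second-level table, 8 GB per top-level entry. */
#include <stdio.h>

int main(void)
{
	const unsigned long page_shift  = 13;                                 /* 8 KB pages (assumed) */
	const unsigned long pmd_shift   = page_shift + (page_shift - 3);      /* 23 */
	const unsigned long pgdir_shift = page_shift + 2 * (page_shift - 3);  /* 33 */

	printf("entries per table : %lu\n", 1UL << (page_shift - 3));         /* 1024 */
	printf("PMD maps          : %lu MB\n", (1UL << pmd_shift) >> 20);     /* 8 MB */
	printf("PGD entry maps    : %lu GB\n", (1UL << pgdir_shift) >> 30);   /* 8 GB */
	return 0;
}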
They are cleared only by the page-out routines + */ +#define PAGE_NONE __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOR | _PAGE_FOW | _PAGE_FOE) +#define PAGE_SHARED __pgprot(_PAGE_VALID | __ACCESS_BITS) +#define PAGE_COPY __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW) +#define PAGE_READONLY __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW) +#define PAGE_KERNEL __pgprot(_PAGE_VALID | _PAGE_ASM | _PAGE_KRE | _PAGE_KWE) + +#define _PAGE_NORMAL(x) __pgprot(_PAGE_VALID | __ACCESS_BITS | (x)) + +#define _PAGE_P(x) _PAGE_NORMAL((x) | (((x) & _PAGE_FOW)?0:_PAGE_FOW)) +#define _PAGE_S(x) _PAGE_NORMAL(x) + +/* + * The hardware can handle write-only mappings, but as the Alpha + * architecture does byte-wide writes with a read-modify-write + * sequence, it's not practical to have write-without-read privs. + * Thus the "-w- -> rw-" and "-wx -> rwx" mapping here (and in + * arch/alpha/mm/fault.c) + */ + /* xwr */ +#define __P000 _PAGE_P(_PAGE_FOE | _PAGE_FOW | _PAGE_FOR) +#define __P001 _PAGE_P(_PAGE_FOE | _PAGE_FOW) +#define __P010 _PAGE_P(_PAGE_FOE) +#define __P011 _PAGE_P(_PAGE_FOE) +#define __P100 _PAGE_P(_PAGE_FOW | _PAGE_FOR) +#define __P101 _PAGE_P(_PAGE_FOW) +#define __P110 _PAGE_P(0) +#define __P111 _PAGE_P(0) + +#define __S000 _PAGE_S(_PAGE_FOE | _PAGE_FOW | _PAGE_FOR) +#define __S001 _PAGE_S(_PAGE_FOE | _PAGE_FOW) +#define __S010 _PAGE_S(_PAGE_FOE) +#define __S011 _PAGE_S(_PAGE_FOE) +#define __S100 _PAGE_S(_PAGE_FOW | _PAGE_FOR) +#define __S101 _PAGE_S(_PAGE_FOW) +#define __S110 _PAGE_S(0) +#define __S111 _PAGE_S(0) + +/* + * pgprot_noncached() is only for infiniband pci support, and a real + * implementation for RAM would be more complicated. + */ +#define pgprot_noncached(prot) (prot) + +/* + * BAD_PAGETABLE is used when we need a bogus page-table, while + * BAD_PAGE is used for a bogus page. + * + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +extern pte_t __bad_page(void); +extern pmd_t * __bad_pagetable(void); + +extern unsigned long __zero_page(void); + +#define BAD_PAGETABLE __bad_pagetable() +#define BAD_PAGE __bad_page() +#define ZERO_PAGE(vaddr) (virt_to_page(ZERO_PGE)) + +/* number of bits that fit into a memory pointer */ +#define BITS_PER_PTR (8*sizeof(unsigned long)) + +/* to align the pointer to a pointer address */ +#define PTR_MASK (~(sizeof(void*)-1)) + +/* sizeof(void*)==1<>(PAGE_SHIFT-SIZEOF_PTR_LOG2)&PTR_MASK&~PAGE_MASK) + +/* + * On certain platforms whose physical address space can overlap KSEG, + * namely EV6 and above, we must re-twiddle the physaddr to restore the + * correct high-order bits. + * + * This is extremely confusing until you realize that this is actually + * just working around a userspace bug. The X server was intending to + * provide the physical address but instead provided the KSEG address. + * Or tried to, except it's not representable. + * + * On Tsunami there's nothing meaningful at 0x40000000000, so this is + * a safe thing to do. Come the first core logic that does put something + * in this area -- memory or whathaveyou -- then this hack will have + * to go away. So be prepared! + */ + +#if defined(CONFIG_ALPHA_GENERIC) && defined(USE_48_BIT_KSEG) +#error "EV6-only feature in a generic kernel" +#endif +#if defined(CONFIG_ALPHA_GENERIC) || \ + (defined(CONFIG_ALPHA_EV6) && !defined(USE_48_BIT_KSEG)) +#define KSEG_PFN (0xc0000000000UL >> PAGE_SHIFT) +#define PHYS_TWIDDLE(pfn) \ + ((((pfn) & KSEG_PFN) == (0x40000000000UL >> PAGE_SHIFT)) \ + ? 
((pfn) ^= KSEG_PFN) : (pfn)) +#else +#define PHYS_TWIDDLE(pfn) (pfn) +#endif + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + */ +#define page_to_pa(page) (page_to_pfn(page) << PAGE_SHIFT) +#define pte_pfn(pte) (pte_val(pte) >> 32) + +#define pte_page(pte) pfn_to_page(pte_pfn(pte)) +#define mk_pte(page, pgprot) \ +({ \ + pte_t pte; \ + \ + pte_val(pte) = (page_to_pfn(page) << 32) | pgprot_val(pgprot); \ + pte; \ +}) + +extern inline pte_t pfn_pte(unsigned long physpfn, pgprot_t pgprot) +{ pte_t pte; pte_val(pte) = (PHYS_TWIDDLE(physpfn) << 32) | pgprot_val(pgprot); return pte; } + +extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot); return pte; } + +extern inline void pmd_set(pmd_t * pmdp, pte_t * ptep) +{ pmd_val(*pmdp) = _PAGE_TABLE | ((((unsigned long) ptep) - PAGE_OFFSET) << (32-PAGE_SHIFT)); } + +extern inline void pud_set(pud_t * pudp, pmd_t * pmdp) +{ pud_val(*pudp) = _PAGE_TABLE | ((((unsigned long) pmdp) - PAGE_OFFSET) << (32-PAGE_SHIFT)); } + + +extern inline unsigned long +pmd_page_vaddr(pmd_t pmd) +{ + return ((pmd_val(pmd) & _PFN_MASK) >> (32-PAGE_SHIFT)) + PAGE_OFFSET; +} + +#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> 32)) +#define pud_page(pud) (pfn_to_page(pud_val(pud) >> 32)) + +extern inline pmd_t *pud_pgtable(pud_t pgd) +{ + return (pmd_t *)(PAGE_OFFSET + ((pud_val(pgd) & _PFN_MASK) >> (32-PAGE_SHIFT))); +} + +extern inline int pte_none(pte_t pte) { return !pte_val(pte); } +extern inline int pte_present(pte_t pte) { return pte_val(pte) & _PAGE_VALID; } +extern inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + pte_val(*ptep) = 0; +} + +extern inline int pmd_none(pmd_t pmd) { return !pmd_val(pmd); } +extern inline int pmd_bad(pmd_t pmd) { return (pmd_val(pmd) & ~_PFN_MASK) != _PAGE_TABLE; } +extern inline int pmd_present(pmd_t pmd) { return pmd_val(pmd) & _PAGE_VALID; } +extern inline void pmd_clear(pmd_t * pmdp) { pmd_val(*pmdp) = 0; } + +extern inline int pud_none(pud_t pud) { return !pud_val(pud); } +extern inline int pud_bad(pud_t pud) { return (pud_val(pud) & ~_PFN_MASK) != _PAGE_TABLE; } +extern inline int pud_present(pud_t pud) { return pud_val(pud) & _PAGE_VALID; } +extern inline void pud_clear(pud_t * pudp) { pud_val(*pudp) = 0; } + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ +extern inline int pte_write(pte_t pte) { return !(pte_val(pte) & _PAGE_FOW); } +extern inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } +extern inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } + +extern inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_FOW; return pte; } +extern inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~(__DIRTY_BITS); return pte; } +extern inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~(__ACCESS_BITS); return pte; } +extern inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) &= ~_PAGE_FOW; return pte; } +extern inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= __DIRTY_BITS; return pte; } +extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; return pte; } + +/* + * The smp_rmb() in the following functions are required to order the load of + * *dir (the pointer in the top level page table) with any subsequent load of + * the returned pmd_t *ret (ret is data dependent on *dir). 
+ * + * If this ordering is not enforced, the CPU might load an older value of + * *ret, which may be uninitialized data. See mm/memory.c:__pte_alloc for + * more details. + * + * Note that we never change the mm->pgd pointer after the task is running, so + * pgd_offset does not require such a barrier. + */ + +/* Find an entry in the second-level page table.. */ +extern inline pmd_t * pmd_offset(pud_t * dir, unsigned long address) +{ + pmd_t *ret = pud_pgtable(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1)); + smp_rmb(); /* see above */ + return ret; +} +#define pmd_offset pmd_offset + +/* Find an entry in the third-level page table.. */ +extern inline pte_t * pte_offset_kernel(pmd_t * dir, unsigned long address) +{ + pte_t *ret = (pte_t *) pmd_page_vaddr(*dir) + + ((address >> PAGE_SHIFT) & (PTRS_PER_PAGE - 1)); + smp_rmb(); /* see above */ + return ret; +} +#define pte_offset_kernel pte_offset_kernel + +extern pgd_t swapper_pg_dir[1024]; + +/* + * The Alpha doesn't have any external MMU info: the kernel page + * tables contain all the necessary information. + */ +extern inline void update_mmu_cache(struct vm_area_struct * vma, + unsigned long address, pte_t *ptep) +{ +} + +/* + * Non-present pages: high 24 bits are offset, next 8 bits type, + * low 32 bits zero. + */ +extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) +{ pte_t pte; pte_val(pte) = (type << 32) | (offset << 40); return pte; } + +#define __swp_type(x) (((x).val >> 32) & 0xff) +#define __swp_offset(x) ((x).val >> 40) +#define __swp_entry(type, off) ((swp_entry_t) { pte_val(mk_swap_pte((type), (off))) }) +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) + +#define kern_addr_valid(addr) (1) + +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) +#define pmd_ERROR(e) \ + printk("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e)) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e)) + +extern void paging_init(void); + +/* We have our own get_unmapped_area to cope with ADDR_LIMIT_32BIT. */ +#define HAVE_ARCH_UNMAPPED_AREA + +#endif /* _ALPHA_PGTABLE_H */ diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h new file mode 100644 index 0000000000..090499c99c --- /dev/null +++ b/arch/alpha/include/asm/processor.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * include/asm-alpha/processor.h + * + * Copyright (C) 1994 Linus Torvalds + */ + +#ifndef __ASM_ALPHA_PROCESSOR_H +#define __ASM_ALPHA_PROCESSOR_H + +#include /* for ADDR_LIMIT_32BIT */ + +/* + * We have a 42-bit user address space: 4TB user VM... + */ +#define TASK_SIZE (0x40000000000UL) + +#define STACK_TOP \ + (current->personality & ADDR_LIMIT_32BIT ? 0x80000000 : 0x00120000000UL) + +#define STACK_TOP_MAX 0x00120000000UL + +/* This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define TASK_UNMAPPED_BASE \ + ((current->personality & ADDR_LIMIT_32BIT) ? 0x40000000 : TASK_SIZE / 2) + +typedef struct { + unsigned long seg; +} mm_segment_t; + +/* This is dead. Everything has been moved to thread_info. */ +struct thread_struct { }; +#define INIT_THREAD { } + +/* Do necessary setup to start up a newly executed thread. */ +struct pt_regs; +extern void start_thread(struct pt_regs *, unsigned long, unsigned long); + +/* Free all resources held by a thread. 
*/ +struct task_struct; +extern void release_thread(struct task_struct *); + +unsigned long __get_wchan(struct task_struct *p); + +#define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc) + +#define KSTK_ESP(tsk) \ + ((tsk) == current ? rdusp() : task_thread_info(tsk)->pcb.usp) + +#define cpu_relax() barrier() + +#define ARCH_HAS_PREFETCH +#define ARCH_HAS_PREFETCHW +#define ARCH_HAS_SPINLOCK_PREFETCH + +#ifndef CONFIG_SMP +/* Nothing to prefetch. */ +#define spin_lock_prefetch(lock) do { } while (0) +#endif + +extern inline void prefetch(const void *ptr) +{ + __builtin_prefetch(ptr, 0, 3); +} + +extern inline void prefetchw(const void *ptr) +{ + __builtin_prefetch(ptr, 1, 3); +} + +#ifdef CONFIG_SMP +extern inline void spin_lock_prefetch(const void *ptr) +{ + __builtin_prefetch(ptr, 1, 3); +} +#endif + +#endif /* __ASM_ALPHA_PROCESSOR_H */ diff --git a/arch/alpha/include/asm/ptrace.h b/arch/alpha/include/asm/ptrace.h new file mode 100644 index 0000000000..df5f317ab3 --- /dev/null +++ b/arch/alpha/include/asm/ptrace.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASMAXP_PTRACE_H +#define _ASMAXP_PTRACE_H + +#include + + +#define arch_has_single_step() (1) +#define user_mode(regs) (((regs)->ps & 8) != 0) +#define instruction_pointer(regs) ((regs)->pc) +#define profile_pc(regs) instruction_pointer(regs) +#define current_user_stack_pointer() rdusp() + +#define task_pt_regs(task) \ + ((struct pt_regs *) (task_stack_page(task) + 2*PAGE_SIZE) - 1) + +#define current_pt_regs() \ + ((struct pt_regs *) ((char *)current_thread_info() + 2*PAGE_SIZE) - 1) +#define signal_pt_regs current_pt_regs + +#define force_successful_syscall_return() (current_pt_regs()->r0 = 0) + +static inline unsigned long regs_return_value(struct pt_regs *regs) +{ + return regs->r0; +} + +#endif diff --git a/arch/alpha/include/asm/rwonce.h b/arch/alpha/include/asm/rwonce.h new file mode 100644 index 0000000000..35542bcf92 --- /dev/null +++ b/arch/alpha/include/asm/rwonce.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Google LLC. + */ +#ifndef __ASM_RWONCE_H +#define __ASM_RWONCE_H + +#ifdef CONFIG_SMP + +#include + +/* + * Alpha is apparently daft enough to reorder address-dependent loads + * on some CPU implementations. Knock some common sense into it with + * a memory barrier in READ_ONCE(). + * + * For the curious, more information about this unusual reordering is + * available in chapter 15 of the "perfbook": + * + * https://kernel.org/pub/linux/kernel/people/paulmck/perfbook/perfbook.html + * + */ +#define __READ_ONCE(x) \ +({ \ + __unqual_scalar_typeof(x) __x = \ + (*(volatile typeof(__x) *)(&(x))); \ + mb(); \ + (typeof(x))__x; \ +}) + +#endif /* CONFIG_SMP */ + +#include + +#endif /* __ASM_RWONCE_H */ diff --git a/arch/alpha/include/asm/serial.h b/arch/alpha/include/asm/serial.h new file mode 100644 index 0000000000..9111544731 --- /dev/null +++ b/arch/alpha/include/asm/serial.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * include/asm-alpha/serial.h + */ + + +/* + * This assumes you have a 1.8432 MHz clock for your UART. + * + * It'd be nice if someone built a serial card with a 24.576 MHz + * clock, since the 16550A is capable of handling a top speed of 1.5 + * megabits/second; but this requires the faster clock. 
+ */ +#define BASE_BAUD ( 1843200 / 16 ) + +/* Standard COM flags (except for COM4, because of the 8514 problem) */ +#ifdef CONFIG_SERIAL_8250_DETECT_IRQ +#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_AUTO_IRQ) +#define STD_COM4_FLAGS (UPF_BOOT_AUTOCONF | UPF_AUTO_IRQ) +#else +#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST) +#define STD_COM4_FLAGS UPF_BOOT_AUTOCONF +#endif + +#define SERIAL_PORT_DFNS \ + /* UART CLK PORT IRQ FLAGS */ \ + { 0, BASE_BAUD, 0x3F8, 4, STD_COM_FLAGS }, /* ttyS0 */ \ + { 0, BASE_BAUD, 0x2F8, 3, STD_COM_FLAGS }, /* ttyS1 */ \ + { 0, BASE_BAUD, 0x3E8, 4, STD_COM_FLAGS }, /* ttyS2 */ \ + { 0, BASE_BAUD, 0x2E8, 3, STD_COM4_FLAGS }, /* ttyS3 */ diff --git a/arch/alpha/include/asm/setup.h b/arch/alpha/include/asm/setup.h new file mode 100644 index 0000000000..262aab99e3 --- /dev/null +++ b/arch/alpha/include/asm/setup.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ALPHA_SETUP_H +#define __ALPHA_SETUP_H + +#include + +/* + * We leave one page for the initial stack page, and one page for + * the initial process structure. Also, the console eats 3 MB for + * the initial bootloader (one of which we can reclaim later). + */ +#define BOOT_PCB 0x20000000 +#define BOOT_ADDR 0x20000000 +/* Remove when official MILO sources have ELF support: */ +#define BOOT_SIZE (16*1024) + +#ifdef CONFIG_ALPHA_LEGACY_START_ADDRESS +#define KERNEL_START_PHYS 0x300000 /* Old bootloaders hardcoded this. */ +#else +#define KERNEL_START_PHYS 0x1000000 /* required: Wildfire/Titan/Marvel */ +#endif + +#define KERNEL_START (PAGE_OFFSET+KERNEL_START_PHYS) +#define SWAPPER_PGD KERNEL_START +#define INIT_STACK (PAGE_OFFSET+KERNEL_START_PHYS+0x02000) +#define EMPTY_PGT (PAGE_OFFSET+KERNEL_START_PHYS+0x04000) +#define EMPTY_PGE (PAGE_OFFSET+KERNEL_START_PHYS+0x08000) +#define ZERO_PGE (PAGE_OFFSET+KERNEL_START_PHYS+0x0A000) + +#define START_ADDR (PAGE_OFFSET+KERNEL_START_PHYS+0x10000) + +/* + * This is setup by the secondary bootstrap loader. Because + * the zero page is zeroed out as soon as the vm system is + * initialized, we need to copy things out into a more permanent + * place. + */ +#define PARAM ZERO_PGE +#define COMMAND_LINE ((char *)(absolute_pointer(PARAM + 0x0000))) +#define INITRD_START (*(unsigned long *) (PARAM+0x100)) +#define INITRD_SIZE (*(unsigned long *) (PARAM+0x108)) + +#endif diff --git a/arch/alpha/include/asm/sfp-machine.h b/arch/alpha/include/asm/sfp-machine.h new file mode 100644 index 0000000000..5fe63afbd4 --- /dev/null +++ b/arch/alpha/include/asm/sfp-machine.h @@ -0,0 +1,82 @@ +/* Machine-dependent software floating-point definitions. + Alpha kernel version. + Copyright (C) 1997,1998,1999 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com), + Jakub Jelinek (jakub@redhat.com) and + David S. Miller (davem@redhat.com). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. 
+ + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#ifndef _SFP_MACHINE_H +#define _SFP_MACHINE_H + +#define _FP_W_TYPE_SIZE 64 +#define _FP_W_TYPE unsigned long +#define _FP_WS_TYPE signed long +#define _FP_I_TYPE long + +#define _FP_MUL_MEAT_S(R,X,Y) \ + _FP_MUL_MEAT_1_imm(_FP_WFRACBITS_S,R,X,Y) +#define _FP_MUL_MEAT_D(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_1_udiv(D,R,X,Y) +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv(Q,R,X,Y) + +#define _FP_NANFRAC_S _FP_QNANBIT_S +#define _FP_NANFRAC_D _FP_QNANBIT_D +#define _FP_NANFRAC_Q _FP_QNANBIT_Q +#define _FP_NANSIGN_S 1 +#define _FP_NANSIGN_D 1 +#define _FP_NANSIGN_Q 1 + +#define _FP_KEEPNANFRACP 1 + +/* Alpha Architecture Handbook, 4.7.10.4 sais that + * we should prefer any type of NaN in Fb, then Fa. + */ +#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ + do { \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R,X); \ + R##_c = FP_CLS_NAN; \ + } while (0) + +/* Obtain the current rounding mode. */ +#define FP_ROUNDMODE mode +#define FP_RND_NEAREST (FPCR_DYN_NORMAL >> FPCR_DYN_SHIFT) +#define FP_RND_ZERO (FPCR_DYN_CHOPPED >> FPCR_DYN_SHIFT) +#define FP_RND_PINF (FPCR_DYN_PLUS >> FPCR_DYN_SHIFT) +#define FP_RND_MINF (FPCR_DYN_MINUS >> FPCR_DYN_SHIFT) + +/* Exception flags. */ +#define FP_EX_INVALID IEEE_TRAP_ENABLE_INV +#define FP_EX_OVERFLOW IEEE_TRAP_ENABLE_OVF +#define FP_EX_UNDERFLOW IEEE_TRAP_ENABLE_UNF +#define FP_EX_DIVZERO IEEE_TRAP_ENABLE_DZE +#define FP_EX_INEXACT IEEE_TRAP_ENABLE_INE +#define FP_EX_DENORM IEEE_TRAP_ENABLE_DNO + +#define FP_DENORM_ZERO (swcr & IEEE_MAP_DMZ) + +/* We write the results always */ +#define FP_INHIBIT_RESULTS 0 + +#endif diff --git a/arch/alpha/include/asm/shmparam.h b/arch/alpha/include/asm/shmparam.h new file mode 100644 index 0000000000..0c04fde1ae --- /dev/null +++ b/arch/alpha/include/asm/shmparam.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASMAXP_SHMPARAM_H +#define _ASMAXP_SHMPARAM_H + +#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ + +#endif /* _ASMAXP_SHMPARAM_H */ diff --git a/arch/alpha/include/asm/signal.h b/arch/alpha/include/asm/signal.h new file mode 100644 index 0000000000..a40f020116 --- /dev/null +++ b/arch/alpha/include/asm/signal.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASMAXP_SIGNAL_H +#define _ASMAXP_SIGNAL_H + +#include + +/* Digital Unix defines 64 signals. Most things should be clean enough + to redefine this at will, if care is taken to make libc match. 
*/ + +#define _NSIG 64 +#define _NSIG_BPW 64 +#define _NSIG_WORDS (_NSIG / _NSIG_BPW) + +typedef unsigned long old_sigset_t; /* at least 32 bits */ + +typedef struct { + unsigned long sig[_NSIG_WORDS]; +} sigset_t; + +struct osf_sigaction { + __sighandler_t sa_handler; + old_sigset_t sa_mask; + int sa_flags; +}; + +#define __ARCH_HAS_KA_RESTORER +#include +#endif diff --git a/arch/alpha/include/asm/smp.h b/arch/alpha/include/asm/smp.h new file mode 100644 index 0000000000..2264ae7267 --- /dev/null +++ b/arch/alpha/include/asm/smp.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_SMP_H +#define __ASM_SMP_H + +#include +#include +#include +#include + +/* HACK: Cabrio WHAMI return value is bogus if more than 8 bits used.. :-( */ + +static __inline__ unsigned char +__hard_smp_processor_id(void) +{ + register unsigned char __r0 __asm__("$0"); + __asm__ __volatile__( + "call_pal %1 #whami" + : "=r"(__r0) + :"i" (PAL_whami) + : "$1", "$22", "$23", "$24", "$25"); + return __r0; +} + +#ifdef CONFIG_SMP + +#include + +struct cpuinfo_alpha { + unsigned long loops_per_jiffy; + unsigned long last_asn; + int need_new_asn; + int asn_lock; + unsigned long ipi_count; + unsigned long prof_multiplier; + unsigned long prof_counter; + unsigned char mcheck_expected; + unsigned char mcheck_taken; + unsigned char mcheck_extra; +} __attribute__((aligned(64))); + +extern struct cpuinfo_alpha cpu_data[NR_CPUS]; + +#define hard_smp_processor_id() __hard_smp_processor_id() +#define raw_smp_processor_id() (current_thread_info()->cpu) + +extern int smp_num_cpus; + +extern void arch_send_call_function_single_ipi(int cpu); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); + +#else /* CONFIG_SMP */ + +#define hard_smp_processor_id() 0 +#define smp_call_function_on_cpu(func,info,wait,cpu) ({ 0; }) + +#endif /* CONFIG_SMP */ + +#define NO_PROC_ID (-1) + +#endif diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h new file mode 100644 index 0000000000..76a32817e7 --- /dev/null +++ b/arch/alpha/include/asm/socket.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SOCKET_H +#define _ASM_SOCKET_H + +#include + +/* O_NONBLOCK clashes with the bits used for socket types. Therefore we + * have to define SOCK_NONBLOCK to a different value here. + */ +#define SOCK_NONBLOCK 0x40000000 +#endif /* _ASM_SOCKET_H */ diff --git a/arch/alpha/include/asm/sparsemem.h b/arch/alpha/include/asm/sparsemem.h new file mode 100644 index 0000000000..a0820fd2d4 --- /dev/null +++ b/arch/alpha/include/asm/sparsemem.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ALPHA_SPARSEMEM_H +#define _ASM_ALPHA_SPARSEMEM_H + +#ifdef CONFIG_SPARSEMEM + +#define SECTION_SIZE_BITS 27 + +/* + * According to "Alpha Architecture Reference Manual" physical + * addresses are at most 48 bits. 
+ * https://download.majix.org/dec/alpha_arch_ref.pdf + */ +#define MAX_PHYSMEM_BITS 48 + +#endif /* CONFIG_SPARSEMEM */ + +#endif /* _ASM_ALPHA_SPARSEMEM_H */ diff --git a/arch/alpha/include/asm/special_insns.h b/arch/alpha/include/asm/special_insns.h new file mode 100644 index 0000000000..ca2c5c30b2 --- /dev/null +++ b/arch/alpha/include/asm/special_insns.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_SPECIAL_INSNS_H +#define __ALPHA_SPECIAL_INSNS_H + +enum implver_enum { + IMPLVER_EV4, + IMPLVER_EV5, + IMPLVER_EV6 +}; + +#ifdef CONFIG_ALPHA_GENERIC +#define implver() \ +({ unsigned long __implver; \ + __asm__ ("implver %0" : "=r"(__implver)); \ + (enum implver_enum) __implver; }) +#else +/* Try to eliminate some dead code. */ +#ifdef CONFIG_ALPHA_EV4 +#define implver() IMPLVER_EV4 +#endif +#ifdef CONFIG_ALPHA_EV5 +#define implver() IMPLVER_EV5 +#endif +#if defined(CONFIG_ALPHA_EV6) +#define implver() IMPLVER_EV6 +#endif +#endif + +enum amask_enum { + AMASK_BWX = (1UL << 0), + AMASK_FIX = (1UL << 1), + AMASK_CIX = (1UL << 2), + AMASK_MAX = (1UL << 8), + AMASK_PRECISE_TRAP = (1UL << 9), +}; + +#define amask(mask) \ +({ unsigned long __amask, __input = (mask); \ + __asm__ ("amask %1,%0" : "=r"(__amask) : "rI"(__input)); \ + __amask; }) + +#endif /* __ALPHA_SPECIAL_INSNS_H */ diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h new file mode 100644 index 0000000000..1221cbb86a --- /dev/null +++ b/arch/alpha/include/asm/spinlock.h @@ -0,0 +1,163 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_SPINLOCK_H +#define _ALPHA_SPINLOCK_H + +#include +#include +#include +#include + +/* + * Simple spin lock operations. There are two variants, one clears IRQ's + * on the local processor, one does not. + * + * We make no fairness assumptions. They have a cost. 
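The ldl_l/stl_c sequences that follow implement the classic load-locked/store-conditional retry loop. As a rough sketch of the same pattern in portable C (using GCC __atomic builtins; the demo_* names are hypothetical and this is not the kernel's implementation):

/* Illustrative LL/SC-style acquire loop: spin read-only until the lock
 * looks free, then try to claim it atomically, retrying on failure. */
typedef struct { volatile unsigned int lock; } demo_spinlock_t;

static void demo_spin_lock(demo_spinlock_t *l)
{
	unsigned int expected;

	do {
		while (__atomic_load_n(&l->lock, __ATOMIC_RELAXED))
			;		/* wait without writing */
		expected = 0;
	} while (!__atomic_compare_exchange_n(&l->lock, &expected, 1, 0,
					      __ATOMIC_ACQUIRE, __ATOMIC_RELAXED));
}

static void demo_spin_unlock(demo_spinlock_t *l)
{
	__atomic_store_n(&l->lock, 0, __ATOMIC_RELEASE);
}

int main(void)
{
	demo_spinlock_t l = { 0 };

	demo_spin_lock(&l);
	/* critical section */
	demo_spin_unlock(&l);
	return 0;
}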
+ */ + +#define arch_spin_is_locked(x) ((x)->lock != 0) + +static inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return lock.lock == 0; +} + +static inline void arch_spin_unlock(arch_spinlock_t * lock) +{ + mb(); + lock->lock = 0; +} + +static inline void arch_spin_lock(arch_spinlock_t * lock) +{ + long tmp; + + __asm__ __volatile__( + "1: ldl_l %0,%1\n" + " bne %0,2f\n" + " lda %0,1\n" + " stl_c %0,%1\n" + " beq %0,2f\n" + " mb\n" + ".subsection 2\n" + "2: ldl %0,%1\n" + " bne %0,2b\n" + " br 1b\n" + ".previous" + : "=&r" (tmp), "=m" (lock->lock) + : "m"(lock->lock) : "memory"); +} + +static inline int arch_spin_trylock(arch_spinlock_t *lock) +{ + return !test_and_set_bit(0, &lock->lock); +} + +/***********************************************************/ + +static inline void arch_read_lock(arch_rwlock_t *lock) +{ + long regx; + + __asm__ __volatile__( + "1: ldl_l %1,%0\n" + " blbs %1,6f\n" + " subl %1,2,%1\n" + " stl_c %1,%0\n" + " beq %1,6f\n" + " mb\n" + ".subsection 2\n" + "6: ldl %1,%0\n" + " blbs %1,6b\n" + " br 1b\n" + ".previous" + : "=m" (*lock), "=&r" (regx) + : "m" (*lock) : "memory"); +} + +static inline void arch_write_lock(arch_rwlock_t *lock) +{ + long regx; + + __asm__ __volatile__( + "1: ldl_l %1,%0\n" + " bne %1,6f\n" + " lda %1,1\n" + " stl_c %1,%0\n" + " beq %1,6f\n" + " mb\n" + ".subsection 2\n" + "6: ldl %1,%0\n" + " bne %1,6b\n" + " br 1b\n" + ".previous" + : "=m" (*lock), "=&r" (regx) + : "m" (*lock) : "memory"); +} + +static inline int arch_read_trylock(arch_rwlock_t * lock) +{ + long regx; + int success; + + __asm__ __volatile__( + "1: ldl_l %1,%0\n" + " lda %2,0\n" + " blbs %1,2f\n" + " subl %1,2,%2\n" + " stl_c %2,%0\n" + " beq %2,6f\n" + "2: mb\n" + ".subsection 2\n" + "6: br 1b\n" + ".previous" + : "=m" (*lock), "=&r" (regx), "=&r" (success) + : "m" (*lock) : "memory"); + + return success; +} + +static inline int arch_write_trylock(arch_rwlock_t * lock) +{ + long regx; + int success; + + __asm__ __volatile__( + "1: ldl_l %1,%0\n" + " lda %2,0\n" + " bne %1,2f\n" + " lda %2,1\n" + " stl_c %2,%0\n" + " beq %2,6f\n" + "2: mb\n" + ".subsection 2\n" + "6: br 1b\n" + ".previous" + : "=m" (*lock), "=&r" (regx), "=&r" (success) + : "m" (*lock) : "memory"); + + return success; +} + +static inline void arch_read_unlock(arch_rwlock_t * lock) +{ + long regx; + __asm__ __volatile__( + " mb\n" + "1: ldl_l %1,%0\n" + " addl %1,2,%1\n" + " stl_c %1,%0\n" + " beq %1,6f\n" + ".subsection 2\n" + "6: br 1b\n" + ".previous" + : "=m" (*lock), "=&r" (regx) + : "m" (*lock) : "memory"); +} + +static inline void arch_write_unlock(arch_rwlock_t * lock) +{ + mb(); + lock->lock = 0; +} + +#endif /* _ALPHA_SPINLOCK_H */ diff --git a/arch/alpha/include/asm/spinlock_types.h b/arch/alpha/include/asm/spinlock_types.h new file mode 100644 index 0000000000..2526fd3be5 --- /dev/null +++ b/arch/alpha/include/asm/spinlock_types.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_SPINLOCK_TYPES_H +#define _ALPHA_SPINLOCK_TYPES_H + +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H +# error "please don't include this file directly" +#endif + +typedef struct { + volatile unsigned int lock; +} arch_spinlock_t; + +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } + +typedef struct { + volatile unsigned int lock; +} arch_rwlock_t; + +#define __ARCH_RW_LOCK_UNLOCKED { 0 } + +#endif diff --git a/arch/alpha/include/asm/string.h b/arch/alpha/include/asm/string.h new file mode 100644 index 0000000000..f043f91ff9 --- /dev/null +++ b/arch/alpha/include/asm/string.h @@ -0,0 +1,80 @@ +/* 
SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_STRING_H__ +#define __ALPHA_STRING_H__ + +#ifdef __KERNEL__ + +/* + * GCC of any recent vintage doesn't do stupid things with bcopy. + * EGCS 1.1 knows all about expanding memcpy inline, others don't. + * + * Similarly for a memset with data = 0. + */ + +#define __HAVE_ARCH_MEMCPY +extern void * memcpy(void *, const void *, size_t); +#define __HAVE_ARCH_MEMMOVE +extern void * memmove(void *, const void *, size_t); + +/* For backward compatibility with modules. Unused otherwise. */ +extern void * __memcpy(void *, const void *, size_t); + +#define memcpy __builtin_memcpy + +#define __HAVE_ARCH_MEMSET +extern void * __constant_c_memset(void *, unsigned long, size_t); +extern void * ___memset(void *, int, size_t); +extern void * __memset(void *, int, size_t); +extern void * memset(void *, int, size_t); + +/* For gcc 3.x, we cannot have the inline function named "memset" because + the __builtin_memset will attempt to resolve to the inline as well, + leading to a "sorry" about unimplemented recursive inlining. */ +extern inline void *__memset(void *s, int c, size_t n) +{ + if (__builtin_constant_p(c)) { + if (__builtin_constant_p(n)) { + return __builtin_memset(s, c, n); + } else { + unsigned long c8 = (c & 0xff) * 0x0101010101010101UL; + return __constant_c_memset(s, c8, n); + } + } + return ___memset(s, c, n); +} + +#define memset __memset + +#define __HAVE_ARCH_STRCPY +extern char * strcpy(char *,const char *); +#define __HAVE_ARCH_STRNCPY +extern char * strncpy(char *, const char *, size_t); +#define __HAVE_ARCH_STRCAT +extern char * strcat(char *, const char *); +#define __HAVE_ARCH_STRNCAT +extern char * strncat(char *, const char *, size_t); +#define __HAVE_ARCH_STRCHR +extern char * strchr(const char *,int); +#define __HAVE_ARCH_STRRCHR +extern char * strrchr(const char *,int); +#define __HAVE_ARCH_STRLEN +extern size_t strlen(const char *); +#define __HAVE_ARCH_MEMCHR +extern void * memchr(const void *, int, size_t); + +/* The following routine is like memset except that it writes 16-bit + aligned values. The DEST and COUNT parameters must be even for + correct operation. 
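For illustration, a hypothetical in-kernel caller of the 16-bit fill declared below (the demo_blank_screen helper is made up for this sketch; note the count is in 16-bit cells, and pointer and count satisfy the evenness rule):

/* Hypothetical usage sketch: blank an 80x25 text-mode buffer to
 * space-on-grey cells using the 16-bit fill. */
static void demo_blank_screen(u16 *screen_buf)
{
	memset16(screen_buf, 0x0720, 80 * 25);	/* count = cells, not bytes */
}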
*/ + +#define __HAVE_ARCH_MEMSET16 +extern void * __memset16(void *dest, unsigned short, size_t count); +static inline void *memset16(uint16_t *p, uint16_t v, size_t n) +{ + if (__builtin_constant_p(v)) + return __constant_c_memset(p, 0x0001000100010001UL * v, n * 2); + return __memset16(p, v, n * 2); +} + +#endif /* __KERNEL__ */ + +#endif /* __ALPHA_STRING_H__ */ diff --git a/arch/alpha/include/asm/switch_to.h b/arch/alpha/include/asm/switch_to.h new file mode 100644 index 0000000000..762b7f9753 --- /dev/null +++ b/arch/alpha/include/asm/switch_to.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_SWITCH_TO_H +#define __ALPHA_SWITCH_TO_H + + +struct task_struct; +extern struct task_struct *alpha_switch_to(unsigned long, struct task_struct *); + +#define switch_to(P,N,L) \ + do { \ + (L) = alpha_switch_to(virt_to_phys(&task_thread_info(N)->pcb), (P)); \ + check_mmu_context(); \ + } while (0) + +#endif /* __ALPHA_SWITCH_TO_H */ diff --git a/arch/alpha/include/asm/syscall.h b/arch/alpha/include/asm/syscall.h new file mode 100644 index 0000000000..f21babaeed --- /dev/null +++ b/arch/alpha/include/asm/syscall.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ALPHA_SYSCALL_H +#define _ASM_ALPHA_SYSCALL_H + +#include + +static inline int syscall_get_arch(struct task_struct *task) +{ + return AUDIT_ARCH_ALPHA; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->r0; +} + +#endif /* _ASM_ALPHA_SYSCALL_H */ diff --git a/arch/alpha/include/asm/termios.h b/arch/alpha/include/asm/termios.h new file mode 100644 index 0000000000..b7c77bb1bf --- /dev/null +++ b/arch/alpha/include/asm/termios.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_TERMIOS_H +#define _ALPHA_TERMIOS_H + +#include + +/* eof=^D eol=\0 eol2=\0 erase=del + werase=^W kill=^U reprint=^R sxtc=\0 + intr=^C quit=^\ susp=^Z + start=^Q stop=^S lnext=^V discard=^U + vmin=\1 vtime=\0 +*/ +#define INIT_C_CC "\004\000\000\177\027\025\022\000\003\034\032\000\021\023\026\025\001\000" + +/* + * Translate a "termio" structure into a "termios". Ugh. + */ + +#define user_termio_to_kernel_termios(a_termios, u_termio) \ +({ \ + struct ktermios *k_termios = (a_termios); \ + struct termio k_termio; \ + int canon, ret; \ + \ + ret = copy_from_user(&k_termio, u_termio, sizeof(k_termio)); \ + if (!ret) { \ + /* Overwrite only the low bits. */ \ + *(unsigned short *)&k_termios->c_iflag = k_termio.c_iflag; \ + *(unsigned short *)&k_termios->c_oflag = k_termio.c_oflag; \ + *(unsigned short *)&k_termios->c_cflag = k_termio.c_cflag; \ + *(unsigned short *)&k_termios->c_lflag = k_termio.c_lflag; \ + canon = k_termio.c_lflag & ICANON; \ + \ + k_termios->c_cc[VINTR] = k_termio.c_cc[_VINTR]; \ + k_termios->c_cc[VQUIT] = k_termio.c_cc[_VQUIT]; \ + k_termios->c_cc[VERASE] = k_termio.c_cc[_VERASE]; \ + k_termios->c_cc[VKILL] = k_termio.c_cc[_VKILL]; \ + k_termios->c_cc[VEOL2] = k_termio.c_cc[_VEOL2]; \ + k_termios->c_cc[VSWTC] = k_termio.c_cc[_VSWTC]; \ + k_termios->c_cc[canon ? VEOF : VMIN] = k_termio.c_cc[_VEOF]; \ + k_termios->c_cc[canon ? VEOL : VTIME] = k_termio.c_cc[_VEOL]; \ + } \ + ret; \ +}) + +/* + * Translate a "termios" structure into a "termio". Ugh. + * + * Note the "fun" _VMIN overloading. 
+ */ +#define kernel_termios_to_user_termio(u_termio, a_termios) \ +({ \ + struct ktermios *k_termios = (a_termios); \ + struct termio k_termio; \ + int canon; \ + \ + k_termio.c_iflag = k_termios->c_iflag; \ + k_termio.c_oflag = k_termios->c_oflag; \ + k_termio.c_cflag = k_termios->c_cflag; \ + canon = (k_termio.c_lflag = k_termios->c_lflag) & ICANON; \ + \ + k_termio.c_line = k_termios->c_line; \ + k_termio.c_cc[_VINTR] = k_termios->c_cc[VINTR]; \ + k_termio.c_cc[_VQUIT] = k_termios->c_cc[VQUIT]; \ + k_termio.c_cc[_VERASE] = k_termios->c_cc[VERASE]; \ + k_termio.c_cc[_VKILL] = k_termios->c_cc[VKILL]; \ + k_termio.c_cc[_VEOF] = k_termios->c_cc[canon ? VEOF : VMIN]; \ + k_termio.c_cc[_VEOL] = k_termios->c_cc[canon ? VEOL : VTIME]; \ + k_termio.c_cc[_VEOL2] = k_termios->c_cc[VEOL2]; \ + k_termio.c_cc[_VSWTC] = k_termios->c_cc[VSWTC]; \ + \ + copy_to_user(u_termio, &k_termio, sizeof(k_termio)); \ +}) + +#define user_termios_to_kernel_termios(k, u) \ + copy_from_user(k, u, sizeof(struct termios2)) + +#define kernel_termios_to_user_termios(u, k) \ + copy_to_user(u, k, sizeof(struct termios2)) + +#define user_termios_to_kernel_termios_1(k, u) \ + copy_from_user(k, u, sizeof(struct termios)) + +#define kernel_termios_to_user_termios_1(u, k) \ + copy_to_user(u, k, sizeof(struct termios)) + +#endif /* _ALPHA_TERMIOS_H */ diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h new file mode 100644 index 0000000000..2592356e32 --- /dev/null +++ b/arch/alpha/include/asm/thread_info.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_THREAD_INFO_H +#define _ALPHA_THREAD_INFO_H + +#ifdef __KERNEL__ + +#ifndef __ASSEMBLY__ +#include +#include +#include +#include +#endif + +#ifndef __ASSEMBLY__ +struct thread_info { + struct pcb_struct pcb; /* palcode state */ + + struct task_struct *task; /* main task structure */ + unsigned int flags; /* low level flags */ + unsigned int ieee_state; /* see fpu.h */ + + mm_segment_t addr_limit; /* thread address space */ + unsigned cpu; /* current CPU */ + int preempt_count; /* 0 => preemptable, <0 => BUG */ + unsigned int status; /* thread-synchronous flags */ + + int bpt_nsaved; + unsigned long bpt_addr[2]; /* breakpoint handling */ + unsigned int bpt_insn[2]; +}; + +/* + * Macros/functions for gaining access to the thread information structure. + */ +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .addr_limit = KERNEL_DS, \ + .preempt_count = INIT_PREEMPT_COUNT, \ +} + +/* How to get the thread information struct from C. */ +register struct thread_info *__current_thread_info __asm__("$8"); +#define current_thread_info() __current_thread_info + +#endif /* __ASSEMBLY__ */ + +/* Thread information allocation. */ +#define THREAD_SIZE_ORDER 1 +#define THREAD_SIZE (2*PAGE_SIZE) + +/* + * Thread information flags: + * - these are process state flags and used from assembly + * - pending work-to-be-done flags come first and must be assigned to be + * within bits 0 to 7 to fit in and immediate operand. + * + * TIF_SYSCALL_TRACE is known to be 0 via blbs. 
+ */ +#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ +#define TIF_SIGPENDING 2 /* signal pending */ +#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_SYSCALL_AUDIT 4 /* syscall audit active */ +#define TIF_NOTIFY_SIGNAL 5 /* signal notifications exist */ +#define TIF_DIE_IF_KERNEL 9 /* dik recursion lock */ +#define TIF_MEMDIE 13 /* is terminating due to OOM killer */ +#define TIF_POLLING_NRFLAG 14 /* idle is polling for TIF_NEED_RESCHED */ + +#define _TIF_SYSCALL_TRACE (1<status & ~UAC_BITMASK; \ + if (value & PR_UNALIGN_NOPRINT) \ + status |= TS_UAC_NOPRINT; \ + if (value & PR_UNALIGN_SIGBUS) \ + status |= TS_UAC_SIGBUS; \ + if (value & 4) /* alpha-specific */ \ + status |= TS_UAC_NOFIX; \ + task_thread_info(task)->status = status; \ + 0; }) + +#define GET_UNALIGN_CTL(task,value) ({ \ + __u32 status = task_thread_info(task)->status & ~UAC_BITMASK; \ + __u32 res = 0; \ + if (status & TS_UAC_NOPRINT) \ + res |= PR_UNALIGN_NOPRINT; \ + if (status & TS_UAC_SIGBUS) \ + res |= PR_UNALIGN_SIGBUS; \ + if (status & TS_UAC_NOFIX) \ + res |= 4; \ + put_user(res, (int __user *)(value)); \ + }) + +#endif /* __KERNEL__ */ +#endif /* _ALPHA_THREAD_INFO_H */ diff --git a/arch/alpha/include/asm/timex.h b/arch/alpha/include/asm/timex.h new file mode 100644 index 0000000000..b565cc6f40 --- /dev/null +++ b/arch/alpha/include/asm/timex.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * linux/include/asm-alpha/timex.h + * + * ALPHA architecture timex specifications + */ +#ifndef _ASMALPHA_TIMEX_H +#define _ASMALPHA_TIMEX_H + +/* With only one or two oddballs, we use the RTC as the ticker, selecting + the 32.768kHz reference clock, which nicely divides down to our HZ. */ +#define CLOCK_TICK_RATE 32768 + +/* + * Standard way to access the cycle counter. + * Currently only used on SMP for scheduling. + * + * Only the low 32 bits are available as a continuously counting entity. + * But this only means we'll force a reschedule every 8 seconds or so, + * which isn't an evil thing. + */ + +typedef unsigned int cycles_t; + +static inline cycles_t get_cycles (void) +{ + cycles_t ret; + __asm__ __volatile__ ("rpcc %0" : "=r"(ret)); + return ret; +} + +#endif diff --git a/arch/alpha/include/asm/tlb.h b/arch/alpha/include/asm/tlb.h new file mode 100644 index 0000000000..4f79e331af --- /dev/null +++ b/arch/alpha/include/asm/tlb.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_TLB_H +#define _ALPHA_TLB_H + +#include + +#define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte) +#define __pmd_free_tlb(tlb, pmd, address) pmd_free((tlb)->mm, pmd) + +#endif diff --git a/arch/alpha/include/asm/tlbflush.h b/arch/alpha/include/asm/tlbflush.h new file mode 100644 index 0000000000..94dc37cf87 --- /dev/null +++ b/arch/alpha/include/asm/tlbflush.h @@ -0,0 +1,152 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_TLBFLUSH_H +#define _ALPHA_TLBFLUSH_H + +#include +#include +#include + +#ifndef __EXTERN_INLINE +#define __EXTERN_INLINE extern inline +#define __MMU_EXTERN_INLINE +#endif + +extern void __load_new_mm_context(struct mm_struct *); + + +/* Use a few helper functions to hide the ugly broken ASN + numbers on early Alphas (ev4 and ev45). 
*/ + +__EXTERN_INLINE void +ev4_flush_tlb_current(struct mm_struct *mm) +{ + __load_new_mm_context(mm); + tbiap(); +} + +__EXTERN_INLINE void +ev5_flush_tlb_current(struct mm_struct *mm) +{ + __load_new_mm_context(mm); +} + +/* Flush just one page in the current TLB set. We need to be very + careful about the icache here, there is no way to invalidate a + specific icache page. */ + +__EXTERN_INLINE void +ev4_flush_tlb_current_page(struct mm_struct * mm, + struct vm_area_struct *vma, + unsigned long addr) +{ + int tbi_flag = 2; + if (vma->vm_flags & VM_EXEC) { + __load_new_mm_context(mm); + tbi_flag = 3; + } + tbi(tbi_flag, addr); +} + +__EXTERN_INLINE void +ev5_flush_tlb_current_page(struct mm_struct * mm, + struct vm_area_struct *vma, + unsigned long addr) +{ + if (vma->vm_flags & VM_EXEC) + __load_new_mm_context(mm); + else + tbi(2, addr); +} + + +#ifdef CONFIG_ALPHA_GENERIC +# define flush_tlb_current alpha_mv.mv_flush_tlb_current +# define flush_tlb_current_page alpha_mv.mv_flush_tlb_current_page +#else +# ifdef CONFIG_ALPHA_EV4 +# define flush_tlb_current ev4_flush_tlb_current +# define flush_tlb_current_page ev4_flush_tlb_current_page +# else +# define flush_tlb_current ev5_flush_tlb_current +# define flush_tlb_current_page ev5_flush_tlb_current_page +# endif +#endif + +#ifdef __MMU_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __MMU_EXTERN_INLINE +#endif + +/* Flush current user mapping. */ +static inline void +flush_tlb(void) +{ + flush_tlb_current(current->active_mm); +} + +/* Flush someone else's user mapping. */ +static inline void +flush_tlb_other(struct mm_struct *mm) +{ + unsigned long *mmc = &mm->context[smp_processor_id()]; + /* Check it's not zero first to avoid cacheline ping pong + when possible. */ + if (*mmc) *mmc = 0; +} + +#ifndef CONFIG_SMP +/* Flush everything (kernel mapping may also have changed + due to vmalloc/vfree). */ +static inline void flush_tlb_all(void) +{ + tbia(); +} + +/* Flush a specified user mapping. */ +static inline void +flush_tlb_mm(struct mm_struct *mm) +{ + if (mm == current->active_mm) + flush_tlb_current(mm); + else + flush_tlb_other(mm); +} + +/* Page-granular tlb flush. */ +static inline void +flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) +{ + struct mm_struct *mm = vma->vm_mm; + + if (mm == current->active_mm) + flush_tlb_current_page(mm, vma, addr); + else + flush_tlb_other(mm); +} + +/* Flush a specified range of user mapping. On the Alpha we flush + the whole user tlb. 
*/ +static inline void +flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + flush_tlb_mm(vma->vm_mm); +} + +#else /* CONFIG_SMP */ + +extern void flush_tlb_all(void); +extern void flush_tlb_mm(struct mm_struct *); +extern void flush_tlb_page(struct vm_area_struct *, unsigned long); +extern void flush_tlb_range(struct vm_area_struct *, unsigned long, + unsigned long); + +#endif /* CONFIG_SMP */ + +static inline void flush_tlb_kernel_range(unsigned long start, + unsigned long end) +{ + flush_tlb_all(); +} + +#endif /* _ALPHA_TLBFLUSH_H */ diff --git a/arch/alpha/include/asm/topology.h b/arch/alpha/include/asm/topology.h new file mode 100644 index 0000000000..7d393036aa --- /dev/null +++ b/arch/alpha/include/asm/topology.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ALPHA_TOPOLOGY_H +#define _ASM_ALPHA_TOPOLOGY_H + +#include +#include +#include +#include + +# include + +#endif /* _ASM_ALPHA_TOPOLOGY_H */ diff --git a/arch/alpha/include/asm/types.h b/arch/alpha/include/asm/types.h new file mode 100644 index 0000000000..b60b602764 --- /dev/null +++ b/arch/alpha/include/asm/types.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_TYPES_H +#define _ALPHA_TYPES_H + +#include + +#endif /* _ALPHA_TYPES_H */ diff --git a/arch/alpha/include/asm/uaccess.h b/arch/alpha/include/asm/uaccess.h new file mode 100644 index 0000000000..1b6f25efa2 --- /dev/null +++ b/arch/alpha/include/asm/uaccess.h @@ -0,0 +1,335 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ALPHA_UACCESS_H +#define __ALPHA_UACCESS_H + +/* + * The fs value determines whether argument validity checking should be + * performed or not. If get_fs() == USER_DS, checking is performed, with + * get_fs() == KERNEL_DS, checking is bypassed. + * + * Or at least it did once upon a time. Nowadays it is a mask that + * defines which bits of the address space are off limits. This is a + * wee bit faster than the above. + * + * For historical reasons, these macros are grossly misnamed. + */ + +#define KERNEL_DS ((mm_segment_t) { 0UL }) +#define USER_DS ((mm_segment_t) { -0x40000000000UL }) + +#define get_fs() (current_thread_info()->addr_limit) +#define set_fs(x) (current_thread_info()->addr_limit = (x)) + +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) + +/* + * Is a address valid? This does a straightforward calculation rather + * than tests. + * + * Address valid if: + * - "addr" doesn't have any high-bits set + * - AND "size" doesn't have any high-bits set + * - AND "addr+size-(size != 0)" doesn't have any high-bits set + * - OR we are in kernel mode. + */ +#define __access_ok(addr, size) ({ \ + unsigned long __ao_a = (addr), __ao_b = (size); \ + unsigned long __ao_end = __ao_a + __ao_b - !!__ao_b; \ + (get_fs().seg & (__ao_a | __ao_b | __ao_end)) == 0; }) + +#define access_ok(addr, size) \ +({ \ + __chk_user_ptr(addr); \ + __access_ok(((unsigned long)(addr)), (size)); \ +}) + +/* + * These are the main single-value transfer routines. They automatically + * use the right size if we just have the right pointer type. + * + * As the alpha uses the same address space for kernel and user + * data, we can just do these as direct assignments. (Of course, the + * exception handling means that it's no longer "just"...) 
+ * + * Careful to not + * (a) re-use the arguments for side effects (sizeof/typeof is ok) + * (b) require any knowledge of processes at this stage + */ +#define put_user(x, ptr) \ + __put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) +#define get_user(x, ptr) \ + __get_user_check((x), (ptr), sizeof(*(ptr))) + +/* + * The "__xxx" versions do not do address space checking, useful when + * doing multiple accesses to the same area (the programmer has to do the + * checks by hand with "access_ok()") + */ +#define __put_user(x, ptr) \ + __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) +#define __get_user(x, ptr) \ + __get_user_nocheck((x), (ptr), sizeof(*(ptr))) + +/* + * The "lda %1, 2b-1b(%0)" bits are magic to get the assembler to + * encode the bits we need for resolving the exception. See the + * more extensive comments with fixup_inline_exception below for + * more information. + */ +#define EXC(label,cont,res,err) \ + ".section __ex_table,\"a\"\n" \ + " .long "#label"-.\n" \ + " lda "#res","#cont"-"#label"("#err")\n" \ + ".previous\n" + +extern void __get_user_unknown(void); + +#define __get_user_nocheck(x, ptr, size) \ +({ \ + long __gu_err = 0; \ + unsigned long __gu_val; \ + __chk_user_ptr(ptr); \ + switch (size) { \ + case 1: __get_user_8(ptr); break; \ + case 2: __get_user_16(ptr); break; \ + case 4: __get_user_32(ptr); break; \ + case 8: __get_user_64(ptr); break; \ + default: __get_user_unknown(); break; \ + } \ + (x) = (__force __typeof__(*(ptr))) __gu_val; \ + __gu_err; \ +}) + +#define __get_user_check(x, ptr, size) \ +({ \ + long __gu_err = -EFAULT; \ + unsigned long __gu_val = 0; \ + const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + if (__access_ok((unsigned long)__gu_addr, size)) { \ + __gu_err = 0; \ + switch (size) { \ + case 1: __get_user_8(__gu_addr); break; \ + case 2: __get_user_16(__gu_addr); break; \ + case 4: __get_user_32(__gu_addr); break; \ + case 8: __get_user_64(__gu_addr); break; \ + default: __get_user_unknown(); break; \ + } \ + } \ + (x) = (__force __typeof__(*(ptr))) __gu_val; \ + __gu_err; \ +}) + +struct __large_struct { unsigned long buf[100]; }; +#define __m(x) (*(struct __large_struct __user *)(x)) + +#define __get_user_64(addr) \ + __asm__("1: ldq %0,%2\n" \ + "2:\n" \ + EXC(1b,2b,%0,%1) \ + : "=r"(__gu_val), "=r"(__gu_err) \ + : "m"(__m(addr)), "1"(__gu_err)) + +#define __get_user_32(addr) \ + __asm__("1: ldl %0,%2\n" \ + "2:\n" \ + EXC(1b,2b,%0,%1) \ + : "=r"(__gu_val), "=r"(__gu_err) \ + : "m"(__m(addr)), "1"(__gu_err)) + +#ifdef __alpha_bwx__ +/* Those lucky bastards with ev56 and later CPUs can do byte/word moves. */ + +#define __get_user_16(addr) \ + __asm__("1: ldwu %0,%2\n" \ + "2:\n" \ + EXC(1b,2b,%0,%1) \ + : "=r"(__gu_val), "=r"(__gu_err) \ + : "m"(__m(addr)), "1"(__gu_err)) + +#define __get_user_8(addr) \ + __asm__("1: ldbu %0,%2\n" \ + "2:\n" \ + EXC(1b,2b,%0,%1) \ + : "=r"(__gu_val), "=r"(__gu_err) \ + : "m"(__m(addr)), "1"(__gu_err)) +#else +/* Unfortunately, we can't get an unaligned access trap for the sub-word + load, so we have to do a general unaligned operation. 
*/ + +#define __get_user_16(addr) \ +{ \ + long __gu_tmp; \ + __asm__("1: ldq_u %0,0(%3)\n" \ + "2: ldq_u %1,1(%3)\n" \ + " extwl %0,%3,%0\n" \ + " extwh %1,%3,%1\n" \ + " or %0,%1,%0\n" \ + "3:\n" \ + EXC(1b,3b,%0,%2) \ + EXC(2b,3b,%0,%2) \ + : "=&r"(__gu_val), "=&r"(__gu_tmp), "=r"(__gu_err) \ + : "r"(addr), "2"(__gu_err)); \ +} + +#define __get_user_8(addr) \ + __asm__("1: ldq_u %0,0(%2)\n" \ + " extbl %0,%2,%0\n" \ + "2:\n" \ + EXC(1b,2b,%0,%1) \ + : "=&r"(__gu_val), "=r"(__gu_err) \ + : "r"(addr), "1"(__gu_err)) +#endif + +extern void __put_user_unknown(void); + +#define __put_user_nocheck(x, ptr, size) \ +({ \ + long __pu_err = 0; \ + __chk_user_ptr(ptr); \ + switch (size) { \ + case 1: __put_user_8(x, ptr); break; \ + case 2: __put_user_16(x, ptr); break; \ + case 4: __put_user_32(x, ptr); break; \ + case 8: __put_user_64(x, ptr); break; \ + default: __put_user_unknown(); break; \ + } \ + __pu_err; \ +}) + +#define __put_user_check(x, ptr, size) \ +({ \ + long __pu_err = -EFAULT; \ + __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ + if (__access_ok((unsigned long)__pu_addr, size)) { \ + __pu_err = 0; \ + switch (size) { \ + case 1: __put_user_8(x, __pu_addr); break; \ + case 2: __put_user_16(x, __pu_addr); break; \ + case 4: __put_user_32(x, __pu_addr); break; \ + case 8: __put_user_64(x, __pu_addr); break; \ + default: __put_user_unknown(); break; \ + } \ + } \ + __pu_err; \ +}) + +/* + * The "__put_user_xx()" macros tell gcc they read from memory + * instead of writing: this is because they do not write to + * any memory gcc knows about, so there are no aliasing issues + */ +#define __put_user_64(x, addr) \ +__asm__ __volatile__("1: stq %r2,%1\n" \ + "2:\n" \ + EXC(1b,2b,$31,%0) \ + : "=r"(__pu_err) \ + : "m" (__m(addr)), "rJ" (x), "0"(__pu_err)) + +#define __put_user_32(x, addr) \ +__asm__ __volatile__("1: stl %r2,%1\n" \ + "2:\n" \ + EXC(1b,2b,$31,%0) \ + : "=r"(__pu_err) \ + : "m"(__m(addr)), "rJ"(x), "0"(__pu_err)) + +#ifdef __alpha_bwx__ +/* Those lucky bastards with ev56 and later CPUs can do byte/word moves. */ + +#define __put_user_16(x, addr) \ +__asm__ __volatile__("1: stw %r2,%1\n" \ + "2:\n" \ + EXC(1b,2b,$31,%0) \ + : "=r"(__pu_err) \ + : "m"(__m(addr)), "rJ"(x), "0"(__pu_err)) + +#define __put_user_8(x, addr) \ +__asm__ __volatile__("1: stb %r2,%1\n" \ + "2:\n" \ + EXC(1b,2b,$31,%0) \ + : "=r"(__pu_err) \ + : "m"(__m(addr)), "rJ"(x), "0"(__pu_err)) +#else +/* Unfortunately, we can't get an unaligned access trap for the sub-word + write, so we have to do a general unaligned operation. 
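As a usage sketch of the get_user()/put_user() interface defined earlier in this header (the demo_read_and_double helper is hypothetical, shown only to illustrate the usual -EFAULT handling):

/* Hypothetical caller: copy one u32 in from user space, write a result
 * back out, and propagate -EFAULT on a bad pointer. */
static long demo_read_and_double(u32 __user *in, u32 __user *out)
{
	u32 val;

	if (get_user(val, in))		/* non-zero means the access faulted */
		return -EFAULT;
	if (put_user(val * 2, out))
		return -EFAULT;
	return 0;
}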
*/ + +#define __put_user_16(x, addr) \ +{ \ + long __pu_tmp1, __pu_tmp2, __pu_tmp3, __pu_tmp4; \ + __asm__ __volatile__( \ + "1: ldq_u %2,1(%5)\n" \ + "2: ldq_u %1,0(%5)\n" \ + " inswh %6,%5,%4\n" \ + " inswl %6,%5,%3\n" \ + " mskwh %2,%5,%2\n" \ + " mskwl %1,%5,%1\n" \ + " or %2,%4,%2\n" \ + " or %1,%3,%1\n" \ + "3: stq_u %2,1(%5)\n" \ + "4: stq_u %1,0(%5)\n" \ + "5:\n" \ + EXC(1b,5b,$31,%0) \ + EXC(2b,5b,$31,%0) \ + EXC(3b,5b,$31,%0) \ + EXC(4b,5b,$31,%0) \ + : "=r"(__pu_err), "=&r"(__pu_tmp1), \ + "=&r"(__pu_tmp2), "=&r"(__pu_tmp3), \ + "=&r"(__pu_tmp4) \ + : "r"(addr), "r"((unsigned long)(x)), "0"(__pu_err)); \ +} + +#define __put_user_8(x, addr) \ +{ \ + long __pu_tmp1, __pu_tmp2; \ + __asm__ __volatile__( \ + "1: ldq_u %1,0(%4)\n" \ + " insbl %3,%4,%2\n" \ + " mskbl %1,%4,%1\n" \ + " or %1,%2,%1\n" \ + "2: stq_u %1,0(%4)\n" \ + "3:\n" \ + EXC(1b,3b,$31,%0) \ + EXC(2b,3b,$31,%0) \ + : "=r"(__pu_err), \ + "=&r"(__pu_tmp1), "=&r"(__pu_tmp2) \ + : "r"((unsigned long)(x)), "r"(addr), "0"(__pu_err)); \ +} +#endif + + +/* + * Complex access routines + */ + +extern long __copy_user(void *to, const void *from, long len); + +static inline unsigned long +raw_copy_from_user(void *to, const void __user *from, unsigned long len) +{ + return __copy_user(to, (__force const void *)from, len); +} + +static inline unsigned long +raw_copy_to_user(void __user *to, const void *from, unsigned long len) +{ + return __copy_user((__force void *)to, from, len); +} + +extern long __clear_user(void __user *to, long len); + +extern inline long +clear_user(void __user *to, long len) +{ + if (__access_ok((unsigned long)to, len)) + len = __clear_user(to, len); + return len; +} + +#define user_addr_max() \ + (uaccess_kernel() ? ~0UL : TASK_SIZE) + +extern long strncpy_from_user(char *dest, const char __user *src, long count); +extern __must_check long strnlen_user(const char __user *str, long n); + +#include + +#endif /* __ALPHA_UACCESS_H */ diff --git a/arch/alpha/include/asm/ucontext.h b/arch/alpha/include/asm/ucontext.h new file mode 100644 index 0000000000..af1f3465b8 --- /dev/null +++ b/arch/alpha/include/asm/ucontext.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASMAXP_UCONTEXT_H +#define _ASMAXP_UCONTEXT_H + +struct ucontext { + unsigned long uc_flags; + struct ucontext *uc_link; + old_sigset_t uc_osf_sigmask; + stack_t uc_stack; + struct sigcontext uc_mcontext; + sigset_t uc_sigmask; /* mask last for extensibility */ +}; + +#endif /* !_ASMAXP_UCONTEXT_H */ diff --git a/arch/alpha/include/asm/unaligned.h b/arch/alpha/include/asm/unaligned.h new file mode 100644 index 0000000000..863c807b66 --- /dev/null +++ b/arch/alpha/include/asm/unaligned.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ALPHA_UNALIGNED_H +#define _ASM_ALPHA_UNALIGNED_H + +#include +#include +#include + +#define get_unaligned __get_unaligned_le +#define put_unaligned __put_unaligned_le + +#endif /* _ASM_ALPHA_UNALIGNED_H */ diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h new file mode 100644 index 0000000000..986f5da9b7 --- /dev/null +++ b/arch/alpha/include/asm/unistd.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_UNISTD_H +#define _ALPHA_UNISTD_H + +#include + +#define NR_SYSCALLS __NR_syscalls + +#define __ARCH_WANT_NEW_STAT +#define __ARCH_WANT_OLD_READDIR +#define __ARCH_WANT_STAT64 +#define __ARCH_WANT_SYS_GETHOSTNAME +#define __ARCH_WANT_SYS_FADVISE64 +#define __ARCH_WANT_SYS_GETPGRP +#define __ARCH_WANT_SYS_OLDUMOUNT +#define 
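For the copy routines just above, a typical consumer looks like the hypothetical sketch below; the generic copy_from_user()/clear_user() wrappers return the number of bytes they could not transfer, which callers normally collapse into -EFAULT.

struct my_args { unsigned long flags; unsigned long len; };	/* hypothetical */

static long fetch_args(struct my_args *dst, const struct my_args __user *src)
{
	if (copy_from_user(dst, src, sizeof(*dst)))
		return -EFAULT;			/* some bytes were not copyable */
	return 0;
}

static long wipe_user_buffer(void __user *buf, long len)
{
	return clear_user(buf, len) ? -EFAULT : 0;
}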
__ARCH_WANT_SYS_SIGPENDING +#define __ARCH_WANT_SYS_UTIME +#define __ARCH_WANT_SYS_FORK +#define __ARCH_WANT_SYS_VFORK +#define __ARCH_WANT_SYS_CLONE + +#endif /* _ALPHA_UNISTD_H */ diff --git a/arch/alpha/include/asm/user.h b/arch/alpha/include/asm/user.h new file mode 100644 index 0000000000..3df37492c7 --- /dev/null +++ b/arch/alpha/include/asm/user.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_USER_H +#define _ALPHA_USER_H + +#include +#include + +#include +#include + +/* + * Core file format: The core file is written in such a way that gdb + * can understand it and provide useful information to the user (under + * linux we use the `trad-core' bfd, NOT the osf-core). The file contents + * are as follows: + * + * upage: 1 page consisting of a user struct that tells gdb + * what is present in the file. Directly after this is a + * copy of the task_struct, which is currently not used by gdb, + * but it may come in handy at some point. All of the registers + * are stored as part of the upage. The upage should always be + * only one page long. + * data: The data segment follows next. We use current->end_text to + * current->brk to pick up all of the user variables, plus any memory + * that may have been sbrk'ed. No attempt is made to determine if a + * page is demand-zero or if a page is totally unused, we just cover + * the entire range. All of the addresses are rounded in such a way + * that an integral number of pages is written. + * stack: We need the stack information in order to get a meaningful + * backtrace. We need to write the data from usp to + * current->start_stack, so we round each of these in order to be able + * to write an integer number of pages. + */ +struct user { + unsigned long regs[EF_SIZE/8+32]; /* integer and fp regs */ + size_t u_tsize; /* text size (pages) */ + size_t u_dsize; /* data size (pages) */ + size_t u_ssize; /* stack size (pages) */ + unsigned long start_code; /* text starting address */ + unsigned long start_data; /* data starting address */ + unsigned long start_stack; /* stack starting address */ + long int signal; /* signal causing core dump */ + unsigned long u_ar0; /* help gdb find registers */ + unsigned long magic; /* identifies a core file */ + char u_comm[32]; /* user command name */ +}; + +#define NBPG PAGE_SIZE +#define UPAGES 1 +#define HOST_TEXT_START_ADDR (u.start_code) +#define HOST_DATA_START_ADDR (u.start_data) +#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) + +#endif /* _ALPHA_USER_H */ diff --git a/arch/alpha/include/asm/vga.h b/arch/alpha/include/asm/vga.h new file mode 100644 index 0000000000..4c347a8454 --- /dev/null +++ b/arch/alpha/include/asm/vga.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Access to VGA videoram + * + * (c) 1998 Martin Mares + */ + +#ifndef _LINUX_ASM_VGA_H_ +#define _LINUX_ASM_VGA_H_ + +#include + +#define VT_BUF_HAVE_RW +#define VT_BUF_HAVE_MEMSETW +#define VT_BUF_HAVE_MEMCPYW + +static inline void scr_writew(u16 val, volatile u16 *addr) +{ + if (__is_ioaddr(addr)) + __raw_writew(val, (volatile u16 __iomem *) addr); + else + *addr = val; +} + +static inline u16 scr_readw(volatile const u16 *addr) +{ + if (__is_ioaddr(addr)) + return __raw_readw((volatile const u16 __iomem *) addr); + else + return *addr; +} + +static inline void scr_memsetw(u16 *s, u16 c, unsigned int count) +{ + if (__is_ioaddr(s)) + memsetw_io((u16 __iomem *) s, c, count); + else + memset16(s, c, count / 2); +} + +/* Do not trust that the usage will be correct; analyze the 
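Given the layout described in the comment above (one upage, then the data pages, then the stack pages, each a whole number of pages), the total core image size works out as in this hypothetical helper.

static unsigned long core_image_bytes(const struct user *u)
{
	/* u_dsize and u_ssize are already page counts, per the comments above */
	return (UPAGES + u->u_dsize + u->u_ssize) * NBPG;
}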
arguments. */ +extern void scr_memcpyw(u16 *d, const u16 *s, unsigned int count); + +/* ??? These are currently only used for downloading character sets. As + such, they don't need memory barriers. Is this all they are intended + to be used for? */ +#define vga_readb(a) readb((u8 __iomem *)(a)) +#define vga_writeb(v,a) writeb(v, (u8 __iomem *)(a)) + +#ifdef CONFIG_VGA_HOSE +#include +#include + +extern struct pci_controller *pci_vga_hose; + +# define __is_port_vga(a) \ + (((a) >= 0x3b0) && ((a) < 0x3e0) && \ + ((a) != 0x3b3) && ((a) != 0x3d3)) + +# define __is_mem_vga(a) \ + (((a) >= 0xa0000) && ((a) <= 0xc0000)) + +# define FIXUP_IOADDR_VGA(a) do { \ + if (pci_vga_hose && __is_port_vga(a)) \ + (a) += pci_vga_hose->io_space->start; \ + } while(0) + +# define FIXUP_MEMADDR_VGA(a) do { \ + if (pci_vga_hose && __is_mem_vga(a)) \ + (a) += pci_vga_hose->mem_space->start; \ + } while(0) + +#else /* CONFIG_VGA_HOSE */ +# define pci_vga_hose 0 +# define __is_port_vga(a) 0 +# define __is_mem_vga(a) 0 +# define FIXUP_IOADDR_VGA(a) +# define FIXUP_MEMADDR_VGA(a) +#endif /* CONFIG_VGA_HOSE */ + +#define VGA_MAP_MEM(x,s) ((unsigned long) ioremap(x, s)) + +#endif diff --git a/arch/alpha/include/asm/vmalloc.h b/arch/alpha/include/asm/vmalloc.h new file mode 100644 index 0000000000..0a9a366a4d --- /dev/null +++ b/arch/alpha/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_ALPHA_VMALLOC_H +#define _ASM_ALPHA_VMALLOC_H + +#endif /* _ASM_ALPHA_VMALLOC_H */ diff --git a/arch/alpha/include/asm/word-at-a-time.h b/arch/alpha/include/asm/word-at-a-time.h new file mode 100644 index 0000000000..4035265f1a --- /dev/null +++ b/arch/alpha/include/asm/word-at-a-time.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_WORD_AT_A_TIME_H +#define _ASM_WORD_AT_A_TIME_H + +#include + +/* + * word-at-a-time interface for Alpha. + */ + +/* + * We do not use the word_at_a_time struct on Alpha, but it needs to be + * implemented to humour the generic code. + */ +struct word_at_a_time { + const unsigned long unused; +}; + +#define WORD_AT_A_TIME_CONSTANTS { 0 } + +/* Return nonzero if val has a zero */ +static inline unsigned long has_zero(unsigned long val, unsigned long *bits, const struct word_at_a_time *c) +{ + unsigned long zero_locations = __kernel_cmpbge(0, val); + *bits = zero_locations; + return zero_locations; +} + +static inline unsigned long prep_zero_mask(unsigned long val, unsigned long bits, const struct word_at_a_time *c) +{ + return bits; +} + +#define create_zero_mask(bits) (bits) + +static inline unsigned long find_zero(unsigned long bits) +{ +#if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67) + /* Simple if have CIX instructions */ + return __kernel_cttz(bits); +#else + unsigned long t1, t2, t3; + /* Retain lowest set bit only */ + bits &= -bits; + /* Binary search for lowest set bit */ + t1 = bits & 0xf0; + t2 = bits & 0xcc; + t3 = bits & 0xaa; + if (t1) t1 = 4; + if (t2) t2 = 2; + if (t3) t3 = 1; + return t1 + t2 + t3; +#endif +} + +#define zero_bytemask(mask) ((2ul << (find_zero(mask) * 8)) - 1) + +#endif /* _ASM_WORD_AT_A_TIME_H */ diff --git a/arch/alpha/include/asm/wrperfmon.h b/arch/alpha/include/asm/wrperfmon.h new file mode 100644 index 0000000000..c97b82a1f7 --- /dev/null +++ b/arch/alpha/include/asm/wrperfmon.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Definitions for use with the Alpha wrperfmon PAL call. 
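The word-at-a-time helpers above are meant to be chained the way the generic string code does; a minimal sketch, assuming the string is 8-byte aligned so whole-word loads are safe.

static unsigned long wordwise_strlen(const char *s)
{
	const unsigned long *p = (const unsigned long *)s;
	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
	unsigned long val, bits, len = 0;

	for (;;) {
		val = *p++;
		if (has_zero(val, &bits, &constants)) {
			bits = prep_zero_mask(val, bits, &constants);
			bits = create_zero_mask(bits);
			return len + find_zero(bits);	/* index of first NUL byte */
		}
		len += sizeof(unsigned long);
	}
}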
+ */ + +#ifndef __ALPHA_WRPERFMON_H +#define __ALPHA_WRPERFMON_H + +/* Following commands are implemented on all CPUs */ +#define PERFMON_CMD_DISABLE 0 +#define PERFMON_CMD_ENABLE 1 +#define PERFMON_CMD_DESIRED_EVENTS 2 +#define PERFMON_CMD_LOGGING_OPTIONS 3 +/* Following commands on EV5/EV56/PCA56 only */ +#define PERFMON_CMD_INT_FREQ 4 +#define PERFMON_CMD_ENABLE_CLEAR 7 +/* Following commands are on EV5 and better CPUs */ +#define PERFMON_CMD_READ 5 +#define PERFMON_CMD_WRITE 6 +/* Following command are on EV6 and better CPUs */ +#define PERFMON_CMD_ENABLE_WRITE 7 +/* Following command are on EV67 and better CPUs */ +#define PERFMON_CMD_I_STAT 8 +#define PERFMON_CMD_PMPC 9 + + +/* EV5/EV56/PCA56 Counters */ +#define EV5_PCTR_0 (1UL<<0) +#define EV5_PCTR_1 (1UL<<1) +#define EV5_PCTR_2 (1UL<<2) + +#define EV5_PCTR_0_COUNT_SHIFT 48 +#define EV5_PCTR_1_COUNT_SHIFT 32 +#define EV5_PCTR_2_COUNT_SHIFT 16 + +#define EV5_PCTR_0_COUNT_MASK 0xffffUL +#define EV5_PCTR_1_COUNT_MASK 0xffffUL +#define EV5_PCTR_2_COUNT_MASK 0x3fffUL + +/* EV6 Counters */ +#define EV6_PCTR_0 (1UL<<0) +#define EV6_PCTR_1 (1UL<<1) + +#define EV6_PCTR_0_COUNT_SHIFT 28 +#define EV6_PCTR_1_COUNT_SHIFT 6 + +#define EV6_PCTR_0_COUNT_MASK 0xfffffUL +#define EV6_PCTR_1_COUNT_MASK 0xfffffUL + +/* EV67 (and subsequent) counters */ +#define EV67_PCTR_0 (1UL<<0) +#define EV67_PCTR_1 (1UL<<1) + +#define EV67_PCTR_0_COUNT_SHIFT 28 +#define EV67_PCTR_1_COUNT_SHIFT 6 + +#define EV67_PCTR_0_COUNT_MASK 0xfffffUL +#define EV67_PCTR_1_COUNT_MASK 0xfffffUL + + +/* + * The Alpha Architecure Handbook, vers. 4 (1998) appears to have a misprint + * in Table E-23 regarding the bits that set the event PCTR 1 counts. + * Hopefully what we have here is correct. + */ +#define EV6_PCTR_0_EVENT_MASK 0x10UL +#define EV6_PCTR_1_EVENT_MASK 0x0fUL + +/* EV6 Events */ +#define EV6_PCTR_0_CYCLES (0UL << 4) +#define EV6_PCTR_0_INSTRUCTIONS (1UL << 4) + +#define EV6_PCTR_1_CYCLES 0 +#define EV6_PCTR_1_BRANCHES 1 +#define EV6_PCTR_1_BRANCH_MISPREDICTS 2 +#define EV6_PCTR_1_DTB_SINGLE_MISSES 3 +#define EV6_PCTR_1_DTB_DOUBLE_MISSES 4 +#define EV6_PCTR_1_ITB_MISSES 5 +#define EV6_PCTR_1_UNALIGNED_TRAPS 6 +#define EV6_PCTR_1_REPLY_TRAPS 7 + +/* From the Alpha Architecture Reference Manual, 4th edn., 2002 */ +#define EV67_PCTR_MODE_MASK 0x10UL +#define EV67_PCTR_EVENT_MASK 0x0CUL + +#define EV67_PCTR_MODE_PROFILEME (1UL<<4) +#define EV67_PCTR_MODE_AGGREGATE (0UL<<4) + +#define EV67_PCTR_INSTR_CYCLES (0UL<<2) +#define EV67_PCTR_CYCLES_UNDEF (1UL<<2) +#define EV67_PCTR_INSTR_BCACHEMISS (2UL<<2) +#define EV67_PCTR_CYCLES_MBOX (3UL<<2) + +#endif diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h new file mode 100644 index 0000000000..7adb80c674 --- /dev/null +++ b/arch/alpha/include/asm/xchg.h @@ -0,0 +1,246 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ALPHA_CMPXCHG_H +#error Do not include xchg.h directly! +#else +/* + * xchg/xchg_local and cmpxchg/cmpxchg_local share the same code + * except that local version do not have the expensive memory barrier. + * So this file is included twice from asm/cmpxchg.h. + */ + +/* + * Atomic exchange. + * Since it can be used to implement critical sections + * it must clobber "memory" (also for interrupts in UP). 
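As an illustration of the EV6 field definitions above, splitting a raw counter image (as read back through the wrperfmon PERFMON_CMD_READ call) into its two counts might look like the sketch below; the helper name is invented.

static void ev6_split_counts(unsigned long pctr,
			     unsigned long *ctr0, unsigned long *ctr1)
{
	*ctr0 = (pctr >> EV6_PCTR_0_COUNT_SHIFT) & EV6_PCTR_0_COUNT_MASK;
	*ctr1 = (pctr >> EV6_PCTR_1_COUNT_SHIFT) & EV6_PCTR_1_COUNT_MASK;
}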
+ */ + +static inline unsigned long +____xchg(_u8, volatile char *m, unsigned long val) +{ + unsigned long ret, tmp, addr64; + + __asm__ __volatile__( + " andnot %4,7,%3\n" + " insbl %1,%4,%1\n" + "1: ldq_l %2,0(%3)\n" + " extbl %2,%4,%0\n" + " mskbl %2,%4,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%3)\n" + " beq %2,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) + : "r" ((long)m), "1" (val) : "memory"); + + return ret; +} + +static inline unsigned long +____xchg(_u16, volatile short *m, unsigned long val) +{ + unsigned long ret, tmp, addr64; + + __asm__ __volatile__( + " andnot %4,7,%3\n" + " inswl %1,%4,%1\n" + "1: ldq_l %2,0(%3)\n" + " extwl %2,%4,%0\n" + " mskwl %2,%4,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%3)\n" + " beq %2,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) + : "r" ((long)m), "1" (val) : "memory"); + + return ret; +} + +static inline unsigned long +____xchg(_u32, volatile int *m, unsigned long val) +{ + unsigned long dummy; + + __asm__ __volatile__( + "1: ldl_l %0,%4\n" + " bis $31,%3,%1\n" + " stl_c %1,%2\n" + " beq %1,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (val), "=&r" (dummy), "=m" (*m) + : "rI" (val), "m" (*m) : "memory"); + + return val; +} + +static inline unsigned long +____xchg(_u64, volatile long *m, unsigned long val) +{ + unsigned long dummy; + + __asm__ __volatile__( + "1: ldq_l %0,%4\n" + " bis $31,%3,%1\n" + " stq_c %1,%2\n" + " beq %1,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (val), "=&r" (dummy), "=m" (*m) + : "rI" (val), "m" (*m) : "memory"); + + return val; +} + +/* This function doesn't exist, so you'll get a linker error + if something tries to do an invalid xchg(). */ +extern void __xchg_called_with_bad_pointer(void); + +static __always_inline unsigned long +____xchg(, volatile void *ptr, unsigned long x, int size) +{ + switch (size) { + case 1: + return ____xchg(_u8, ptr, x); + case 2: + return ____xchg(_u16, ptr, x); + case 4: + return ____xchg(_u32, ptr, x); + case 8: + return ____xchg(_u64, ptr, x); + } + __xchg_called_with_bad_pointer(); + return x; +} + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. 
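Per the comment above about critical sections, the classic consumer of the ____xchg() helpers is a test-and-set style lock; a sketch using the xchg() wrapper that asm/cmpxchg.h builds from this file (the struct and function names are hypothetical).

struct tas_lock { int taken; };		/* hypothetical */

static void tas_acquire(struct tas_lock *l)
{
	/* xchg() returns the previous value; 0 means we got the lock */
	while (xchg(&l->taken, 1))
		cpu_relax();
}

static void tas_release(struct tas_lock *l)
{
	xchg(&l->taken, 0);	/* the non-local form carries the memory barrier */
}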
+ */ + +static inline unsigned long +____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new) +{ + unsigned long prev, tmp, cmp, addr64; + + __asm__ __volatile__( + " andnot %5,7,%4\n" + " insbl %1,%5,%1\n" + "1: ldq_l %2,0(%4)\n" + " extbl %2,%5,%0\n" + " cmpeq %0,%6,%3\n" + " beq %3,2f\n" + " mskbl %2,%5,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%4)\n" + " beq %2,3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) + : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); + + return prev; +} + +static inline unsigned long +____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new) +{ + unsigned long prev, tmp, cmp, addr64; + + __asm__ __volatile__( + " andnot %5,7,%4\n" + " inswl %1,%5,%1\n" + "1: ldq_l %2,0(%4)\n" + " extwl %2,%5,%0\n" + " cmpeq %0,%6,%3\n" + " beq %3,2f\n" + " mskwl %2,%5,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%4)\n" + " beq %2,3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) + : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); + + return prev; +} + +static inline unsigned long +____cmpxchg(_u32, volatile int *m, int old, int new) +{ + unsigned long prev, cmp; + + __asm__ __volatile__( + "1: ldl_l %0,%5\n" + " cmpeq %0,%3,%1\n" + " beq %1,2f\n" + " mov %4,%1\n" + " stl_c %1,%2\n" + " beq %1,3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r"(prev), "=&r"(cmp), "=m"(*m) + : "r"((long) old), "r"(new), "m"(*m) : "memory"); + + return prev; +} + +static inline unsigned long +____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new) +{ + unsigned long prev, cmp; + + __asm__ __volatile__( + "1: ldq_l %0,%5\n" + " cmpeq %0,%3,%1\n" + " beq %1,2f\n" + " mov %4,%1\n" + " stq_c %1,%2\n" + " beq %1,3f\n" + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r"(prev), "=&r"(cmp), "=m"(*m) + : "r"((long) old), "r"(new), "m"(*m) : "memory"); + + return prev; +} + +/* This function doesn't exist, so you'll get a linker error + if something tries to do an invalid cmpxchg(). 
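The usual cmpxchg() pattern built on the helpers above is a read/modify/retry loop, where success is detected exactly as the comment describes: by getting back the value that was read.

static void atomic_set_bits(unsigned long *word, unsigned long mask)
{
	unsigned long old, new;

	do {
		old = READ_ONCE(*word);
		new = old | mask;
	} while (cmpxchg(word, old, new) != old);	/* retry if someone raced us */
}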
*/ +extern void __cmpxchg_called_with_bad_pointer(void); + +static __always_inline unsigned long +____cmpxchg(, volatile void *ptr, unsigned long old, unsigned long new, + int size) +{ + switch (size) { + case 1: + return ____cmpxchg(_u8, ptr, old, new); + case 2: + return ____cmpxchg(_u16, ptr, old, new); + case 4: + return ____cmpxchg(_u32, ptr, old, new); + case 8: + return ____cmpxchg(_u64, ptr, old, new); + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + +#endif diff --git a/arch/alpha/include/asm/xor.h b/arch/alpha/include/asm/xor.h new file mode 100644 index 0000000000..5aeb4fb3cb --- /dev/null +++ b/arch/alpha/include/asm/xor.h @@ -0,0 +1,847 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * include/asm-alpha/xor.h + * + * Optimized RAID-5 checksumming functions for alpha EV5 and EV6 + */ + +extern void xor_alpha_2(unsigned long, unsigned long *, unsigned long *); +extern void xor_alpha_3(unsigned long, unsigned long *, unsigned long *, + unsigned long *); +extern void xor_alpha_4(unsigned long, unsigned long *, unsigned long *, + unsigned long *, unsigned long *); +extern void xor_alpha_5(unsigned long, unsigned long *, unsigned long *, + unsigned long *, unsigned long *, unsigned long *); + +extern void xor_alpha_prefetch_2(unsigned long, unsigned long *, + unsigned long *); +extern void xor_alpha_prefetch_3(unsigned long, unsigned long *, + unsigned long *, unsigned long *); +extern void xor_alpha_prefetch_4(unsigned long, unsigned long *, + unsigned long *, unsigned long *, + unsigned long *); +extern void xor_alpha_prefetch_5(unsigned long, unsigned long *, + unsigned long *, unsigned long *, + unsigned long *, unsigned long *); + +asm(" \n\ + .text \n\ + .align 3 \n\ + .ent xor_alpha_2 \n\ +xor_alpha_2: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + .align 4 \n\ +2: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + ldq $2,8($17) \n\ + ldq $3,8($18) \n\ + \n\ + ldq $4,16($17) \n\ + ldq $5,16($18) \n\ + ldq $6,24($17) \n\ + ldq $7,24($18) \n\ + \n\ + ldq $19,32($17) \n\ + ldq $20,32($18) \n\ + ldq $21,40($17) \n\ + ldq $22,40($18) \n\ + \n\ + ldq $23,48($17) \n\ + ldq $24,48($18) \n\ + ldq $25,56($17) \n\ + xor $0,$1,$0 # 7 cycles from $1 load \n\ + \n\ + ldq $27,56($18) \n\ + xor $2,$3,$2 \n\ + stq $0,0($17) \n\ + xor $4,$5,$4 \n\ + \n\ + stq $2,8($17) \n\ + xor $6,$7,$6 \n\ + stq $4,16($17) \n\ + xor $19,$20,$19 \n\ + \n\ + stq $6,24($17) \n\ + xor $21,$22,$21 \n\ + stq $19,32($17) \n\ + xor $23,$24,$23 \n\ + \n\ + stq $21,40($17) \n\ + xor $25,$27,$25 \n\ + stq $23,48($17) \n\ + subq $16,1,$16 \n\ + \n\ + stq $25,56($17) \n\ + addq $17,64,$17 \n\ + addq $18,64,$18 \n\ + bgt $16,2b \n\ + \n\ + ret \n\ + .end xor_alpha_2 \n\ + \n\ + .align 3 \n\ + .ent xor_alpha_3 \n\ +xor_alpha_3: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + .align 4 \n\ +3: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + ldq $2,0($19) \n\ + ldq $3,8($17) \n\ + \n\ + ldq $4,8($18) \n\ + ldq $6,16($17) \n\ + ldq $7,16($18) \n\ + ldq $21,24($17) \n\ + \n\ + ldq $22,24($18) \n\ + ldq $24,32($17) \n\ + ldq $25,32($18) \n\ + ldq $5,8($19) \n\ + \n\ + ldq $20,16($19) \n\ + ldq $23,24($19) \n\ + ldq $27,32($19) \n\ + nop \n\ + \n\ + xor $0,$1,$1 # 8 cycles from $0 load \n\ + xor $3,$4,$4 # 6 cycles from $4 load \n\ + xor $6,$7,$7 # 6 cycles from $7 load \n\ + xor $21,$22,$22 # 5 cycles from $22 load \n\ + \n\ + xor $1,$2,$2 # 9 cycles from $2 load \n\ + xor $24,$25,$25 # 5 cycles from $25 load \n\ + stq $2,0($17) \n\ + xor $4,$5,$5 # 6 cycles from $5 load \n\ + \n\ + stq $5,8($17) \n\ + xor 
$7,$20,$20 # 7 cycles from $20 load \n\ + stq $20,16($17) \n\ + xor $22,$23,$23 # 7 cycles from $23 load \n\ + \n\ + stq $23,24($17) \n\ + xor $25,$27,$27 # 7 cycles from $27 load \n\ + stq $27,32($17) \n\ + nop \n\ + \n\ + ldq $0,40($17) \n\ + ldq $1,40($18) \n\ + ldq $3,48($17) \n\ + ldq $4,48($18) \n\ + \n\ + ldq $6,56($17) \n\ + ldq $7,56($18) \n\ + ldq $2,40($19) \n\ + ldq $5,48($19) \n\ + \n\ + ldq $20,56($19) \n\ + xor $0,$1,$1 # 4 cycles from $1 load \n\ + xor $3,$4,$4 # 5 cycles from $4 load \n\ + xor $6,$7,$7 # 5 cycles from $7 load \n\ + \n\ + xor $1,$2,$2 # 4 cycles from $2 load \n\ + xor $4,$5,$5 # 5 cycles from $5 load \n\ + stq $2,40($17) \n\ + xor $7,$20,$20 # 4 cycles from $20 load \n\ + \n\ + stq $5,48($17) \n\ + subq $16,1,$16 \n\ + stq $20,56($17) \n\ + addq $19,64,$19 \n\ + \n\ + addq $18,64,$18 \n\ + addq $17,64,$17 \n\ + bgt $16,3b \n\ + ret \n\ + .end xor_alpha_3 \n\ + \n\ + .align 3 \n\ + .ent xor_alpha_4 \n\ +xor_alpha_4: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + .align 4 \n\ +4: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + ldq $2,0($19) \n\ + ldq $3,0($20) \n\ + \n\ + ldq $4,8($17) \n\ + ldq $5,8($18) \n\ + ldq $6,8($19) \n\ + ldq $7,8($20) \n\ + \n\ + ldq $21,16($17) \n\ + ldq $22,16($18) \n\ + ldq $23,16($19) \n\ + ldq $24,16($20) \n\ + \n\ + ldq $25,24($17) \n\ + xor $0,$1,$1 # 6 cycles from $1 load \n\ + ldq $27,24($18) \n\ + xor $2,$3,$3 # 6 cycles from $3 load \n\ + \n\ + ldq $0,24($19) \n\ + xor $1,$3,$3 \n\ + ldq $1,24($20) \n\ + xor $4,$5,$5 # 7 cycles from $5 load \n\ + \n\ + stq $3,0($17) \n\ + xor $6,$7,$7 \n\ + xor $21,$22,$22 # 7 cycles from $22 load \n\ + xor $5,$7,$7 \n\ + \n\ + stq $7,8($17) \n\ + xor $23,$24,$24 # 7 cycles from $24 load \n\ + ldq $2,32($17) \n\ + xor $22,$24,$24 \n\ + \n\ + ldq $3,32($18) \n\ + ldq $4,32($19) \n\ + ldq $5,32($20) \n\ + xor $25,$27,$27 # 8 cycles from $27 load \n\ + \n\ + ldq $6,40($17) \n\ + ldq $7,40($18) \n\ + ldq $21,40($19) \n\ + ldq $22,40($20) \n\ + \n\ + stq $24,16($17) \n\ + xor $0,$1,$1 # 9 cycles from $1 load \n\ + xor $2,$3,$3 # 5 cycles from $3 load \n\ + xor $27,$1,$1 \n\ + \n\ + stq $1,24($17) \n\ + xor $4,$5,$5 # 5 cycles from $5 load \n\ + ldq $23,48($17) \n\ + ldq $24,48($18) \n\ + \n\ + ldq $25,48($19) \n\ + xor $3,$5,$5 \n\ + ldq $27,48($20) \n\ + ldq $0,56($17) \n\ + \n\ + ldq $1,56($18) \n\ + ldq $2,56($19) \n\ + xor $6,$7,$7 # 8 cycles from $6 load \n\ + ldq $3,56($20) \n\ + \n\ + stq $5,32($17) \n\ + xor $21,$22,$22 # 8 cycles from $22 load \n\ + xor $7,$22,$22 \n\ + xor $23,$24,$24 # 5 cycles from $24 load \n\ + \n\ + stq $22,40($17) \n\ + xor $25,$27,$27 # 5 cycles from $27 load \n\ + xor $24,$27,$27 \n\ + xor $0,$1,$1 # 5 cycles from $1 load \n\ + \n\ + stq $27,48($17) \n\ + xor $2,$3,$3 # 4 cycles from $3 load \n\ + xor $1,$3,$3 \n\ + subq $16,1,$16 \n\ + \n\ + stq $3,56($17) \n\ + addq $20,64,$20 \n\ + addq $19,64,$19 \n\ + addq $18,64,$18 \n\ + \n\ + addq $17,64,$17 \n\ + bgt $16,4b \n\ + ret \n\ + .end xor_alpha_4 \n\ + \n\ + .align 3 \n\ + .ent xor_alpha_5 \n\ +xor_alpha_5: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + .align 4 \n\ +5: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + ldq $2,0($19) \n\ + ldq $3,0($20) \n\ + \n\ + ldq $4,0($21) \n\ + ldq $5,8($17) \n\ + ldq $6,8($18) \n\ + ldq $7,8($19) \n\ + \n\ + ldq $22,8($20) \n\ + ldq $23,8($21) \n\ + ldq $24,16($17) \n\ + ldq $25,16($18) \n\ + \n\ + ldq $27,16($19) \n\ + xor $0,$1,$1 # 6 cycles from $1 load \n\ + ldq $28,16($20) \n\ + xor $2,$3,$3 # 6 cycles from $3 load \n\ + \n\ + ldq $0,16($21) \n\ + xor $1,$3,$3 \n\ + ldq 
$1,24($17) \n\ + xor $3,$4,$4 # 7 cycles from $4 load \n\ + \n\ + stq $4,0($17) \n\ + xor $5,$6,$6 # 7 cycles from $6 load \n\ + xor $7,$22,$22 # 7 cycles from $22 load \n\ + xor $6,$23,$23 # 7 cycles from $23 load \n\ + \n\ + ldq $2,24($18) \n\ + xor $22,$23,$23 \n\ + ldq $3,24($19) \n\ + xor $24,$25,$25 # 8 cycles from $25 load \n\ + \n\ + stq $23,8($17) \n\ + xor $25,$27,$27 # 8 cycles from $27 load \n\ + ldq $4,24($20) \n\ + xor $28,$0,$0 # 7 cycles from $0 load \n\ + \n\ + ldq $5,24($21) \n\ + xor $27,$0,$0 \n\ + ldq $6,32($17) \n\ + ldq $7,32($18) \n\ + \n\ + stq $0,16($17) \n\ + xor $1,$2,$2 # 6 cycles from $2 load \n\ + ldq $22,32($19) \n\ + xor $3,$4,$4 # 4 cycles from $4 load \n\ + \n\ + ldq $23,32($20) \n\ + xor $2,$4,$4 \n\ + ldq $24,32($21) \n\ + ldq $25,40($17) \n\ + \n\ + ldq $27,40($18) \n\ + ldq $28,40($19) \n\ + ldq $0,40($20) \n\ + xor $4,$5,$5 # 7 cycles from $5 load \n\ + \n\ + stq $5,24($17) \n\ + xor $6,$7,$7 # 7 cycles from $7 load \n\ + ldq $1,40($21) \n\ + ldq $2,48($17) \n\ + \n\ + ldq $3,48($18) \n\ + xor $7,$22,$22 # 7 cycles from $22 load \n\ + ldq $4,48($19) \n\ + xor $23,$24,$24 # 6 cycles from $24 load \n\ + \n\ + ldq $5,48($20) \n\ + xor $22,$24,$24 \n\ + ldq $6,48($21) \n\ + xor $25,$27,$27 # 7 cycles from $27 load \n\ + \n\ + stq $24,32($17) \n\ + xor $27,$28,$28 # 8 cycles from $28 load \n\ + ldq $7,56($17) \n\ + xor $0,$1,$1 # 6 cycles from $1 load \n\ + \n\ + ldq $22,56($18) \n\ + ldq $23,56($19) \n\ + ldq $24,56($20) \n\ + ldq $25,56($21) \n\ + \n\ + xor $28,$1,$1 \n\ + xor $2,$3,$3 # 9 cycles from $3 load \n\ + xor $3,$4,$4 # 9 cycles from $4 load \n\ + xor $5,$6,$6 # 8 cycles from $6 load \n\ + \n\ + stq $1,40($17) \n\ + xor $4,$6,$6 \n\ + xor $7,$22,$22 # 7 cycles from $22 load \n\ + xor $23,$24,$24 # 6 cycles from $24 load \n\ + \n\ + stq $6,48($17) \n\ + xor $22,$24,$24 \n\ + subq $16,1,$16 \n\ + xor $24,$25,$25 # 8 cycles from $25 load \n\ + \n\ + stq $25,56($17) \n\ + addq $21,64,$21 \n\ + addq $20,64,$20 \n\ + addq $19,64,$19 \n\ + \n\ + addq $18,64,$18 \n\ + addq $17,64,$17 \n\ + bgt $16,5b \n\ + ret \n\ + .end xor_alpha_5 \n\ + \n\ + .align 3 \n\ + .ent xor_alpha_prefetch_2 \n\ +xor_alpha_prefetch_2: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + \n\ + ldq $31, 0($17) \n\ + ldq $31, 0($18) \n\ + \n\ + ldq $31, 64($17) \n\ + ldq $31, 64($18) \n\ + \n\ + ldq $31, 128($17) \n\ + ldq $31, 128($18) \n\ + \n\ + ldq $31, 192($17) \n\ + ldq $31, 192($18) \n\ + .align 4 \n\ +2: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + ldq $2,8($17) \n\ + ldq $3,8($18) \n\ + \n\ + ldq $4,16($17) \n\ + ldq $5,16($18) \n\ + ldq $6,24($17) \n\ + ldq $7,24($18) \n\ + \n\ + ldq $19,32($17) \n\ + ldq $20,32($18) \n\ + ldq $21,40($17) \n\ + ldq $22,40($18) \n\ + \n\ + ldq $23,48($17) \n\ + ldq $24,48($18) \n\ + ldq $25,56($17) \n\ + ldq $27,56($18) \n\ + \n\ + ldq $31,256($17) \n\ + xor $0,$1,$0 # 8 cycles from $1 load \n\ + ldq $31,256($18) \n\ + xor $2,$3,$2 \n\ + \n\ + stq $0,0($17) \n\ + xor $4,$5,$4 \n\ + stq $2,8($17) \n\ + xor $6,$7,$6 \n\ + \n\ + stq $4,16($17) \n\ + xor $19,$20,$19 \n\ + stq $6,24($17) \n\ + xor $21,$22,$21 \n\ + \n\ + stq $19,32($17) \n\ + xor $23,$24,$23 \n\ + stq $21,40($17) \n\ + xor $25,$27,$25 \n\ + \n\ + stq $23,48($17) \n\ + subq $16,1,$16 \n\ + stq $25,56($17) \n\ + addq $17,64,$17 \n\ + \n\ + addq $18,64,$18 \n\ + bgt $16,2b \n\ + ret \n\ + .end xor_alpha_prefetch_2 \n\ + \n\ + .align 3 \n\ + .ent xor_alpha_prefetch_3 \n\ +xor_alpha_prefetch_3: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + \n\ + ldq $31, 0($17) \n\ + ldq $31, 0($18) 
\n\ + ldq $31, 0($19) \n\ + \n\ + ldq $31, 64($17) \n\ + ldq $31, 64($18) \n\ + ldq $31, 64($19) \n\ + \n\ + ldq $31, 128($17) \n\ + ldq $31, 128($18) \n\ + ldq $31, 128($19) \n\ + \n\ + ldq $31, 192($17) \n\ + ldq $31, 192($18) \n\ + ldq $31, 192($19) \n\ + .align 4 \n\ +3: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + ldq $2,0($19) \n\ + ldq $3,8($17) \n\ + \n\ + ldq $4,8($18) \n\ + ldq $6,16($17) \n\ + ldq $7,16($18) \n\ + ldq $21,24($17) \n\ + \n\ + ldq $22,24($18) \n\ + ldq $24,32($17) \n\ + ldq $25,32($18) \n\ + ldq $5,8($19) \n\ + \n\ + ldq $20,16($19) \n\ + ldq $23,24($19) \n\ + ldq $27,32($19) \n\ + nop \n\ + \n\ + xor $0,$1,$1 # 8 cycles from $0 load \n\ + xor $3,$4,$4 # 7 cycles from $4 load \n\ + xor $6,$7,$7 # 6 cycles from $7 load \n\ + xor $21,$22,$22 # 5 cycles from $22 load \n\ + \n\ + xor $1,$2,$2 # 9 cycles from $2 load \n\ + xor $24,$25,$25 # 5 cycles from $25 load \n\ + stq $2,0($17) \n\ + xor $4,$5,$5 # 6 cycles from $5 load \n\ + \n\ + stq $5,8($17) \n\ + xor $7,$20,$20 # 7 cycles from $20 load \n\ + stq $20,16($17) \n\ + xor $22,$23,$23 # 7 cycles from $23 load \n\ + \n\ + stq $23,24($17) \n\ + xor $25,$27,$27 # 7 cycles from $27 load \n\ + stq $27,32($17) \n\ + nop \n\ + \n\ + ldq $0,40($17) \n\ + ldq $1,40($18) \n\ + ldq $3,48($17) \n\ + ldq $4,48($18) \n\ + \n\ + ldq $6,56($17) \n\ + ldq $7,56($18) \n\ + ldq $2,40($19) \n\ + ldq $5,48($19) \n\ + \n\ + ldq $20,56($19) \n\ + ldq $31,256($17) \n\ + ldq $31,256($18) \n\ + ldq $31,256($19) \n\ + \n\ + xor $0,$1,$1 # 6 cycles from $1 load \n\ + xor $3,$4,$4 # 5 cycles from $4 load \n\ + xor $6,$7,$7 # 5 cycles from $7 load \n\ + xor $1,$2,$2 # 4 cycles from $2 load \n\ + \n\ + xor $4,$5,$5 # 5 cycles from $5 load \n\ + xor $7,$20,$20 # 4 cycles from $20 load \n\ + stq $2,40($17) \n\ + subq $16,1,$16 \n\ + \n\ + stq $5,48($17) \n\ + addq $19,64,$19 \n\ + stq $20,56($17) \n\ + addq $18,64,$18 \n\ + \n\ + addq $17,64,$17 \n\ + bgt $16,3b \n\ + ret \n\ + .end xor_alpha_prefetch_3 \n\ + \n\ + .align 3 \n\ + .ent xor_alpha_prefetch_4 \n\ +xor_alpha_prefetch_4: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + \n\ + ldq $31, 0($17) \n\ + ldq $31, 0($18) \n\ + ldq $31, 0($19) \n\ + ldq $31, 0($20) \n\ + \n\ + ldq $31, 64($17) \n\ + ldq $31, 64($18) \n\ + ldq $31, 64($19) \n\ + ldq $31, 64($20) \n\ + \n\ + ldq $31, 128($17) \n\ + ldq $31, 128($18) \n\ + ldq $31, 128($19) \n\ + ldq $31, 128($20) \n\ + \n\ + ldq $31, 192($17) \n\ + ldq $31, 192($18) \n\ + ldq $31, 192($19) \n\ + ldq $31, 192($20) \n\ + .align 4 \n\ +4: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + ldq $2,0($19) \n\ + ldq $3,0($20) \n\ + \n\ + ldq $4,8($17) \n\ + ldq $5,8($18) \n\ + ldq $6,8($19) \n\ + ldq $7,8($20) \n\ + \n\ + ldq $21,16($17) \n\ + ldq $22,16($18) \n\ + ldq $23,16($19) \n\ + ldq $24,16($20) \n\ + \n\ + ldq $25,24($17) \n\ + xor $0,$1,$1 # 6 cycles from $1 load \n\ + ldq $27,24($18) \n\ + xor $2,$3,$3 # 6 cycles from $3 load \n\ + \n\ + ldq $0,24($19) \n\ + xor $1,$3,$3 \n\ + ldq $1,24($20) \n\ + xor $4,$5,$5 # 7 cycles from $5 load \n\ + \n\ + stq $3,0($17) \n\ + xor $6,$7,$7 \n\ + xor $21,$22,$22 # 7 cycles from $22 load \n\ + xor $5,$7,$7 \n\ + \n\ + stq $7,8($17) \n\ + xor $23,$24,$24 # 7 cycles from $24 load \n\ + ldq $2,32($17) \n\ + xor $22,$24,$24 \n\ + \n\ + ldq $3,32($18) \n\ + ldq $4,32($19) \n\ + ldq $5,32($20) \n\ + xor $25,$27,$27 # 8 cycles from $27 load \n\ + \n\ + ldq $6,40($17) \n\ + ldq $7,40($18) \n\ + ldq $21,40($19) \n\ + ldq $22,40($20) \n\ + \n\ + stq $24,16($17) \n\ + xor $0,$1,$1 # 9 cycles from $1 load \n\ + xor $2,$3,$3 # 5 
cycles from $3 load \n\ + xor $27,$1,$1 \n\ + \n\ + stq $1,24($17) \n\ + xor $4,$5,$5 # 5 cycles from $5 load \n\ + ldq $23,48($17) \n\ + xor $3,$5,$5 \n\ + \n\ + ldq $24,48($18) \n\ + ldq $25,48($19) \n\ + ldq $27,48($20) \n\ + ldq $0,56($17) \n\ + \n\ + ldq $1,56($18) \n\ + ldq $2,56($19) \n\ + ldq $3,56($20) \n\ + xor $6,$7,$7 # 8 cycles from $6 load \n\ + \n\ + ldq $31,256($17) \n\ + xor $21,$22,$22 # 8 cycles from $22 load \n\ + ldq $31,256($18) \n\ + xor $7,$22,$22 \n\ + \n\ + ldq $31,256($19) \n\ + xor $23,$24,$24 # 6 cycles from $24 load \n\ + ldq $31,256($20) \n\ + xor $25,$27,$27 # 6 cycles from $27 load \n\ + \n\ + stq $5,32($17) \n\ + xor $24,$27,$27 \n\ + xor $0,$1,$1 # 7 cycles from $1 load \n\ + xor $2,$3,$3 # 6 cycles from $3 load \n\ + \n\ + stq $22,40($17) \n\ + xor $1,$3,$3 \n\ + stq $27,48($17) \n\ + subq $16,1,$16 \n\ + \n\ + stq $3,56($17) \n\ + addq $20,64,$20 \n\ + addq $19,64,$19 \n\ + addq $18,64,$18 \n\ + \n\ + addq $17,64,$17 \n\ + bgt $16,4b \n\ + ret \n\ + .end xor_alpha_prefetch_4 \n\ + \n\ + .align 3 \n\ + .ent xor_alpha_prefetch_5 \n\ +xor_alpha_prefetch_5: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + \n\ + ldq $31, 0($17) \n\ + ldq $31, 0($18) \n\ + ldq $31, 0($19) \n\ + ldq $31, 0($20) \n\ + ldq $31, 0($21) \n\ + \n\ + ldq $31, 64($17) \n\ + ldq $31, 64($18) \n\ + ldq $31, 64($19) \n\ + ldq $31, 64($20) \n\ + ldq $31, 64($21) \n\ + \n\ + ldq $31, 128($17) \n\ + ldq $31, 128($18) \n\ + ldq $31, 128($19) \n\ + ldq $31, 128($20) \n\ + ldq $31, 128($21) \n\ + \n\ + ldq $31, 192($17) \n\ + ldq $31, 192($18) \n\ + ldq $31, 192($19) \n\ + ldq $31, 192($20) \n\ + ldq $31, 192($21) \n\ + .align 4 \n\ +5: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + ldq $2,0($19) \n\ + ldq $3,0($20) \n\ + \n\ + ldq $4,0($21) \n\ + ldq $5,8($17) \n\ + ldq $6,8($18) \n\ + ldq $7,8($19) \n\ + \n\ + ldq $22,8($20) \n\ + ldq $23,8($21) \n\ + ldq $24,16($17) \n\ + ldq $25,16($18) \n\ + \n\ + ldq $27,16($19) \n\ + xor $0,$1,$1 # 6 cycles from $1 load \n\ + ldq $28,16($20) \n\ + xor $2,$3,$3 # 6 cycles from $3 load \n\ + \n\ + ldq $0,16($21) \n\ + xor $1,$3,$3 \n\ + ldq $1,24($17) \n\ + xor $3,$4,$4 # 7 cycles from $4 load \n\ + \n\ + stq $4,0($17) \n\ + xor $5,$6,$6 # 7 cycles from $6 load \n\ + xor $7,$22,$22 # 7 cycles from $22 load \n\ + xor $6,$23,$23 # 7 cycles from $23 load \n\ + \n\ + ldq $2,24($18) \n\ + xor $22,$23,$23 \n\ + ldq $3,24($19) \n\ + xor $24,$25,$25 # 8 cycles from $25 load \n\ + \n\ + stq $23,8($17) \n\ + xor $25,$27,$27 # 8 cycles from $27 load \n\ + ldq $4,24($20) \n\ + xor $28,$0,$0 # 7 cycles from $0 load \n\ + \n\ + ldq $5,24($21) \n\ + xor $27,$0,$0 \n\ + ldq $6,32($17) \n\ + ldq $7,32($18) \n\ + \n\ + stq $0,16($17) \n\ + xor $1,$2,$2 # 6 cycles from $2 load \n\ + ldq $22,32($19) \n\ + xor $3,$4,$4 # 4 cycles from $4 load \n\ + \n\ + ldq $23,32($20) \n\ + xor $2,$4,$4 \n\ + ldq $24,32($21) \n\ + ldq $25,40($17) \n\ + \n\ + ldq $27,40($18) \n\ + ldq $28,40($19) \n\ + ldq $0,40($20) \n\ + xor $4,$5,$5 # 7 cycles from $5 load \n\ + \n\ + stq $5,24($17) \n\ + xor $6,$7,$7 # 7 cycles from $7 load \n\ + ldq $1,40($21) \n\ + ldq $2,48($17) \n\ + \n\ + ldq $3,48($18) \n\ + xor $7,$22,$22 # 7 cycles from $22 load \n\ + ldq $4,48($19) \n\ + xor $23,$24,$24 # 6 cycles from $24 load \n\ + \n\ + ldq $5,48($20) \n\ + xor $22,$24,$24 \n\ + ldq $6,48($21) \n\ + xor $25,$27,$27 # 7 cycles from $27 load \n\ + \n\ + stq $24,32($17) \n\ + xor $27,$28,$28 # 8 cycles from $28 load \n\ + ldq $7,56($17) \n\ + xor $0,$1,$1 # 6 cycles from $1 load \n\ + \n\ + ldq $22,56($18) \n\ + 
ldq $23,56($19) \n\ + ldq $24,56($20) \n\ + ldq $25,56($21) \n\ + \n\ + ldq $31,256($17) \n\ + xor $28,$1,$1 \n\ + ldq $31,256($18) \n\ + xor $2,$3,$3 # 9 cycles from $3 load \n\ + \n\ + ldq $31,256($19) \n\ + xor $3,$4,$4 # 9 cycles from $4 load \n\ + ldq $31,256($20) \n\ + xor $5,$6,$6 # 8 cycles from $6 load \n\ + \n\ + stq $1,40($17) \n\ + xor $4,$6,$6 \n\ + xor $7,$22,$22 # 7 cycles from $22 load \n\ + xor $23,$24,$24 # 6 cycles from $24 load \n\ + \n\ + stq $6,48($17) \n\ + xor $22,$24,$24 \n\ + ldq $31,256($21) \n\ + xor $24,$25,$25 # 8 cycles from $25 load \n\ + \n\ + stq $25,56($17) \n\ + subq $16,1,$16 \n\ + addq $21,64,$21 \n\ + addq $20,64,$20 \n\ + \n\ + addq $19,64,$19 \n\ + addq $18,64,$18 \n\ + addq $17,64,$17 \n\ + bgt $16,5b \n\ + \n\ + ret \n\ + .end xor_alpha_prefetch_5 \n\ +"); + +static struct xor_block_template xor_block_alpha = { + .name = "alpha", + .do_2 = xor_alpha_2, + .do_3 = xor_alpha_3, + .do_4 = xor_alpha_4, + .do_5 = xor_alpha_5, +}; + +static struct xor_block_template xor_block_alpha_prefetch = { + .name = "alpha prefetch", + .do_2 = xor_alpha_prefetch_2, + .do_3 = xor_alpha_prefetch_3, + .do_4 = xor_alpha_prefetch_4, + .do_5 = xor_alpha_prefetch_5, +}; + +/* For grins, also test the generic routines. */ +#include + +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ + do { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_32regs); \ + xor_speed(&xor_block_alpha); \ + xor_speed(&xor_block_alpha_prefetch); \ + } while (0) + +/* Force the use of alpha_prefetch if EV6, as it is significantly + faster in the cold cache case. */ +#define XOR_SELECT_TEMPLATE(FASTEST) \ + (implver() == IMPLVER_EV6 ? &xor_block_alpha_prefetch : FASTEST) diff --git a/arch/alpha/include/uapi/asm/Kbuild b/arch/alpha/include/uapi/asm/Kbuild new file mode 100644 index 0000000000..b97c552db3 --- /dev/null +++ b/arch/alpha/include/uapi/asm/Kbuild @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +generated-y += unistd_32.h diff --git a/arch/alpha/include/uapi/asm/a.out.h b/arch/alpha/include/uapi/asm/a.out.h new file mode 100644 index 0000000000..7d692df04b --- /dev/null +++ b/arch/alpha/include/uapi/asm/a.out.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__ALPHA_A_OUT_H__ +#define _UAPI__ALPHA_A_OUT_H__ + +#include + +/* + * OSF/1 ECOFF header structs. ECOFF files consist of: + * - a file header (struct filehdr), + * - an a.out header (struct aouthdr), + * - one or more section headers (struct scnhdr). + * The filhdr's "f_nscns" field contains the + * number of section headers. + */ + +struct filehdr +{ + /* OSF/1 "file" header */ + __u16 f_magic, f_nscns; + __u32 f_timdat; + __u64 f_symptr; + __u32 f_nsyms; + __u16 f_opthdr, f_flags; +}; + +struct aouthdr +{ + __u64 info; /* after that it looks quite normal.. */ + __u64 tsize; + __u64 dsize; + __u64 bsize; + __u64 entry; + __u64 text_start; /* with a few additions that actually make sense */ + __u64 data_start; + __u64 bss_start; + __u32 gprmask, fprmask; /* bitmask of general & floating point regs used in binary */ + __u64 gpvalue; +}; + +struct scnhdr +{ + char s_name[8]; + __u64 s_paddr; + __u64 s_vaddr; + __u64 s_size; + __u64 s_scnptr; + __u64 s_relptr; + __u64 s_lnnoptr; + __u16 s_nreloc; + __u16 s_nlnno; + __u32 s_flags; +}; + +struct exec +{ + /* OSF/1 "file" header */ + struct filehdr fh; + struct aouthdr ah; +}; + +/* + * Define's so that the kernel exec code can access the a.out header + * fields... 
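Functionally, each xor_alpha_N/xor_alpha_prefetch_N routine above XORs N-1 source blocks into the first block, 64 bytes per unrolled iteration (so the byte count is expected to be a multiple of 64). A plain-C reference for the two-block case, for comparison only:

static void xor_2_reference(unsigned long bytes, unsigned long *p1,
			    const unsigned long *p2)
{
	unsigned long words = bytes / sizeof(unsigned long);

	while (words--)
		*p1++ ^= *p2++;
}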
+ */ +#define a_info ah.info +#define a_text ah.tsize +#define a_data ah.dsize +#define a_bss ah.bsize +#define a_entry ah.entry +#define a_textstart ah.text_start +#define a_datastart ah.data_start +#define a_bssstart ah.bss_start +#define a_gprmask ah.gprmask +#define a_fprmask ah.fprmask +#define a_gpvalue ah.gpvalue + +#define N_TXTADDR(x) ((x).a_textstart) +#define N_DATADDR(x) ((x).a_datastart) +#define N_BSSADDR(x) ((x).a_bssstart) +#define N_DRSIZE(x) 0 +#define N_TRSIZE(x) 0 +#define N_SYMSIZE(x) 0 + +#define AOUTHSZ sizeof(struct aouthdr) +#define SCNHSZ sizeof(struct scnhdr) +#define SCNROUND 16 + +#define N_TXTOFF(x) \ + ((long) N_MAGIC(x) == ZMAGIC ? 0 : \ + (sizeof(struct exec) + (x).fh.f_nscns*SCNHSZ + SCNROUND - 1) & ~(SCNROUND - 1)) + +#endif /* _UAPI__ALPHA_A_OUT_H__ */ diff --git a/arch/alpha/include/uapi/asm/auxvec.h b/arch/alpha/include/uapi/asm/auxvec.h new file mode 100644 index 0000000000..57cae8780d --- /dev/null +++ b/arch/alpha/include/uapi/asm/auxvec.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __ASM_ALPHA_AUXVEC_H +#define __ASM_ALPHA_AUXVEC_H + +/* Reserve these numbers for any future use of a VDSO. */ +#if 0 +#define AT_SYSINFO 32 +#define AT_SYSINFO_EHDR 33 +#endif + +/* More complete cache descriptions than AT_[DIU]CACHEBSIZE. If the + value is -1, then the cache doesn't exist. Otherwise: + + bit 0-3: Cache set-associativity; 0 means fully associative. + bit 4-7: Log2 of cacheline size. + bit 8-31: Size of the entire cache >> 8. + bit 32-63: Reserved. +*/ + +#define AT_L1I_CACHESHAPE 34 +#define AT_L1D_CACHESHAPE 35 +#define AT_L2_CACHESHAPE 36 +#define AT_L3_CACHESHAPE 37 + +#define AT_VECTOR_SIZE_ARCH 4 /* entries in ARCH_DLINFO */ + +#endif /* __ASM_ALPHA_AUXVEC_H */ diff --git a/arch/alpha/include/uapi/asm/bitsperlong.h b/arch/alpha/include/uapi/asm/bitsperlong.h new file mode 100644 index 0000000000..6c5bf7d03f --- /dev/null +++ b/arch/alpha/include/uapi/asm/bitsperlong.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __ASM_ALPHA_BITSPERLONG_H +#define __ASM_ALPHA_BITSPERLONG_H + +#define __BITS_PER_LONG 64 + +#include + +#endif /* __ASM_ALPHA_BITSPERLONG_H */ diff --git a/arch/alpha/include/uapi/asm/byteorder.h b/arch/alpha/include/uapi/asm/byteorder.h new file mode 100644 index 0000000000..efa9b51b45 --- /dev/null +++ b/arch/alpha/include/uapi/asm/byteorder.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ALPHA_BYTEORDER_H +#define _ALPHA_BYTEORDER_H + +#include + +#endif /* _ALPHA_BYTEORDER_H */ diff --git a/arch/alpha/include/uapi/asm/compiler.h b/arch/alpha/include/uapi/asm/compiler.h new file mode 100644 index 0000000000..0e00c0e133 --- /dev/null +++ b/arch/alpha/include/uapi/asm/compiler.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__ALPHA_COMPILER_H +#define _UAPI__ALPHA_COMPILER_H + +/* + * Herein are macros we use when describing various patterns we want to GCC. + * In all cases we can get better schedules out of the compiler if we hide + * as little as possible inside inline assembly. However, we want to be + * able to know what we'll get out before giving up inline assembly. Thus + * these tests and macros. 
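The AT_L?_CACHESHAPE encoding above can be unpacked as in the following sketch; the struct and field names are invented, only the bit layout comes from the comment.

struct cacheshape {
	unsigned int  assoc;		/* 0 means fully associative */
	unsigned int  line_bytes;
	unsigned long size_bytes;
};

static int decode_cacheshape(long val, struct cacheshape *cs)
{
	if (val == -1)
		return -1;					/* cache absent */
	cs->assoc      = val & 0xf;				/* bits 0-3   */
	cs->line_bytes = 1u << ((val >> 4) & 0xf);		/* bits 4-7   */
	cs->size_bytes = (((unsigned long)val >> 8) & 0xffffff) << 8;	/* bits 8-31 */
	return 0;
}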
+ */ + +#if __GNUC__ == 3 && __GNUC_MINOR__ >= 4 || __GNUC__ > 3 +# define __kernel_insbl(val, shift) __builtin_alpha_insbl(val, shift) +# define __kernel_inswl(val, shift) __builtin_alpha_inswl(val, shift) +# define __kernel_insql(val, shift) __builtin_alpha_insql(val, shift) +# define __kernel_inslh(val, shift) __builtin_alpha_inslh(val, shift) +# define __kernel_extbl(val, shift) __builtin_alpha_extbl(val, shift) +# define __kernel_extwl(val, shift) __builtin_alpha_extwl(val, shift) +# define __kernel_cmpbge(a, b) __builtin_alpha_cmpbge(a, b) +#else +# define __kernel_insbl(val, shift) \ + ({ unsigned long __kir; \ + __asm__("insbl %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val)); \ + __kir; }) +# define __kernel_inswl(val, shift) \ + ({ unsigned long __kir; \ + __asm__("inswl %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val)); \ + __kir; }) +# define __kernel_insql(val, shift) \ + ({ unsigned long __kir; \ + __asm__("insql %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val)); \ + __kir; }) +# define __kernel_inslh(val, shift) \ + ({ unsigned long __kir; \ + __asm__("inslh %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val)); \ + __kir; }) +# define __kernel_extbl(val, shift) \ + ({ unsigned long __kir; \ + __asm__("extbl %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val)); \ + __kir; }) +# define __kernel_extwl(val, shift) \ + ({ unsigned long __kir; \ + __asm__("extwl %2,%1,%0" : "=r"(__kir) : "rI"(shift), "r"(val)); \ + __kir; }) +# define __kernel_cmpbge(a, b) \ + ({ unsigned long __kir; \ + __asm__("cmpbge %r2,%1,%0" : "=r"(__kir) : "rI"(b), "rJ"(a)); \ + __kir; }) +#endif + +#ifdef __alpha_cix__ +# if __GNUC__ == 3 && __GNUC_MINOR__ >= 4 || __GNUC__ > 3 +# define __kernel_cttz(x) __builtin_ctzl(x) +# define __kernel_ctlz(x) __builtin_clzl(x) +# define __kernel_ctpop(x) __builtin_popcountl(x) +# else +# define __kernel_cttz(x) \ + ({ unsigned long __kir; \ + __asm__("cttz %1,%0" : "=r"(__kir) : "r"(x)); \ + __kir; }) +# define __kernel_ctlz(x) \ + ({ unsigned long __kir; \ + __asm__("ctlz %1,%0" : "=r"(__kir) : "r"(x)); \ + __kir; }) +# define __kernel_ctpop(x) \ + ({ unsigned long __kir; \ + __asm__("ctpop %1,%0" : "=r"(__kir) : "r"(x)); \ + __kir; }) +# endif +#else +# define __kernel_cttz(x) \ + ({ unsigned long __kir; \ + __asm__(".arch ev67; cttz %1,%0" : "=r"(__kir) : "r"(x)); \ + __kir; }) +# define __kernel_ctlz(x) \ + ({ unsigned long __kir; \ + __asm__(".arch ev67; ctlz %1,%0" : "=r"(__kir) : "r"(x)); \ + __kir; }) +# define __kernel_ctpop(x) \ + ({ unsigned long __kir; \ + __asm__(".arch ev67; ctpop %1,%0" : "=r"(__kir) : "r"(x)); \ + __kir; }) +#endif + + +/* + * Beginning with EGCS 1.1, GCC defines __alpha_bwx__ when the BWX + * extension is enabled. Previous versions did not define anything + * we could test during compilation -- too bad, so sad. 
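For reference, __kernel_cmpbge(a, b) above performs eight parallel unsigned byte compares and returns an 8-bit mask; a plain-C model follows, which also shows why cmpbge(0, val) marks exactly the zero bytes of val (the property the word-at-a-time code relies on).

static unsigned long cmpbge_model(unsigned long a, unsigned long b)
{
	unsigned long mask = 0;
	int i;

	for (i = 0; i < 8; i++)
		if (((a >> (8 * i)) & 0xff) >= ((b >> (8 * i)) & 0xff))
			mask |= 1ul << i;
	return mask;
}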
+ */ + +#if defined(__alpha_bwx__) +#define __kernel_ldbu(mem) (mem) +#define __kernel_ldwu(mem) (mem) +#define __kernel_stb(val,mem) ((mem) = (val)) +#define __kernel_stw(val,mem) ((mem) = (val)) +#else +#define __kernel_ldbu(mem) \ + ({ unsigned char __kir; \ + __asm__(".arch ev56; \ + ldbu %0,%1" : "=r"(__kir) : "m"(mem)); \ + __kir; }) +#define __kernel_ldwu(mem) \ + ({ unsigned short __kir; \ + __asm__(".arch ev56; \ + ldwu %0,%1" : "=r"(__kir) : "m"(mem)); \ + __kir; }) +#define __kernel_stb(val,mem) \ + __asm__(".arch ev56; \ + stb %1,%0" : "=m"(mem) : "r"(val)) +#define __kernel_stw(val,mem) \ + __asm__(".arch ev56; \ + stw %1,%0" : "=m"(mem) : "r"(val)) +#endif + + +#endif /* _UAPI__ALPHA_COMPILER_H */ diff --git a/arch/alpha/include/uapi/asm/console.h b/arch/alpha/include/uapi/asm/console.h new file mode 100644 index 0000000000..5fcb65300b --- /dev/null +++ b/arch/alpha/include/uapi/asm/console.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__AXP_CONSOLE_H +#define _UAPI__AXP_CONSOLE_H + +/* + * Console callback routine numbers + */ +#define CCB_GETC 0x01 +#define CCB_PUTS 0x02 +#define CCB_RESET_TERM 0x03 +#define CCB_SET_TERM_INT 0x04 +#define CCB_SET_TERM_CTL 0x05 +#define CCB_PROCESS_KEYCODE 0x06 +#define CCB_OPEN_CONSOLE 0x07 +#define CCB_CLOSE_CONSOLE 0x08 + +#define CCB_OPEN 0x10 +#define CCB_CLOSE 0x11 +#define CCB_IOCTL 0x12 +#define CCB_READ 0x13 +#define CCB_WRITE 0x14 + +#define CCB_SET_ENV 0x20 +#define CCB_RESET_ENV 0x21 +#define CCB_GET_ENV 0x22 +#define CCB_SAVE_ENV 0x23 + +#define CCB_PSWITCH 0x30 +#define CCB_BIOS_EMUL 0x32 + +/* + * Environment variable numbers + */ +#define ENV_AUTO_ACTION 0x01 +#define ENV_BOOT_DEV 0x02 +#define ENV_BOOTDEF_DEV 0x03 +#define ENV_BOOTED_DEV 0x04 +#define ENV_BOOT_FILE 0x05 +#define ENV_BOOTED_FILE 0x06 +#define ENV_BOOT_OSFLAGS 0x07 +#define ENV_BOOTED_OSFLAGS 0x08 +#define ENV_BOOT_RESET 0x09 +#define ENV_DUMP_DEV 0x0A +#define ENV_ENABLE_AUDIT 0x0B +#define ENV_LICENSE 0x0C +#define ENV_CHAR_SET 0x0D +#define ENV_LANGUAGE 0x0E +#define ENV_TTY_DEV 0x0F + + +#endif /* _UAPI__AXP_CONSOLE_H */ diff --git a/arch/alpha/include/uapi/asm/errno.h b/arch/alpha/include/uapi/asm/errno.h new file mode 100644 index 0000000000..3d265f6bab --- /dev/null +++ b/arch/alpha/include/uapi/asm/errno.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ALPHA_ERRNO_H +#define _ALPHA_ERRNO_H + +#include + +#undef EAGAIN /* 11 in errno-base.h */ + +#define EDEADLK 11 /* Resource deadlock would occur */ + +#define EAGAIN 35 /* Try again */ +#define EWOULDBLOCK EAGAIN /* Operation would block */ +#define EINPROGRESS 36 /* Operation now in progress */ +#define EALREADY 37 /* Operation already in progress */ +#define ENOTSOCK 38 /* Socket operation on non-socket */ +#define EDESTADDRREQ 39 /* Destination address required */ +#define EMSGSIZE 40 /* Message too long */ +#define EPROTOTYPE 41 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 42 /* Protocol not available */ +#define EPROTONOSUPPORT 43 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 44 /* Socket type not supported */ +#define EOPNOTSUPP 45 /* Operation not supported on transport endpoint */ +#define EPFNOSUPPORT 46 /* Protocol family not supported */ +#define EAFNOSUPPORT 47 /* Address family not supported by protocol */ +#define EADDRINUSE 48 /* Address already in use */ +#define EADDRNOTAVAIL 49 /* Cannot assign requested address */ +#define ENETDOWN 50 /* Network is down */ +#define 
ENETUNREACH 51 /* Network is unreachable */ +#define ENETRESET 52 /* Network dropped connection because of reset */ +#define ECONNABORTED 53 /* Software caused connection abort */ +#define ECONNRESET 54 /* Connection reset by peer */ +#define ENOBUFS 55 /* No buffer space available */ +#define EISCONN 56 /* Transport endpoint is already connected */ +#define ENOTCONN 57 /* Transport endpoint is not connected */ +#define ESHUTDOWN 58 /* Cannot send after transport endpoint shutdown */ +#define ETOOMANYREFS 59 /* Too many references: cannot splice */ +#define ETIMEDOUT 60 /* Connection timed out */ +#define ECONNREFUSED 61 /* Connection refused */ +#define ELOOP 62 /* Too many symbolic links encountered */ +#define ENAMETOOLONG 63 /* File name too long */ +#define EHOSTDOWN 64 /* Host is down */ +#define EHOSTUNREACH 65 /* No route to host */ +#define ENOTEMPTY 66 /* Directory not empty */ + +#define EUSERS 68 /* Too many users */ +#define EDQUOT 69 /* Quota exceeded */ +#define ESTALE 70 /* Stale file handle */ +#define EREMOTE 71 /* Object is remote */ + +#define ENOLCK 77 /* No record locks available */ +#define ENOSYS 78 /* Function not implemented */ + +#define ENOMSG 80 /* No message of desired type */ +#define EIDRM 81 /* Identifier removed */ +#define ENOSR 82 /* Out of streams resources */ +#define ETIME 83 /* Timer expired */ +#define EBADMSG 84 /* Not a data message */ +#define EPROTO 85 /* Protocol error */ +#define ENODATA 86 /* No data available */ +#define ENOSTR 87 /* Device not a stream */ + +#define ENOPKG 92 /* Package not installed */ + +#define EILSEQ 116 /* Illegal byte sequence */ + +/* The following are just random noise.. */ +#define ECHRNG 88 /* Channel number out of range */ +#define EL2NSYNC 89 /* Level 2 not synchronized */ +#define EL3HLT 90 /* Level 3 halted */ +#define EL3RST 91 /* Level 3 reset */ + +#define ELNRNG 93 /* Link number out of range */ +#define EUNATCH 94 /* Protocol driver not attached */ +#define ENOCSI 95 /* No CSI structure available */ +#define EL2HLT 96 /* Level 2 halted */ +#define EBADE 97 /* Invalid exchange */ +#define EBADR 98 /* Invalid request descriptor */ +#define EXFULL 99 /* Exchange full */ +#define ENOANO 100 /* No anode */ +#define EBADRQC 101 /* Invalid request code */ +#define EBADSLT 102 /* Invalid slot */ + +#define EDEADLOCK EDEADLK + +#define EBFONT 104 /* Bad font file format */ +#define ENONET 105 /* Machine is not on the network */ +#define ENOLINK 106 /* Link has been severed */ +#define EADV 107 /* Advertise error */ +#define ESRMNT 108 /* Srmount error */ +#define ECOMM 109 /* Communication error on send */ +#define EMULTIHOP 110 /* Multihop attempted */ +#define EDOTDOT 111 /* RFS specific error */ +#define EOVERFLOW 112 /* Value too large for defined data type */ +#define ENOTUNIQ 113 /* Name not unique on network */ +#define EBADFD 114 /* File descriptor in bad state */ +#define EREMCHG 115 /* Remote address changed */ + +#define EUCLEAN 117 /* Structure needs cleaning */ +#define ENOTNAM 118 /* Not a XENIX named type file */ +#define ENAVAIL 119 /* No XENIX semaphores available */ +#define EISNAM 120 /* Is a named type file */ +#define EREMOTEIO 121 /* Remote I/O error */ + +#define ELIBACC 122 /* Can not access a needed shared library */ +#define ELIBBAD 123 /* Accessing a corrupted shared library */ +#define ELIBSCN 124 /* .lib section in a.out corrupted */ +#define ELIBMAX 125 /* Attempting to link in too many shared libraries */ +#define ELIBEXEC 126 /* Cannot exec a shared library directly */ +#define 
ERESTART 127 /* Interrupted system call should be restarted */ +#define ESTRPIPE 128 /* Streams pipe error */ + +#define ENOMEDIUM 129 /* No medium found */ +#define EMEDIUMTYPE 130 /* Wrong medium type */ +#define ECANCELED 131 /* Operation Cancelled */ +#define ENOKEY 132 /* Required key not available */ +#define EKEYEXPIRED 133 /* Key has expired */ +#define EKEYREVOKED 134 /* Key has been revoked */ +#define EKEYREJECTED 135 /* Key was rejected by service */ + +/* for robust mutexes */ +#define EOWNERDEAD 136 /* Owner died */ +#define ENOTRECOVERABLE 137 /* State not recoverable */ + +#define ERFKILL 138 /* Operation not possible due to RF-kill */ + +#define EHWPOISON 139 /* Memory page has hardware error */ + +#endif diff --git a/arch/alpha/include/uapi/asm/fcntl.h b/arch/alpha/include/uapi/asm/fcntl.h new file mode 100644 index 0000000000..50bdc8e8a2 --- /dev/null +++ b/arch/alpha/include/uapi/asm/fcntl.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ALPHA_FCNTL_H +#define _ALPHA_FCNTL_H + +#define O_CREAT 01000 /* not fcntl */ +#define O_TRUNC 02000 /* not fcntl */ +#define O_EXCL 04000 /* not fcntl */ +#define O_NOCTTY 010000 /* not fcntl */ + +#define O_NONBLOCK 00004 +#define O_APPEND 00010 +#define O_DSYNC 040000 /* used to be O_SYNC, see below */ +#define O_DIRECTORY 0100000 /* must be a directory */ +#define O_NOFOLLOW 0200000 /* don't follow links */ +#define O_LARGEFILE 0400000 /* will be set by the kernel on every open */ +#define O_DIRECT 02000000 /* direct disk access - should check with OSF/1 */ +#define O_NOATIME 04000000 +#define O_CLOEXEC 010000000 /* set close_on_exec */ +/* + * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using + * the O_SYNC flag. We continue to use the existing numerical value + * for O_DSYNC semantics now, but using the correct symbolic name for it. + * This new value is used to request true Posix O_SYNC semantics. It is + * defined in this strange way to make sure applications compiled against + * new headers get at least O_DSYNC semantics on older kernels. + * + * This has the nice side-effect that we can simply test for O_DSYNC + * wherever we do not care if O_DSYNC or O_SYNC is used. + * + * Note: __O_SYNC must never be used directly. + */ +#define __O_SYNC 020000000 +#define O_SYNC (__O_SYNC|O_DSYNC) + +#define O_PATH 040000000 +#define __O_TMPFILE 0100000000 + +#define F_GETLK 7 +#define F_SETLK 8 +#define F_SETLKW 9 + +#define F_SETOWN 5 /* for sockets. */ +#define F_GETOWN 6 /* for sockets. */ +#define F_SETSIG 10 /* for sockets. */ +#define F_GETSIG 11 /* for sockets. */ + +/* for posix fcntl() and lockf() */ +#define F_RDLCK 1 +#define F_WRLCK 2 +#define F_UNLCK 8 + +/* for old implementation of bsd flock () */ +#define F_EXLCK 16 /* or 3 */ +#define F_SHLCK 32 /* or 4 */ + +#include + +#endif diff --git a/arch/alpha/include/uapi/asm/fpu.h b/arch/alpha/include/uapi/asm/fpu.h new file mode 100644 index 0000000000..cea9eafa05 --- /dev/null +++ b/arch/alpha/include/uapi/asm/fpu.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__ASM_ALPHA_FPU_H +#define _UAPI__ASM_ALPHA_FPU_H + + +/* + * Alpha floating-point control register defines: + */ +#define FPCR_DNOD (1UL<<47) /* denorm INV trap disable */ +#define FPCR_DNZ (1UL<<48) /* denorms to zero */ +#define FPCR_INVD (1UL<<49) /* invalid op disable (opt.) */ +#define FPCR_DZED (1UL<<50) /* division by zero disable (opt.) 
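The O_SYNC/O_DSYNC layout above means code that only cares about data integrity can test the single O_DSYNC bit and match both flags; a small sketch of that idiom:

static int wants_data_sync(int f_flags)
{
	/* true for both O_DSYNC and O_SYNC, since O_SYNC == __O_SYNC|O_DSYNC */
	return (f_flags & O_DSYNC) != 0;
}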
*/ +#define FPCR_OVFD (1UL<<51) /* overflow disable (optional) */ +#define FPCR_INV (1UL<<52) /* invalid operation */ +#define FPCR_DZE (1UL<<53) /* division by zero */ +#define FPCR_OVF (1UL<<54) /* overflow */ +#define FPCR_UNF (1UL<<55) /* underflow */ +#define FPCR_INE (1UL<<56) /* inexact */ +#define FPCR_IOV (1UL<<57) /* integer overflow */ +#define FPCR_UNDZ (1UL<<60) /* underflow to zero (opt.) */ +#define FPCR_UNFD (1UL<<61) /* underflow disable (opt.) */ +#define FPCR_INED (1UL<<62) /* inexact disable (opt.) */ +#define FPCR_SUM (1UL<<63) /* summary bit */ + +#define FPCR_DYN_SHIFT 58 /* first dynamic rounding mode bit */ +#define FPCR_DYN_CHOPPED (0x0UL << FPCR_DYN_SHIFT) /* towards 0 */ +#define FPCR_DYN_MINUS (0x1UL << FPCR_DYN_SHIFT) /* towards -INF */ +#define FPCR_DYN_NORMAL (0x2UL << FPCR_DYN_SHIFT) /* towards nearest */ +#define FPCR_DYN_PLUS (0x3UL << FPCR_DYN_SHIFT) /* towards +INF */ +#define FPCR_DYN_MASK (0x3UL << FPCR_DYN_SHIFT) + +#define FPCR_MASK 0xffff800000000000L + +/* + * IEEE trap enables are implemented in software. These per-thread + * bits are stored in the "ieee_state" field of "struct thread_info". + * Thus, the bits are defined so as not to conflict with the + * floating-point enable bit (which is architected). On top of that, + * we want to make these bits compatible with OSF/1 so + * ieee_set_fp_control() etc. can be implemented easily and + * compatibly. The corresponding definitions are in + * /usr/include/machine/fpu.h under OSF/1. + */ +#define IEEE_TRAP_ENABLE_INV (1UL<<1) /* invalid op */ +#define IEEE_TRAP_ENABLE_DZE (1UL<<2) /* division by zero */ +#define IEEE_TRAP_ENABLE_OVF (1UL<<3) /* overflow */ +#define IEEE_TRAP_ENABLE_UNF (1UL<<4) /* underflow */ +#define IEEE_TRAP_ENABLE_INE (1UL<<5) /* inexact */ +#define IEEE_TRAP_ENABLE_DNO (1UL<<6) /* denorm */ +#define IEEE_TRAP_ENABLE_MASK (IEEE_TRAP_ENABLE_INV | IEEE_TRAP_ENABLE_DZE |\ + IEEE_TRAP_ENABLE_OVF | IEEE_TRAP_ENABLE_UNF |\ + IEEE_TRAP_ENABLE_INE | IEEE_TRAP_ENABLE_DNO) + +/* Denorm and Underflow flushing */ +#define IEEE_MAP_DMZ (1UL<<12) /* Map denorm inputs to zero */ +#define IEEE_MAP_UMZ (1UL<<13) /* Map underflowed outputs to zero */ + +#define IEEE_MAP_MASK (IEEE_MAP_DMZ | IEEE_MAP_UMZ) + +/* status bits coming from fpcr: */ +#define IEEE_STATUS_INV (1UL<<17) +#define IEEE_STATUS_DZE (1UL<<18) +#define IEEE_STATUS_OVF (1UL<<19) +#define IEEE_STATUS_UNF (1UL<<20) +#define IEEE_STATUS_INE (1UL<<21) +#define IEEE_STATUS_DNO (1UL<<22) + +#define IEEE_STATUS_MASK (IEEE_STATUS_INV | IEEE_STATUS_DZE | \ + IEEE_STATUS_OVF | IEEE_STATUS_UNF | \ + IEEE_STATUS_INE | IEEE_STATUS_DNO) + +#define IEEE_SW_MASK (IEEE_TRAP_ENABLE_MASK | \ + IEEE_STATUS_MASK | IEEE_MAP_MASK) + +#define IEEE_CURRENT_RM_SHIFT 32 +#define IEEE_CURRENT_RM_MASK (3UL<> 35) & IEEE_STATUS_MASK; + sw |= (fp >> 36) & IEEE_MAP_DMZ; + sw |= (~fp >> 48) & (IEEE_TRAP_ENABLE_INV + | IEEE_TRAP_ENABLE_DZE + | IEEE_TRAP_ENABLE_OVF); + sw |= (~fp >> 57) & (IEEE_TRAP_ENABLE_UNF | IEEE_TRAP_ENABLE_INE); + sw |= (fp >> 47) & IEEE_MAP_UMZ; + sw |= (~fp >> 41) & IEEE_TRAP_ENABLE_DNO; + return sw; +} + + +#endif /* _UAPI__ASM_ALPHA_FPU_H */ diff --git a/arch/alpha/include/uapi/asm/gentrap.h b/arch/alpha/include/uapi/asm/gentrap.h new file mode 100644 index 0000000000..c02ccc5ece --- /dev/null +++ b/arch/alpha/include/uapi/asm/gentrap.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASMAXP_GENTRAP_H +#define _ASMAXP_GENTRAP_H + +/* + * Definitions for gentrap causes. 
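Using the FPCR_DYN_* definitions above, the dynamic rounding mode can be pulled out of a raw FPCR image as in this sketch (helper name invented).

static unsigned long fpcr_dyn_rounding(unsigned long fpcr)
{
	/* 0 = chopped, 1 = toward -INF, 2 = nearest, 3 = toward +INF */
	return (fpcr & FPCR_DYN_MASK) >> FPCR_DYN_SHIFT;
}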
They are generated by user-level + * programs and therefore should be compatible with the corresponding + * OSF/1 definitions. + */ +#define GEN_INTOVF -1 /* integer overflow */ +#define GEN_INTDIV -2 /* integer division by zero */ +#define GEN_FLTOVF -3 /* fp overflow */ +#define GEN_FLTDIV -4 /* fp division by zero */ +#define GEN_FLTUND -5 /* fp underflow */ +#define GEN_FLTINV -6 /* invalid fp operand */ +#define GEN_FLTINE -7 /* inexact fp operand */ +#define GEN_DECOVF -8 /* decimal overflow (for COBOL??) */ +#define GEN_DECDIV -9 /* decimal division by zero */ +#define GEN_DECINV -10 /* invalid decimal operand */ +#define GEN_ROPRAND -11 /* reserved operand */ +#define GEN_ASSERTERR -12 /* assertion error */ +#define GEN_NULPTRERR -13 /* null pointer error */ +#define GEN_STKOVF -14 /* stack overflow */ +#define GEN_STRLENERR -15 /* string length error */ +#define GEN_SUBSTRERR -16 /* substring error */ +#define GEN_RANGERR -17 /* range error */ +#define GEN_SUBRNG -18 +#define GEN_SUBRNG1 -19 +#define GEN_SUBRNG2 -20 +#define GEN_SUBRNG3 -21 /* these report range errors for */ +#define GEN_SUBRNG4 -22 /* subscripting (indexing) at levels 0..7 */ +#define GEN_SUBRNG5 -23 +#define GEN_SUBRNG6 -24 +#define GEN_SUBRNG7 -25 + +/* the remaining codes (-26..-1023) are reserved. */ + +#endif /* _ASMAXP_GENTRAP_H */ diff --git a/arch/alpha/include/uapi/asm/ioctl.h b/arch/alpha/include/uapi/asm/ioctl.h new file mode 100644 index 0000000000..a9d68a08ee --- /dev/null +++ b/arch/alpha/include/uapi/asm/ioctl.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ALPHA_IOCTL_H +#define _ALPHA_IOCTL_H + +/* + * The original linux ioctl numbering scheme was just a general + * "anything goes" setup, where more or less random numbers were + * assigned. Sorry, I was clueless when I started out on this. + * + * On the alpha, we'll try to clean it up a bit, using a more sane + * ioctl numbering, and also trying to be compatible with OSF/1 in + * the process. I'd like to clean it up for the i386 as well, but + * it's so painful recognizing both the new and the old numbers.. + */ + +#define _IOC_NRBITS 8 +#define _IOC_TYPEBITS 8 +#define _IOC_SIZEBITS 13 +#define _IOC_DIRBITS 3 + +#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1) +#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1) +#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1) +#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1) + +#define _IOC_NRSHIFT 0 +#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS) +#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS) +#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS) + +/* + * Direction bits _IOC_NONE could be 0, but OSF/1 gives it a bit. + * And this turns out useful to catch old ioctl numbers in header + * files for us. + */ +#define _IOC_NONE 1U +#define _IOC_READ 2U +#define _IOC_WRITE 4U + +#define _IOC(dir,type,nr,size) \ + ((unsigned int) \ + (((dir) << _IOC_DIRSHIFT) | \ + ((type) << _IOC_TYPESHIFT) | \ + ((nr) << _IOC_NRSHIFT) | \ + ((size) << _IOC_SIZESHIFT))) + +/* used to create numbers */ +#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) +#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size)) +#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size)) +#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size)) + +/* used to decode them.. 
*/ +#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) +#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK) +#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK) +#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK) + +/* ...and for the drivers/sound files... */ + +#define IOC_IN (_IOC_WRITE << _IOC_DIRSHIFT) +#define IOC_OUT (_IOC_READ << _IOC_DIRSHIFT) +#define IOC_INOUT ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT) +#define IOCSIZE_MASK (_IOC_SIZEMASK << _IOC_SIZESHIFT) +#define IOCSIZE_SHIFT (_IOC_SIZESHIFT) + +#endif /* _ALPHA_IOCTL_H */ diff --git a/arch/alpha/include/uapi/asm/ioctls.h b/arch/alpha/include/uapi/asm/ioctls.h new file mode 100644 index 0000000000..9713116052 --- /dev/null +++ b/arch/alpha/include/uapi/asm/ioctls.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_ALPHA_IOCTLS_H +#define _ASM_ALPHA_IOCTLS_H + +#include + +#define FIOCLEX _IO('f', 1) +#define FIONCLEX _IO('f', 2) +#define FIOASYNC _IOW('f', 125, int) +#define FIONBIO _IOW('f', 126, int) +#define FIONREAD _IOR('f', 127, int) +#define TIOCINQ FIONREAD +#define FIOQSIZE _IOR('f', 128, loff_t) + +#define TIOCGETP _IOR('t', 8, struct sgttyb) +#define TIOCSETP _IOW('t', 9, struct sgttyb) +#define TIOCSETN _IOW('t', 10, struct sgttyb) /* TIOCSETP wo flush */ + +#define TIOCSETC _IOW('t', 17, struct tchars) +#define TIOCGETC _IOR('t', 18, struct tchars) +#define TCGETS _IOR('t', 19, struct termios) +#define TCSETS _IOW('t', 20, struct termios) +#define TCSETSW _IOW('t', 21, struct termios) +#define TCSETSF _IOW('t', 22, struct termios) + +#define TCGETA _IOR('t', 23, struct termio) +#define TCSETA _IOW('t', 24, struct termio) +#define TCSETAW _IOW('t', 25, struct termio) +#define TCSETAF _IOW('t', 28, struct termio) + +#define TCSBRK _IO('t', 29) +#define TCXONC _IO('t', 30) +#define TCFLSH _IO('t', 31) + +#define TCGETS2 _IOR('T', 42, struct termios2) +#define TCSETS2 _IOW('T', 43, struct termios2) +#define TCSETSW2 _IOW('T', 44, struct termios2) +#define TCSETSF2 _IOW('T', 45, struct termios2) + +#define TIOCSWINSZ _IOW('t', 103, struct winsize) +#define TIOCGWINSZ _IOR('t', 104, struct winsize) +#define TIOCSTART _IO('t', 110) /* start output, like ^Q */ +#define TIOCSTOP _IO('t', 111) /* stop output, like ^S */ +#define TIOCOUTQ _IOR('t', 115, int) /* output queue size */ + +#define TIOCGLTC _IOR('t', 116, struct ltchars) +#define TIOCSLTC _IOW('t', 117, struct ltchars) +#define TIOCSPGRP _IOW('t', 118, int) +#define TIOCGPGRP _IOR('t', 119, int) + +#define TIOCEXCL 0x540C +#define TIOCNXCL 0x540D +#define TIOCSCTTY 0x540E + +#define TIOCSTI 0x5412 +#define TIOCMGET 0x5415 +#define TIOCMBIS 0x5416 +#define TIOCMBIC 0x5417 +#define TIOCMSET 0x5418 +# define TIOCM_LE 0x001 +# define TIOCM_DTR 0x002 +# define TIOCM_RTS 0x004 +# define TIOCM_ST 0x008 +# define TIOCM_SR 0x010 +# define TIOCM_CTS 0x020 +# define TIOCM_CAR 0x040 +# define TIOCM_RNG 0x080 +# define TIOCM_DSR 0x100 +# define TIOCM_CD TIOCM_CAR +# define TIOCM_RI TIOCM_RNG +# define TIOCM_OUT1 0x2000 +# define TIOCM_OUT2 0x4000 +# define TIOCM_LOOP 0x8000 + +#define TIOCGSOFTCAR 0x5419 +#define TIOCSSOFTCAR 0x541A +#define TIOCLINUX 0x541C +#define TIOCCONS 0x541D +#define TIOCGSERIAL 0x541E +#define TIOCSSERIAL 0x541F +#define TIOCPKT 0x5420 +# define TIOCPKT_DATA 0 +# define TIOCPKT_FLUSHREAD 1 +# define TIOCPKT_FLUSHWRITE 2 +# define TIOCPKT_STOP 4 +# define TIOCPKT_START 8 +# define TIOCPKT_NOSTOP 16 +# define TIOCPKT_DOSTOP 32 +# define TIOCPKT_IOCTL 64 + + 
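The _IOC() helpers defined in ioctl.h above pack a 3-bit direction, a 13-bit payload size, an 8-bit type character and an 8-bit command number into one 32-bit value, and the TIOC*/FIO* numbers in this file are built from them. A minimal user-space sketch of how such a number composes and decomposes (the MYDEV_GETCFG command and struct mydev_cfg are hypothetical, and <linux/ioctl.h> is assumed to make the _IOC_* macros visible on an alpha userland):

#include <stdio.h>
#include <linux/ioctl.h>	/* pulls in the alpha _IOC_* encoding shown above */

struct mydev_cfg {		/* hypothetical payload, for illustration only */
	int rate;
	int flags;
};

#define MYDEV_GETCFG	_IOR('M', 0x01, struct mydev_cfg)	/* "read": driver fills the struct */

int main(void)
{
	unsigned int nr = MYDEV_GETCFG;

	/* On alpha the direction lives in bits 31:29 (_IOC_READ == 2),
	 * the size in bits 28:16, the type in 15:8 and the number in 7:0. */
	printf("dir=%u type=%c nr=%u size=%u\n",
	       _IOC_DIR(nr), _IOC_TYPE(nr), _IOC_NR(nr), _IOC_SIZE(nr));
	return 0;
}

This layout differs from the generic asm-generic/ioctl.h one (2 direction bits, 14 size bits, and _IOC_NONE being 0 rather than 1), which is why drivers are expected to build numbers with the _IO*/_IOC_* macros instead of open-coding them.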
+#define TIOCNOTTY 0x5422 +#define TIOCSETD 0x5423 +#define TIOCGETD 0x5424 +#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ +#define TIOCSBRK 0x5427 /* BSD compatibility */ +#define TIOCCBRK 0x5428 /* BSD compatibility */ +#define TIOCGSID 0x5429 /* Return the session ID of FD */ +#define TIOCGRS485 _IOR('T', 0x2E, struct serial_rs485) +#define TIOCSRS485 _IOWR('T', 0x2F, struct serial_rs485) +#define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ +#define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */ +#define TIOCGDEV _IOR('T',0x32, unsigned int) /* Get primary device node of /dev/console */ +#define TIOCSIG _IOW('T',0x36, int) /* Generate signal on Pty slave */ +#define TIOCVHANGUP 0x5437 +#define TIOCGPKT _IOR('T', 0x38, int) /* Get packet mode state */ +#define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */ +#define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */ +#define TIOCGPTPEER _IO('T', 0x41) /* Safely open the slave */ +#define TIOCGISO7816 _IOR('T', 0x42, struct serial_iso7816) +#define TIOCSISO7816 _IOWR('T', 0x43, struct serial_iso7816) + +#define TIOCSERCONFIG 0x5453 +#define TIOCSERGWILD 0x5454 +#define TIOCSERSWILD 0x5455 +#define TIOCGLCKTRMIOS 0x5456 +#define TIOCSLCKTRMIOS 0x5457 +#define TIOCSERGSTRUCT 0x5458 /* For debugging only */ +#define TIOCSERGETLSR 0x5459 /* Get line status register */ + /* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ +# define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ +#define TIOCSERGETMULTI 0x545A /* Get multiport config */ +#define TIOCSERSETMULTI 0x545B /* Set multiport config */ + +#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ +#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ + +#endif /* _ASM_ALPHA_IOCTLS_H */ diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h new file mode 100644 index 0000000000..56b4ee5a6c --- /dev/null +++ b/arch/alpha/include/uapi/asm/mman.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __ALPHA_MMAN_H__ +#define __ALPHA_MMAN_H__ + +#define PROT_READ 0x1 /* page can be read */ +#define PROT_WRITE 0x2 /* page can be written */ +#define PROT_EXEC 0x4 /* page can be executed */ +#define PROT_SEM 0x8 /* page may be used for atomic ops */ +#define PROT_NONE 0x0 /* page can not be accessed */ +#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ +#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ + +/* 0x01 - 0x03 are defined in linux/mman.h */ +#define MAP_TYPE 0x0f /* Mask for type of mapping (OSF/1 is _wrong_) */ +#define MAP_FIXED 0x100 /* Interpret addr exactly */ +#define MAP_ANONYMOUS 0x10 /* don't use a file */ + +/* not used by linux, but here to make sure we don't clash with OSF/1 defines */ +#define _MAP_HASSEMAPHORE 0x0200 +#define _MAP_INHERIT 0x0400 +#define _MAP_UNALIGNED 0x0800 + +/* These are linux-specific */ +#define MAP_GROWSDOWN 0x01000 /* stack-like segment */ +#define MAP_DENYWRITE 0x02000 /* ETXTBSY */ +#define MAP_EXECUTABLE 0x04000 /* mark it as an executable */ +#define MAP_LOCKED 0x08000 /* lock the mapping */ +#define MAP_NORESERVE 0x10000 /* don't check for reservations */ +#define MAP_POPULATE 0x20000 /* populate (prefault) pagetables */ +#define MAP_NONBLOCK 0x40000 /* do not block on IO */ +#define MAP_STACK 0x80000 /* give out an address that is best suited for process/thread 
stacks */ +#define MAP_HUGETLB 0x100000 /* create a huge page mapping */ +#define MAP_FIXED_NOREPLACE 0x200000/* MAP_FIXED which doesn't unmap underlying mapping */ + +#define MS_ASYNC 1 /* sync memory asynchronously */ +#define MS_SYNC 2 /* synchronous memory sync */ +#define MS_INVALIDATE 4 /* invalidate the caches */ + +#define MCL_CURRENT 8192 /* lock all currently mapped pages */ +#define MCL_FUTURE 16384 /* lock all additions to address space */ +#define MCL_ONFAULT 32768 /* lock all pages that are faulted in */ + +#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ + +#define MADV_NORMAL 0 /* no further special treatment */ +#define MADV_RANDOM 1 /* expect random page references */ +#define MADV_SEQUENTIAL 2 /* expect sequential page references */ +#define MADV_WILLNEED 3 /* will need these pages */ +#define MADV_SPACEAVAIL 5 /* ensure resources are available */ +#define MADV_DONTNEED 6 /* don't need these pages */ + +/* common/generic parameters */ +#define MADV_FREE 8 /* free pages only if memory pressure */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ + +#define MADV_MERGEABLE 12 /* KSM may merge identical pages */ +#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ + +#define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ +#define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ + +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ + +#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */ +#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ + +#define MADV_COLD 20 /* deactivate these pages */ +#define MADV_PAGEOUT 21 /* reclaim these pages */ + +#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */ +#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */ + +/* compatibility flags */ +#define MAP_FILE 0 + +#define PKEY_DISABLE_ACCESS 0x1 +#define PKEY_DISABLE_WRITE 0x2 +#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ + PKEY_DISABLE_WRITE) + +#endif /* __ALPHA_MMAN_H__ */ diff --git a/arch/alpha/include/uapi/asm/pal.h b/arch/alpha/include/uapi/asm/pal.h new file mode 100644 index 0000000000..7427e028db --- /dev/null +++ b/arch/alpha/include/uapi/asm/pal.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__ALPHA_PAL_H +#define _UAPI__ALPHA_PAL_H + +/* + * Common PAL-code + */ +#define PAL_halt 0 +#define PAL_cflush 1 +#define PAL_draina 2 +#define PAL_bpt 128 +#define PAL_bugchk 129 +#define PAL_chmk 131 +#define PAL_callsys 131 +#define PAL_imb 134 +#define PAL_rduniq 158 +#define PAL_wruniq 159 +#define PAL_gentrap 170 +#define PAL_nphalt 190 + +/* + * VMS specific PAL-code + */ +#define PAL_swppal 10 +#define PAL_mfpr_vptb 41 + +/* + * OSF specific PAL-code + */ +#define PAL_cserve 9 +#define PAL_wripir 13 +#define PAL_rdmces 16 +#define PAL_wrmces 17 +#define PAL_wrfen 43 +#define PAL_wrvptptr 45 +#define PAL_jtopal 46 +#define PAL_swpctx 48 +#define PAL_wrval 49 +#define PAL_rdval 50 +#define PAL_tbi 51 +#define PAL_wrent 52 +#define PAL_swpipl 53 +#define PAL_rdps 54 +#define PAL_wrkgp 55 +#define PAL_wrusp 56 +#define PAL_wrperfmon 57 +#define PAL_rdusp 58 +#define PAL_whami 60 +#define PAL_retsys 61 +#define PAL_wtint 62 +#define PAL_rti 63 + + +#endif /* 
_UAPI__ALPHA_PAL_H */ diff --git a/arch/alpha/include/uapi/asm/param.h b/arch/alpha/include/uapi/asm/param.h new file mode 100644 index 0000000000..49c7119934 --- /dev/null +++ b/arch/alpha/include/uapi/asm/param.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_ASM_ALPHA_PARAM_H +#define _UAPI_ASM_ALPHA_PARAM_H + +#define HZ 1024 + +#define EXEC_PAGESIZE 8192 + +#ifndef NOGROUP +#define NOGROUP (-1) +#endif + +#define MAXHOSTNAMELEN 64 /* max length of hostname */ + +#endif /* _UAPI_ASM_ALPHA_PARAM_H */ diff --git a/arch/alpha/include/uapi/asm/posix_types.h b/arch/alpha/include/uapi/asm/posix_types.h new file mode 100644 index 0000000000..04f1ea5750 --- /dev/null +++ b/arch/alpha/include/uapi/asm/posix_types.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ALPHA_POSIX_TYPES_H +#define _ALPHA_POSIX_TYPES_H + +/* + * This file is generally used by user-level software, so you need to + * be a little careful about namespace pollution etc. Also, we cannot + * assume GCC is being used. + */ + +typedef unsigned int __kernel_ino_t; +#define __kernel_ino_t __kernel_ino_t + +typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ + +#include + +#endif /* _ALPHA_POSIX_TYPES_H */ diff --git a/arch/alpha/include/uapi/asm/ptrace.h b/arch/alpha/include/uapi/asm/ptrace.h new file mode 100644 index 0000000000..c291941810 --- /dev/null +++ b/arch/alpha/include/uapi/asm/ptrace.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_ASMAXP_PTRACE_H +#define _UAPI_ASMAXP_PTRACE_H + + +/* + * This struct defines the way the registers are stored on the + * kernel stack during a system call or other kernel entry + * + * NOTE! I want to minimize the overhead of system calls, so this + * struct has as little information as possible. It does not have + * + * - floating point regs: the kernel doesn't change those + * - r9-15: saved by the C compiler + * + * This makes "fork()" and "exec()" a bit more complex, but should + * give us low system call latency. + */ + +struct pt_regs { + unsigned long r0; + unsigned long r1; + unsigned long r2; + unsigned long r3; + unsigned long r4; + unsigned long r5; + unsigned long r6; + unsigned long r7; + unsigned long r8; + unsigned long r19; + unsigned long r20; + unsigned long r21; + unsigned long r22; + unsigned long r23; + unsigned long r24; + unsigned long r25; + unsigned long r26; + unsigned long r27; + unsigned long r28; + unsigned long hae; +/* JRP - These are the values provided to a0-a2 by PALcode */ + unsigned long trap_a0; + unsigned long trap_a1; + unsigned long trap_a2; +/* These are saved by PAL-code: */ + unsigned long ps; + unsigned long pc; + unsigned long gp; + unsigned long r16; + unsigned long r17; + unsigned long r18; +}; + +/* + * This is the extended stack used by signal handlers and the context + * switcher: it's pushed after the normal "struct pt_regs". 
+ */ +struct switch_stack { + unsigned long r9; + unsigned long r10; + unsigned long r11; + unsigned long r12; + unsigned long r13; + unsigned long r14; + unsigned long r15; + unsigned long r26; + unsigned long fp[32]; /* fp[31] is fpcr */ +}; + + +#endif /* _UAPI_ASMAXP_PTRACE_H */ diff --git a/arch/alpha/include/uapi/asm/reg.h b/arch/alpha/include/uapi/asm/reg.h new file mode 100644 index 0000000000..2652f3a385 --- /dev/null +++ b/arch/alpha/include/uapi/asm/reg.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __reg_h__ +#define __reg_h__ + +/* + * Exception frame offsets. + */ +#define EF_V0 0 +#define EF_T0 1 +#define EF_T1 2 +#define EF_T2 3 +#define EF_T3 4 +#define EF_T4 5 +#define EF_T5 6 +#define EF_T6 7 +#define EF_T7 8 +#define EF_S0 9 +#define EF_S1 10 +#define EF_S2 11 +#define EF_S3 12 +#define EF_S4 13 +#define EF_S5 14 +#define EF_S6 15 +#define EF_A3 16 +#define EF_A4 17 +#define EF_A5 18 +#define EF_T8 19 +#define EF_T9 20 +#define EF_T10 21 +#define EF_T11 22 +#define EF_RA 23 +#define EF_T12 24 +#define EF_AT 25 +#define EF_SP 26 +#define EF_PS 27 +#define EF_PC 28 +#define EF_GP 29 +#define EF_A0 30 +#define EF_A1 31 +#define EF_A2 32 + +#define EF_SIZE (33*8) +#define HWEF_SIZE (6*8) /* size of PAL frame (PS-A2) */ + +#define EF_SSIZE (EF_SIZE - HWEF_SIZE) + +/* + * Map register number into core file offset. + */ +#define CORE_REG(reg, ubase) \ + (((unsigned long *)((unsigned long)(ubase)))[reg]) + +#endif /* __reg_h__ */ diff --git a/arch/alpha/include/uapi/asm/regdef.h b/arch/alpha/include/uapi/asm/regdef.h new file mode 100644 index 0000000000..cc99df0c60 --- /dev/null +++ b/arch/alpha/include/uapi/asm/regdef.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __alpha_regdef_h__ +#define __alpha_regdef_h__ + +#define v0 $0 /* function return value */ + +#define t0 $1 /* temporary registers (caller-saved) */ +#define t1 $2 +#define t2 $3 +#define t3 $4 +#define t4 $5 +#define t5 $6 +#define t6 $7 +#define t7 $8 + +#define s0 $9 /* saved-registers (callee-saved registers) */ +#define s1 $10 +#define s2 $11 +#define s3 $12 +#define s4 $13 +#define s5 $14 +#define s6 $15 +#define fp s6 /* frame-pointer (s6 in frame-less procedures) */ + +#define a0 $16 /* argument registers (caller-saved) */ +#define a1 $17 +#define a2 $18 +#define a3 $19 +#define a4 $20 +#define a5 $21 + +#define t8 $22 /* more temps (caller-saved) */ +#define t9 $23 +#define t10 $24 +#define t11 $25 +#define ra $26 /* return address register */ +#define t12 $27 + +#define pv t12 /* procedure-variable register */ +#define AT $at /* assembler temporary */ +#define gp $29 /* global pointer */ +#define sp $30 /* stack pointer */ +#define zero $31 /* reads as zero, writes are noops */ + +#endif /* __alpha_regdef_h__ */ diff --git a/arch/alpha/include/uapi/asm/resource.h b/arch/alpha/include/uapi/asm/resource.h new file mode 100644 index 0000000000..362423ffe1 --- /dev/null +++ b/arch/alpha/include/uapi/asm/resource.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ALPHA_RESOURCE_H +#define _ALPHA_RESOURCE_H + +/* + * Alpha/Linux-specific ordering of these four resource limit IDs, + * the rest comes from the generic header: + */ +#define RLIMIT_NOFILE 6 /* max number of open files */ +#define RLIMIT_AS 7 /* address space limit */ +#define RLIMIT_NPROC 8 /* max number of processes */ +#define RLIMIT_MEMLOCK 9 /* max locked-in-memory address space */ + +/* + * SuS says limits have to 
be unsigned. Fine, it's unsigned, but + * we retain the old value for compatibility, especially with DU. + * When you run into the 2^63 barrier, you call me. + */ +#define RLIM_INFINITY 0x7ffffffffffffffful + +#include <asm-generic/resource.h> + +#endif /* _ALPHA_RESOURCE_H */ diff --git a/arch/alpha/include/uapi/asm/setup.h b/arch/alpha/include/uapi/asm/setup.h new file mode 100644 index 0000000000..f881ea5947 --- /dev/null +++ b/arch/alpha/include/uapi/asm/setup.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__ALPHA_SETUP_H +#define _UAPI__ALPHA_SETUP_H + +#define COMMAND_LINE_SIZE 256 + +#endif /* _UAPI__ALPHA_SETUP_H */ diff --git a/arch/alpha/include/uapi/asm/sigcontext.h b/arch/alpha/include/uapi/asm/sigcontext.h new file mode 100644 index 0000000000..5428c42567 --- /dev/null +++ b/arch/alpha/include/uapi/asm/sigcontext.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASMAXP_SIGCONTEXT_H +#define _ASMAXP_SIGCONTEXT_H + +struct sigcontext { + /* + * What should we have here? I'd probably better use the same + * stack layout as OSF/1, just in case we ever want to try + * running their binaries.. + * + * This is the basic layout, but I don't know if we'll ever + * actually fill in all the values.. + */ + long sc_onstack; + long sc_mask; + long sc_pc; + long sc_ps; + long sc_regs[32]; + long sc_ownedfp; + long sc_fpregs[32]; + unsigned long sc_fpcr; + unsigned long sc_fp_control; + unsigned long sc_reserved1, sc_reserved2; + unsigned long sc_ssize; + char * sc_sbase; + unsigned long sc_traparg_a0; + unsigned long sc_traparg_a1; + unsigned long sc_traparg_a2; + unsigned long sc_fp_trap_pc; + unsigned long sc_fp_trigger_sum; + unsigned long sc_fp_trigger_inst; +}; + + +#endif diff --git a/arch/alpha/include/uapi/asm/siginfo.h b/arch/alpha/include/uapi/asm/siginfo.h new file mode 100644 index 0000000000..e08eae8818 --- /dev/null +++ b/arch/alpha/include/uapi/asm/siginfo.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ALPHA_SIGINFO_H +#define _ALPHA_SIGINFO_H + +#include <asm-generic/siginfo.h> + +#endif diff --git a/arch/alpha/include/uapi/asm/signal.h b/arch/alpha/include/uapi/asm/signal.h new file mode 100644 index 0000000000..a69dd8d080 --- /dev/null +++ b/arch/alpha/include/uapi/asm/signal.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_ASMAXP_SIGNAL_H +#define _UAPI_ASMAXP_SIGNAL_H + +#include <linux/types.h> + +/* Avoid too many header ordering problems. */ +struct siginfo; + +#ifndef __KERNEL__ +/* Here we must cater to libcs that poke about in kernel headers. */ + +#define NSIG 32 +typedef unsigned long sigset_t; + +#endif /* __KERNEL__ */ + + +/* + * Linux/AXP has different signal numbers that Linux/i386: I'm trying + * to make it OSF/1 binary compatible, at least for normal binaries.
+ */ +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGEMT 7 +#define SIGFPE 8 +#define SIGKILL 9 +#define SIGBUS 10 +#define SIGSEGV 11 +#define SIGSYS 12 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGURG 16 +#define SIGSTOP 17 +#define SIGTSTP 18 +#define SIGCONT 19 +#define SIGCHLD 20 +#define SIGTTIN 21 +#define SIGTTOU 22 +#define SIGIO 23 +#define SIGXCPU 24 +#define SIGXFSZ 25 +#define SIGVTALRM 26 +#define SIGPROF 27 +#define SIGWINCH 28 +#define SIGINFO 29 +#define SIGUSR1 30 +#define SIGUSR2 31 + +#define SIGPOLL SIGIO +#define SIGPWR SIGINFO +#define SIGIOT SIGABRT + +/* These should not be considered constants from userland. */ +#define SIGRTMIN 32 +#define SIGRTMAX _NSIG + +#define SA_ONSTACK 0x00000001 +#define SA_RESTART 0x00000002 +#define SA_NOCLDSTOP 0x00000004 +#define SA_NODEFER 0x00000008 +#define SA_RESETHAND 0x00000010 +#define SA_NOCLDWAIT 0x00000020 +#define SA_SIGINFO 0x00000040 + +#define SA_ONESHOT SA_RESETHAND +#define SA_NOMASK SA_NODEFER + +#define MINSIGSTKSZ 4096 +#define SIGSTKSZ 16384 + +#define SIG_BLOCK 1 /* for blocking signals */ +#define SIG_UNBLOCK 2 /* for unblocking signals */ +#define SIG_SETMASK 3 /* for setting the signal mask */ + +#include + +#ifndef __KERNEL__ +/* Here we must cater to libcs that poke about in kernel headers. */ + +struct sigaction { + union { + __sighandler_t _sa_handler; + void (*_sa_sigaction)(int, struct siginfo *, void *); + } _u; + sigset_t sa_mask; + int sa_flags; +}; + +#define sa_handler _u._sa_handler +#define sa_sigaction _u._sa_sigaction + +#endif /* __KERNEL__ */ + +typedef struct sigaltstack { + void __user *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + +/* sigstack(2) is deprecated, and will be withdrawn in a future version + of the X/Open CAE Specification. Use sigaltstack instead. It is only + implemented here for OSF/1 compatibility. */ + +struct sigstack { + void __user *ss_sp; + int ss_onstack; +}; + + +#endif /* _UAPI_ASMAXP_SIGNAL_H */ diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h new file mode 100644 index 0000000000..284d28755b --- /dev/null +++ b/arch/alpha/include/uapi/asm/socket.h @@ -0,0 +1,160 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_ASM_SOCKET_H +#define _UAPI_ASM_SOCKET_H + +#include +#include + +/* For setsockopt(2) */ +/* + * Note: we only bother about making the SOL_SOCKET options + * same as OSF/1, as that's all that "normal" programs are + * likely to set. We don't necessarily want to be binary + * compatible with _everything_. 
+ */ +#define SOL_SOCKET 0xffff + +#define SO_DEBUG 0x0001 +#define SO_REUSEADDR 0x0004 +#define SO_KEEPALIVE 0x0008 +#define SO_DONTROUTE 0x0010 +#define SO_BROADCAST 0x0020 +#define SO_LINGER 0x0080 +#define SO_OOBINLINE 0x0100 +#define SO_REUSEPORT 0x0200 + +#define SO_TYPE 0x1008 +#define SO_ERROR 0x1007 +#define SO_SNDBUF 0x1001 +#define SO_RCVBUF 0x1002 +#define SO_SNDBUFFORCE 0x100a +#define SO_RCVBUFFORCE 0x100b +#define SO_RCVLOWAT 0x1010 +#define SO_SNDLOWAT 0x1011 +#define SO_RCVTIMEO_OLD 0x1012 +#define SO_SNDTIMEO_OLD 0x1013 +#define SO_ACCEPTCONN 0x1014 +#define SO_PROTOCOL 0x1028 +#define SO_DOMAIN 0x1029 + +/* linux-specific, might as well be the same as on i386 */ +#define SO_NO_CHECK 11 +#define SO_PRIORITY 12 +#define SO_BSDCOMPAT 14 + +#define SO_PASSCRED 17 +#define SO_PEERCRED 18 +#define SO_BINDTODEVICE 25 + +/* Socket filtering */ +#define SO_ATTACH_FILTER 26 +#define SO_DETACH_FILTER 27 +#define SO_GET_FILTER SO_ATTACH_FILTER + +#define SO_PEERNAME 28 + +#define SO_PEERSEC 30 +#define SO_PASSSEC 34 + +/* Security levels - as per NRL IPv6 - don't actually do anything */ +#define SO_SECURITY_AUTHENTICATION 19 +#define SO_SECURITY_ENCRYPTION_TRANSPORT 20 +#define SO_SECURITY_ENCRYPTION_NETWORK 21 + +#define SO_MARK 36 + +#define SO_RXQ_OVFL 40 + +#define SO_WIFI_STATUS 41 +#define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 + +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + +#define SO_LOCK_FILTER 44 + +#define SO_SELECT_ERR_QUEUE 45 + +#define SO_BUSY_POLL 46 + +#define SO_MAX_PACING_RATE 47 + +#define SO_BPF_EXTENSIONS 48 + +#define SO_INCOMING_CPU 49 + +#define SO_ATTACH_BPF 50 +#define SO_DETACH_BPF SO_DETACH_FILTER + +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + +#define SO_CNX_ADVICE 53 + +#define SCM_TIMESTAMPING_OPT_STATS 54 + +#define SO_MEMINFO 55 + +#define SO_INCOMING_NAPI_ID 56 + +#define SO_COOKIE 57 + +#define SCM_TIMESTAMPING_PKTINFO 58 + +#define SO_PEERGROUPS 59 + +#define SO_ZEROCOPY 60 + +#define SO_TXTIME 61 +#define SCM_TXTIME SO_TXTIME + +#define SO_BINDTOIFINDEX 62 + +#define SO_TIMESTAMP_OLD 29 +#define SO_TIMESTAMPNS_OLD 35 +#define SO_TIMESTAMPING_OLD 37 + +#define SO_TIMESTAMP_NEW 63 +#define SO_TIMESTAMPNS_NEW 64 +#define SO_TIMESTAMPING_NEW 65 + +#define SO_RCVTIMEO_NEW 66 +#define SO_SNDTIMEO_NEW 67 + +#define SO_DETACH_REUSEPORT_BPF 68 + +#define SO_PREFER_BUSY_POLL 69 +#define SO_BUSY_POLL_BUDGET 70 + +#define SO_NETNS_COOKIE 71 + +#define SO_BUF_LOCK 72 + +#define SO_RESERVE_MEM 73 + +#if !defined(__KERNEL__) + +#if __BITS_PER_LONG == 64 +#define SO_TIMESTAMP SO_TIMESTAMP_OLD +#define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD +#define SO_TIMESTAMPING SO_TIMESTAMPING_OLD + +#define SO_RCVTIMEO SO_RCVTIMEO_OLD +#define SO_SNDTIMEO SO_SNDTIMEO_OLD +#else +#define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW) +#define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW) +#define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW) + +#define SO_RCVTIMEO (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_RCVTIMEO_OLD : SO_RCVTIMEO_NEW) +#define SO_SNDTIMEO (sizeof(time_t) == sizeof(__kernel_long_t) ? 
SO_SNDTIMEO_OLD : SO_SNDTIMEO_NEW) +#endif + +#define SCM_TIMESTAMP SO_TIMESTAMP +#define SCM_TIMESTAMPNS SO_TIMESTAMPNS +#define SCM_TIMESTAMPING SO_TIMESTAMPING + +#endif + +#endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/alpha/include/uapi/asm/sockios.h b/arch/alpha/include/uapi/asm/sockios.h new file mode 100644 index 0000000000..af92bc27c3 --- /dev/null +++ b/arch/alpha/include/uapi/asm/sockios.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_ALPHA_SOCKIOS_H +#define _ASM_ALPHA_SOCKIOS_H + +/* Socket-level I/O control calls. */ + +#define FIOGETOWN _IOR('f', 123, int) +#define FIOSETOWN _IOW('f', 124, int) + +#define SIOCATMARK _IOR('s', 7, int) +#define SIOCSPGRP _IOW('s', 8, pid_t) +#define SIOCGPGRP _IOR('s', 9, pid_t) + +#define SIOCGSTAMP_OLD 0x8906 /* Get stamp (timeval) */ +#define SIOCGSTAMPNS_OLD 0x8907 /* Get stamp (timespec) */ + +#endif /* _ASM_ALPHA_SOCKIOS_H */ diff --git a/arch/alpha/include/uapi/asm/stat.h b/arch/alpha/include/uapi/asm/stat.h new file mode 100644 index 0000000000..3f454fbd30 --- /dev/null +++ b/arch/alpha/include/uapi/asm/stat.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ALPHA_STAT_H +#define _ALPHA_STAT_H + +struct stat { + unsigned int st_dev; + unsigned int st_ino; + unsigned int st_mode; + unsigned int st_nlink; + unsigned int st_uid; + unsigned int st_gid; + unsigned int st_rdev; + long st_size; + unsigned long st_atime; + unsigned long st_mtime; + unsigned long st_ctime; + unsigned int st_blksize; + unsigned int st_blocks; + unsigned int st_flags; + unsigned int st_gen; +}; + +/* The stat64 structure increases the size of dev_t, blkcnt_t, adds + nanosecond resolution times, and padding for expansion. */ + +struct stat64 { + unsigned long st_dev; + unsigned long st_ino; + unsigned long st_rdev; + long st_size; + unsigned long st_blocks; + + unsigned int st_mode; + unsigned int st_uid; + unsigned int st_gid; + unsigned int st_blksize; + unsigned int st_nlink; + unsigned int __pad0; + + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + long __unused[3]; +}; + +#endif diff --git a/arch/alpha/include/uapi/asm/statfs.h b/arch/alpha/include/uapi/asm/statfs.h new file mode 100644 index 0000000000..95852a4f57 --- /dev/null +++ b/arch/alpha/include/uapi/asm/statfs.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ALPHA_STATFS_H +#define _ALPHA_STATFS_H + +#include + +/* Alpha is the only 64-bit platform with 32-bit statfs. And doesn't + even seem to implement statfs64 */ +#define __statfs_word __u32 + +#include + +#endif diff --git a/arch/alpha/include/uapi/asm/swab.h b/arch/alpha/include/uapi/asm/swab.h new file mode 100644 index 0000000000..1cc70d2727 --- /dev/null +++ b/arch/alpha/include/uapi/asm/swab.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ALPHA_SWAB_H +#define _ALPHA_SWAB_H + +#include +#include +#include + +#ifdef __GNUC__ + +static inline __attribute_const__ __u32 __arch_swab32(__u32 x) +{ + /* + * Unfortunately, we can't use the 6 instruction sequence + * on ev6 since the latency of the UNPKBW is 3, which is + * pretty hard to hide. 
Just in case a future implementation + * has a lower latency, here's the sequence (also by Mike Burrows) + * + * UNPKBW a0, v0 v0: 00AA00BB00CC00DD + * SLL v0, 24, a0 a0: BB00CC00DD000000 + * BIS v0, a0, a0 a0: BBAACCBBDDCC00DD + * EXTWL a0, 6, v0 v0: 000000000000BBAA + * ZAP a0, 0xf3, a0 a0: 00000000DDCC0000 + * ADDL a0, v0, v0 v0: ssssssssDDCCBBAA + */ + + __u64 t0, t1, t2, t3; + + t0 = __kernel_inslh(x, 7); /* t0 : 0000000000AABBCC */ + t1 = __kernel_inswl(x, 3); /* t1 : 000000CCDD000000 */ + t1 |= t0; /* t1 : 000000CCDDAABBCC */ + t2 = t1 >> 16; /* t2 : 0000000000CCDDAA */ + t0 = t1 & 0xFF00FF00; /* t0 : 00000000DD00BB00 */ + t3 = t2 & 0x00FF00FF; /* t3 : 0000000000CC00AA */ + t1 = t0 + t3; /* t1 : ssssssssDDCCBBAA */ + + return t1; +} +#define __arch_swab32 __arch_swab32 + +#endif /* __GNUC__ */ + +#endif /* _ALPHA_SWAB_H */ diff --git a/arch/alpha/include/uapi/asm/sysinfo.h b/arch/alpha/include/uapi/asm/sysinfo.h new file mode 100644 index 0000000000..188ea76c7f --- /dev/null +++ b/arch/alpha/include/uapi/asm/sysinfo.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * include/asm-alpha/sysinfo.h + */ + +#ifndef __ASM_ALPHA_SYSINFO_H +#define __ASM_ALPHA_SYSINFO_H + +/* This defines the subset of the OSF/1 getsysinfo/setsysinfo calls + that we support. */ + +#define GSI_UACPROC 8 +#define GSI_IEEE_FP_CONTROL 45 +#define GSI_IEEE_STATE_AT_SIGNAL 46 +#define GSI_PROC_TYPE 60 +#define GSI_GET_HWRPB 101 + +#define SSI_NVPAIRS 1 +#define SSI_LMF 7 +#define SSI_IEEE_FP_CONTROL 14 +#define SSI_IEEE_STATE_AT_SIGNAL 15 +#define SSI_IEEE_IGNORE_STATE_AT_SIGNAL 16 +#define SSI_IEEE_RAISE_EXCEPTION 1001 /* linux specific */ + +#define SSIN_UACPROC 6 + +#define UAC_BITMASK 7 +#define UAC_NOPRINT 1 +#define UAC_NOFIX 2 +#define UAC_SIGBUS 4 + +#endif /* __ASM_ALPHA_SYSINFO_H */ diff --git a/arch/alpha/include/uapi/asm/termbits.h b/arch/alpha/include/uapi/asm/termbits.h new file mode 100644 index 0000000000..4575ba34a0 --- /dev/null +++ b/arch/alpha/include/uapi/asm/termbits.h @@ -0,0 +1,223 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ALPHA_TERMBITS_H +#define _ALPHA_TERMBITS_H + +#include + +typedef unsigned char cc_t; +typedef unsigned int speed_t; +typedef unsigned int tcflag_t; + +/* + * termios type and macro definitions. Be careful about adding stuff + * to this file since it's used in GNU libc and there are strict rules + * concerning namespace pollution. 
+ */ + +#define NCCS 19 +struct termios { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_cc[NCCS]; /* control characters */ + cc_t c_line; /* line discipline (== c_cc[19]) */ + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + +/* Alpha has identical termios and termios2 */ + +struct termios2 { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_cc[NCCS]; /* control characters */ + cc_t c_line; /* line discipline (== c_cc[19]) */ + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + +/* Alpha has matching termios and ktermios */ + +struct ktermios { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_cc[NCCS]; /* control characters */ + cc_t c_line; /* line discipline (== c_cc[19]) */ + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + +/* c_cc characters */ +#define VEOF 0 +#define VEOL 1 +#define VEOL2 2 +#define VERASE 3 +#define VWERASE 4 +#define VKILL 5 +#define VREPRINT 6 +#define VSWTC 7 +#define VINTR 8 +#define VQUIT 9 +#define VSUSP 10 +#define VSTART 12 +#define VSTOP 13 +#define VLNEXT 14 +#define VDISCARD 15 +#define VMIN 16 +#define VTIME 17 + +/* c_iflag bits */ +#define IGNBRK 0000001 +#define BRKINT 0000002 +#define IGNPAR 0000004 +#define PARMRK 0000010 +#define INPCK 0000020 +#define ISTRIP 0000040 +#define INLCR 0000100 +#define IGNCR 0000200 +#define ICRNL 0000400 +#define IXON 0001000 +#define IXOFF 0002000 +#define IXANY 0004000 +#define IUCLC 0010000 +#define IMAXBEL 0020000 +#define IUTF8 0040000 + +/* c_oflag bits */ +#define OPOST 0000001 +#define ONLCR 0000002 +#define OLCUC 0000004 + +#define OCRNL 0000010 +#define ONOCR 0000020 +#define ONLRET 0000040 + +#define OFILL 00000100 +#define OFDEL 00000200 +#define NLDLY 00001400 +#define NL0 00000000 +#define NL1 00000400 +#define NL2 00001000 +#define NL3 00001400 +#define TABDLY 00006000 +#define TAB0 00000000 +#define TAB1 00002000 +#define TAB2 00004000 +#define TAB3 00006000 +#define CRDLY 00030000 +#define CR0 00000000 +#define CR1 00010000 +#define CR2 00020000 +#define CR3 00030000 +#define FFDLY 00040000 +#define FF0 00000000 +#define FF1 00040000 +#define BSDLY 00100000 +#define BS0 00000000 +#define BS1 00100000 +#define VTDLY 00200000 +#define VT0 00000000 +#define VT1 00200000 +/* + * Should be equivalent to TAB3, see description of TAB3 in + * POSIX.1-2008, Ch. 
11.2.3 "Output Modes" + */ +#define XTABS TAB3 + +/* c_cflag bit meaning */ +#define CBAUD 0000037 +#define B0 0000000 /* hang up */ +#define B50 0000001 +#define B75 0000002 +#define B110 0000003 +#define B134 0000004 +#define B150 0000005 +#define B200 0000006 +#define B300 0000007 +#define B600 0000010 +#define B1200 0000011 +#define B1800 0000012 +#define B2400 0000013 +#define B4800 0000014 +#define B9600 0000015 +#define B19200 0000016 +#define B38400 0000017 +#define EXTA B19200 +#define EXTB B38400 +#define CBAUDEX 0000000 +#define B57600 00020 +#define B115200 00021 +#define B230400 00022 +#define B460800 00023 +#define B500000 00024 +#define B576000 00025 +#define B921600 00026 +#define B1000000 00027 +#define B1152000 00030 +#define B1500000 00031 +#define B2000000 00032 +#define B2500000 00033 +#define B3000000 00034 +#define B3500000 00035 +#define B4000000 00036 +#define BOTHER 00037 + +#define CSIZE 00001400 +#define CS5 00000000 +#define CS6 00000400 +#define CS7 00001000 +#define CS8 00001400 + +#define CSTOPB 00002000 +#define CREAD 00004000 +#define PARENB 00010000 +#define PARODD 00020000 +#define HUPCL 00040000 + +#define CLOCAL 00100000 +#define CMSPAR 010000000000 /* mark or space (stick) parity */ +#define CRTSCTS 020000000000 /* flow control */ + +#define CIBAUD 07600000 +#define IBSHIFT 16 + +/* c_lflag bits */ +#define ISIG 0x00000080 +#define ICANON 0x00000100 +#define XCASE 0x00004000 +#define ECHO 0x00000008 +#define ECHOE 0x00000002 +#define ECHOK 0x00000004 +#define ECHONL 0x00000010 +#define NOFLSH 0x80000000 +#define TOSTOP 0x00400000 +#define ECHOCTL 0x00000040 +#define ECHOPRT 0x00000020 +#define ECHOKE 0x00000001 +#define FLUSHO 0x00800000 +#define PENDIN 0x20000000 +#define IEXTEN 0x00000400 +#define EXTPROC 0x10000000 + +/* Values for the ACTION argument to `tcflow'. */ +#define TCOOFF 0 +#define TCOON 1 +#define TCIOFF 2 +#define TCION 3 + +/* Values for the QUEUE_SELECTOR argument to `tcflush'. */ +#define TCIFLUSH 0 +#define TCOFLUSH 1 +#define TCIOFLUSH 2 + +/* Values for the OPTIONAL_ACTIONS argument to `tcsetattr'. */ +#define TCSANOW 0 +#define TCSADRAIN 1 +#define TCSAFLUSH 2 + +#endif /* _ALPHA_TERMBITS_H */ diff --git a/arch/alpha/include/uapi/asm/termios.h b/arch/alpha/include/uapi/asm/termios.h new file mode 100644 index 0000000000..e1b981222a --- /dev/null +++ b/arch/alpha/include/uapi/asm/termios.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_ALPHA_TERMIOS_H +#define _UAPI_ALPHA_TERMIOS_H + +#include +#include + +struct sgttyb { + char sg_ispeed; + char sg_ospeed; + char sg_erase; + char sg_kill; + short sg_flags; +}; + +struct tchars { + char t_intrc; + char t_quitc; + char t_startc; + char t_stopc; + char t_eofc; + char t_brkc; +}; + +struct ltchars { + char t_suspc; + char t_dsuspc; + char t_rprntc; + char t_flushc; + char t_werasc; + char t_lnextc; +}; + +struct winsize { + unsigned short ws_row; + unsigned short ws_col; + unsigned short ws_xpixel; + unsigned short ws_ypixel; +}; + +#define NCC 8 +struct termio { + unsigned short c_iflag; /* input mode flags */ + unsigned short c_oflag; /* output mode flags */ + unsigned short c_cflag; /* control mode flags */ + unsigned short c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[NCC]; /* control characters */ +}; + +/* + * c_cc characters in the termio structure. Oh, how I love being + * backwardly compatible. 
Notice that character 4 and 5 are + * interpreted differently depending on whether ICANON is set in + * c_lflag. If it's set, they are used as _VEOF and _VEOL, otherwise + * as _VMIN and V_TIME. This is for compatibility with OSF/1 (which + * is compatible with sysV)... + */ +#define _VINTR 0 +#define _VQUIT 1 +#define _VERASE 2 +#define _VKILL 3 +#define _VEOF 4 +#define _VMIN 4 +#define _VEOL 5 +#define _VTIME 5 +#define _VEOL2 6 +#define _VSWTC 7 + + +#endif /* _UAPI_ALPHA_TERMIOS_H */ diff --git a/arch/alpha/include/uapi/asm/types.h b/arch/alpha/include/uapi/asm/types.h new file mode 100644 index 0000000000..6c3d499381 --- /dev/null +++ b/arch/alpha/include/uapi/asm/types.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_ALPHA_TYPES_H +#define _UAPI_ALPHA_TYPES_H + +/* + * This file is never included by application software unless + * explicitly requested (e.g., via linux/types.h) in which case the + * application is Linux specific so (user-) name space pollution is + * not a major issue. However, for interoperability, libraries still + * need to be careful to avoid a name clashes. + */ + +/* + * This is here because we used to use l64 for alpha + * and we don't want to impact user mode with our change to ll64 + * in the kernel. + * + * However, some user programs are fine with this. They can + * flag __SANE_USERSPACE_TYPES__ to get int-ll64.h here. + */ +#if !defined(__SANE_USERSPACE_TYPES__) && !defined(__KERNEL__) +#include <asm-generic/int-l64.h> +#else +#include <asm-generic/int-ll64.h> +#endif + +#endif /* _UAPI_ALPHA_TYPES_H */ diff --git a/arch/alpha/include/uapi/asm/unistd.h b/arch/alpha/include/uapi/asm/unistd.h new file mode 100644 index 0000000000..71fd5db068 --- /dev/null +++ b/arch/alpha/include/uapi/asm/unistd.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_ALPHA_UNISTD_H +#define _UAPI_ALPHA_UNISTD_H + +/* These are traditionally the names linux-alpha uses for + * the two otherwise generic system calls */ +#define __NR_umount __NR_umount2 +#define __NR_osf_shmat __NR_shmat + +/* These return an extra value but can be used as aliases */ +#define __NR_getpid __NR_getxpid +#define __NR_getuid __NR_getxuid +#define __NR_getgid __NR_getxgid + +#include + +#endif /* _UAPI_ALPHA_UNISTD_H */ diff --git a/arch/alpha/kernel/.gitignore b/arch/alpha/kernel/.gitignore new file mode 100644 index 0000000000..bbb90f92d0 --- /dev/null +++ b/arch/alpha/kernel/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +vmlinux.lds diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile new file mode 100644 index 0000000000..5a74581bf0 --- /dev/null +++ b/arch/alpha/kernel/Makefile @@ -0,0 +1,112 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the linux kernel.
+# + +extra-y := head.o vmlinux.lds +asflags-y := $(KBUILD_CFLAGS) +ccflags-y := -Wno-sign-compare + +obj-y := entry.o traps.o process.o osf_sys.o irq.o \ + irq_alpha.o signal.o setup.o ptrace.o time.o \ + systbls.o err_common.o io.o bugs.o + +obj-$(CONFIG_VGA_HOSE) += console.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_PCI) += pci.o pci_iommu.o pci-sysfs.o +obj-$(CONFIG_SRM_ENV) += srm_env.o +obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o +obj-$(CONFIG_RTC_DRV_ALPHA) += rtc.o +obj-$(CONFIG_AUDIT) += audit.o + +ifdef CONFIG_ALPHA_GENERIC + +obj-y += core_apecs.o core_cia.o core_irongate.o core_lca.o \ + core_mcpcia.o core_polaris.o core_t2.o \ + core_tsunami.o + +obj-y += sys_alcor.o sys_cabriolet.o sys_dp264.o sys_eb64p.o sys_eiger.o \ + sys_jensen.o sys_miata.o sys_mikasa.o sys_nautilus.o \ + sys_noritake.o sys_rawhide.o sys_ruffian.o sys_rx164.o \ + sys_sable.o sys_sio.o sys_sx164.o sys_takara.o + +ifndef CONFIG_ALPHA_LEGACY_START_ADDRESS +obj-y += core_marvel.o core_titan.o core_wildfire.o +obj-y += sys_marvel.o sys_titan.o sys_wildfire.o +obj-y += err_ev7.o err_titan.o err_marvel.o +endif + +obj-y += irq_pyxis.o irq_i8259.o irq_srm.o +obj-y += err_ev6.o +obj-y += es1888.o smc37c669.o smc37c93x.o pc873xx.o gct.o +obj-y += srmcons.o + +else + +# Misc support +obj-$(CONFIG_ALPHA_SRM) += srmcons.o + +ifdef CONFIG_BINFMT_AOUT +obj-y += binfmt_loader.o +endif + +# Core logic support +obj-$(CONFIG_ALPHA_APECS) += core_apecs.o +obj-$(CONFIG_ALPHA_CIA) += core_cia.o +obj-$(CONFIG_ALPHA_IRONGATE) += core_irongate.o +obj-$(CONFIG_ALPHA_LCA) += core_lca.o +obj-$(CONFIG_ALPHA_MARVEL) += core_marvel.o gct.o +obj-$(CONFIG_ALPHA_MCPCIA) += core_mcpcia.o +obj-$(CONFIG_ALPHA_POLARIS) += core_polaris.o +obj-$(CONFIG_ALPHA_T2) += core_t2.o +obj-$(CONFIG_ALPHA_TSUNAMI) += core_tsunami.o +obj-$(CONFIG_ALPHA_TITAN) += core_titan.o +obj-$(CONFIG_ALPHA_WILDFIRE) += core_wildfire.o + +# Board support +obj-$(CONFIG_ALPHA_ALCOR) += sys_alcor.o irq_i8259.o irq_srm.o +obj-$(CONFIG_ALPHA_CABRIOLET) += sys_cabriolet.o irq_i8259.o irq_srm.o \ + pc873xx.o +obj-$(CONFIG_ALPHA_EB164) += sys_cabriolet.o irq_i8259.o irq_srm.o \ + pc873xx.o +obj-$(CONFIG_ALPHA_EB66P) += sys_cabriolet.o irq_i8259.o irq_srm.o \ + pc873xx.o +obj-$(CONFIG_ALPHA_LX164) += sys_cabriolet.o irq_i8259.o irq_srm.o \ + smc37c93x.o +obj-$(CONFIG_ALPHA_PC164) += sys_cabriolet.o irq_i8259.o irq_srm.o \ + smc37c93x.o +obj-$(CONFIG_ALPHA_DP264) += sys_dp264.o irq_i8259.o es1888.o smc37c669.o +obj-$(CONFIG_ALPHA_SHARK) += sys_dp264.o irq_i8259.o es1888.o smc37c669.o +obj-$(CONFIG_ALPHA_TITAN) += sys_titan.o irq_i8259.o smc37c669.o +obj-$(CONFIG_ALPHA_EB64P) += sys_eb64p.o irq_i8259.o +obj-$(CONFIG_ALPHA_EB66) += sys_eb64p.o irq_i8259.o +obj-$(CONFIG_ALPHA_EIGER) += sys_eiger.o irq_i8259.o +obj-$(CONFIG_ALPHA_JENSEN) += sys_jensen.o pci-noop.o irq_i8259.o +obj-$(CONFIG_ALPHA_MARVEL) += sys_marvel.o +obj-$(CONFIG_ALPHA_MIATA) += sys_miata.o irq_pyxis.o irq_i8259.o \ + es1888.o smc37c669.o +obj-$(CONFIG_ALPHA_MIKASA) += sys_mikasa.o irq_i8259.o irq_srm.o +obj-$(CONFIG_ALPHA_NAUTILUS) += sys_nautilus.o irq_i8259.o irq_srm.o +obj-$(CONFIG_ALPHA_NORITAKE) += sys_noritake.o irq_i8259.o +obj-$(CONFIG_ALPHA_RAWHIDE) += sys_rawhide.o irq_i8259.o +obj-$(CONFIG_ALPHA_RUFFIAN) += sys_ruffian.o irq_pyxis.o irq_i8259.o +obj-$(CONFIG_ALPHA_RX164) += sys_rx164.o irq_i8259.o +obj-$(CONFIG_ALPHA_SABLE) += sys_sable.o +obj-$(CONFIG_ALPHA_LYNX) += sys_sable.o +obj-$(CONFIG_ALPHA_BOOK1) += sys_sio.o irq_i8259.o irq_srm.o pc873xx.o 
+obj-$(CONFIG_ALPHA_AVANTI) += sys_sio.o irq_i8259.o irq_srm.o pc873xx.o +obj-$(CONFIG_ALPHA_NONAME) += sys_sio.o irq_i8259.o irq_srm.o pc873xx.o +obj-$(CONFIG_ALPHA_P2K) += sys_sio.o irq_i8259.o irq_srm.o pc873xx.o +obj-$(CONFIG_ALPHA_XL) += sys_sio.o irq_i8259.o irq_srm.o pc873xx.o +obj-$(CONFIG_ALPHA_SX164) += sys_sx164.o irq_pyxis.o irq_i8259.o \ + irq_srm.o smc37c669.o +obj-$(CONFIG_ALPHA_TAKARA) += sys_takara.o irq_i8259.o pc873xx.o +obj-$(CONFIG_ALPHA_WILDFIRE) += sys_wildfire.o irq_i8259.o + +# Error support +obj-$(CONFIG_ALPHA_MARVEL) += err_ev7.o err_marvel.o +obj-$(CONFIG_ALPHA_NAUTILUS) += err_ev6.o +obj-$(CONFIG_ALPHA_TITAN) += err_ev6.o err_titan.o + +endif # GENERIC diff --git a/arch/alpha/kernel/asm-offsets.c b/arch/alpha/kernel/asm-offsets.c new file mode 100644 index 0000000000..2e125e5c15 --- /dev/null +++ b/arch/alpha/kernel/asm-offsets.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Generate definitions needed by assembly language modules. + * This code generates raw asm output which is post-processed to extract + * and format the required data. + */ + +#include +#include +#include +#include +#include +#include + +void foo(void) +{ + DEFINE(TI_TASK, offsetof(struct thread_info, task)); + DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); + DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); + BLANK(); + + DEFINE(TASK_BLOCKED, offsetof(struct task_struct, blocked)); + DEFINE(TASK_CRED, offsetof(struct task_struct, cred)); + DEFINE(TASK_REAL_PARENT, offsetof(struct task_struct, real_parent)); + DEFINE(TASK_GROUP_LEADER, offsetof(struct task_struct, group_leader)); + DEFINE(TASK_TGID, offsetof(struct task_struct, tgid)); + BLANK(); + + DEFINE(CRED_UID, offsetof(struct cred, uid)); + DEFINE(CRED_EUID, offsetof(struct cred, euid)); + DEFINE(CRED_GID, offsetof(struct cred, gid)); + DEFINE(CRED_EGID, offsetof(struct cred, egid)); + BLANK(); + + DEFINE(SIZEOF_PT_REGS, sizeof(struct pt_regs)); + DEFINE(PT_PTRACED, PT_PTRACED); + DEFINE(CLONE_VM, CLONE_VM); + DEFINE(CLONE_UNTRACED, CLONE_UNTRACED); + DEFINE(SIGCHLD, SIGCHLD); + BLANK(); + + DEFINE(HAE_CACHE, offsetof(struct alpha_machine_vector, hae_cache)); + DEFINE(HAE_REG, offsetof(struct alpha_machine_vector, hae_register)); +} diff --git a/arch/alpha/kernel/audit.c b/arch/alpha/kernel/audit.c new file mode 100644 index 0000000000..3ab0470978 --- /dev/null +++ b/arch/alpha/kernel/audit.c @@ -0,0 +1,62 @@ +#include +#include +#include +#include + +static unsigned dir_class[] = { +#include +~0U +}; + +static unsigned read_class[] = { +#include +~0U +}; + +static unsigned write_class[] = { +#include +~0U +}; + +static unsigned chattr_class[] = { +#include +~0U +}; + +static unsigned signal_class[] = { +#include +~0U +}; + +int audit_classify_arch(int arch) +{ + return 0; +} + +int audit_classify_syscall(int abi, unsigned syscall) +{ + switch(syscall) { + case __NR_open: + return AUDITSC_OPEN; + case __NR_openat: + return AUDITSC_OPENAT; + case __NR_execve: + return AUDITSC_EXECVE; + case __NR_openat2: + return AUDITSC_OPENAT2; + default: + return AUDITSC_NATIVE; + } +} + +static int __init audit_classes_init(void) +{ + audit_register_class(AUDIT_CLASS_WRITE, write_class); + audit_register_class(AUDIT_CLASS_READ, read_class); + audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class); + audit_register_class(AUDIT_CLASS_CHATTR, chattr_class); + audit_register_class(AUDIT_CLASS_SIGNAL, signal_class); + return 0; +} + +__initcall(audit_classes_init); diff --git a/arch/alpha/kernel/binfmt_loader.c 
b/arch/alpha/kernel/binfmt_loader.c new file mode 100644 index 0000000000..e4be7a543e --- /dev/null +++ b/arch/alpha/kernel/binfmt_loader.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include + +static int load_binary(struct linux_binprm *bprm) +{ + struct exec *eh = (struct exec *)bprm->buf; + unsigned long loader; + struct file *file; + int retval; + + if (eh->fh.f_magic != 0x183 || (eh->fh.f_flags & 0x3000) != 0x3000) + return -ENOEXEC; + + if (bprm->loader) + return -ENOEXEC; + + loader = bprm->vma->vm_end - sizeof(void *); + + file = open_exec("/sbin/loader"); + retval = PTR_ERR(file); + if (IS_ERR(file)) + return retval; + + /* Remember if the application is TASO. */ + bprm->taso = eh->ah.entry < 0x100000000UL; + + bprm->interpreter = file; + bprm->loader = loader; + return 0; +} + +static struct linux_binfmt loader_format = { + .load_binary = load_binary, +}; + +static int __init init_loader_binfmt(void) +{ + insert_binfmt(&loader_format); + return 0; +} +arch_initcall(init_loader_binfmt); diff --git a/arch/alpha/kernel/bugs.c b/arch/alpha/kernel/bugs.c new file mode 100644 index 0000000000..08cc10d7fa --- /dev/null +++ b/arch/alpha/kernel/bugs.c @@ -0,0 +1,45 @@ + +#include +#include + + +#ifdef CONFIG_SYSFS + +static int cpu_is_ev6_or_later(void) +{ + struct percpu_struct *cpu; + unsigned long cputype; + + cpu = (struct percpu_struct *)((char *)hwrpb + hwrpb->processor_offset); + cputype = cpu->type & 0xffffffff; + /* Include all of EV6, EV67, EV68, EV7, EV79 and EV69. */ + return (cputype == EV6_CPU) || ((cputype >= EV67_CPU) && (cputype <= EV69_CPU)); +} + +ssize_t cpu_show_meltdown(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (cpu_is_ev6_or_later()) + return sprintf(buf, "Vulnerable\n"); + else + return sprintf(buf, "Not affected\n"); +} + +ssize_t cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (cpu_is_ev6_or_later()) + return sprintf(buf, "Vulnerable\n"); + else + return sprintf(buf, "Not affected\n"); +} + +ssize_t cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (cpu_is_ev6_or_later()) + return sprintf(buf, "Vulnerable\n"); + else + return sprintf(buf, "Not affected\n"); +} +#endif diff --git a/arch/alpha/kernel/console.c b/arch/alpha/kernel/console.c new file mode 100644 index 0000000000..5476279329 --- /dev/null +++ b/arch/alpha/kernel/console.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/console.c + * + * Architecture-specific specific support for VGA device on + * non-0 I/O hose + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "pci_impl.h" + +#ifdef CONFIG_VGA_HOSE + +struct pci_controller *pci_vga_hose; +static struct resource alpha_vga = { + .name = "alpha-vga+", + .flags = IORESOURCE_IO, + .start = 0x3C0, + .end = 0x3DF +}; + +static struct pci_controller * __init +default_vga_hose_select(struct pci_controller *h1, struct pci_controller *h2) +{ + if (h2->index < h1->index) + return h2; + + return h1; +} + +void __init +locate_and_init_vga(void *(*sel_func)(void *, void *)) +{ + struct pci_controller *hose = NULL; + struct pci_dev *dev = NULL; + + /* Default the select function */ + if (!sel_func) sel_func = (void *)default_vga_hose_select; + + /* Find the console VGA device */ + for(dev=NULL; (dev=pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, dev));) { + if (!hose) + hose = dev->sysdata; + else + hose = sel_func(hose, 
dev->sysdata); + } + + /* Did we already initialize the correct one? Is there one? */ + if (!hose || (conswitchp == &vga_con && pci_vga_hose == hose)) + return; + + /* Create a new VGA ioport resource WRT the hose it is on. */ + alpha_vga.start += hose->io_space->start; + alpha_vga.end += hose->io_space->start; + request_resource(hose->io_space, &alpha_vga); + + /* Set the VGA hose and init the new console. */ + pci_vga_hose = hose; + console_lock(); + do_take_over_console(&vga_con, 0, MAX_NR_CONSOLES-1, 1); + console_unlock(); +} + +void __init +find_console_vga_hose(void) +{ + u64 *pu64 = (u64 *)((u64)hwrpb + hwrpb->ctbt_offset); + + if (pu64[7] == 3) { /* TERM_TYPE == graphics */ + struct pci_controller *hose; + int h = (pu64[30] >> 24) & 0xff; /* console hose # */ + + /* + * Our hose numbering DOES match the console's, so find + * the right one... + */ + for (hose = hose_head; hose; hose = hose->next) { + if (hose->index == h) break; + } + + if (hose) { + printk("Console graphics on hose %d\n", h); + pci_vga_hose = hose; + } + } +} + +#endif diff --git a/arch/alpha/kernel/core_apecs.c b/arch/alpha/kernel/core_apecs.c new file mode 100644 index 0000000000..6df765ff2b --- /dev/null +++ b/arch/alpha/kernel/core_apecs.c @@ -0,0 +1,420 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/core_apecs.c + * + * Rewritten for Apecs from the lca.c from: + * + * Written by David Mosberger (davidm@cs.arizona.edu) with some code + * taken from Dave Rusling's (david.rusling@reo.mts.dec.com) 32-bit + * bios code. + * + * Code common to all APECS core logic chips. + */ + +#define __EXTERN_INLINE inline +#include +#include +#undef __EXTERN_INLINE + +#include +#include +#include + +#include +#include +#include + +#include "proto.h" +#include "pci_impl.h" + +/* + * NOTE: Herein lie back-to-back mb instructions. They are magic. + * One plausible explanation is that the i/o controller does not properly + * handle the system transaction. Another involves timing. Ho hum. + */ + +/* + * BIOS32-style PCI interface: + */ + +#define DEBUG_CONFIG 0 + +#if DEBUG_CONFIG +# define DBGC(args) printk args +#else +# define DBGC(args) +#endif + +#define vuip volatile unsigned int * + +/* + * Given a bus, device, and function number, compute resulting + * configuration space address and setup the APECS_HAXR2 register + * accordingly. It is therefore not safe to have concurrent + * invocations to configuration space access routines, but there + * really shouldn't be any need for this. + * + * Type 0: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | | | | | | | | | | | | | | | | | | | | | | |F|F|F|R|R|R|R|R|R|0|0| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:11 Device select bit. 
+ * 10:8 Function number + * 7:2 Register number + * + * Type 1: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | | | | | | | | | |B|B|B|B|B|B|B|B|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|1| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:24 reserved + * 23:16 bus number (8 bits = 128 possible buses) + * 15:11 Device number (5 bits) + * 10:8 function number + * 7:2 register number + * + * Notes: + * The function number selects which function of a multi-function device + * (e.g., SCSI and Ethernet). + * + * The register selects a DWORD (32 bit) register offset. Hence it + * doesn't get shifted by 2 bits as we want to "drop" the bottom two + * bits. + */ + +static int +mk_conf_addr(struct pci_bus *pbus, unsigned int device_fn, int where, + unsigned long *pci_addr, unsigned char *type1) +{ + unsigned long addr; + u8 bus = pbus->number; + + DBGC(("mk_conf_addr(bus=%d ,device_fn=0x%x, where=0x%x," + " pci_addr=0x%p, type1=0x%p)\n", + bus, device_fn, where, pci_addr, type1)); + + if (bus == 0) { + int device = device_fn >> 3; + + /* type 0 configuration cycle: */ + + if (device > 20) { + DBGC(("mk_conf_addr: device (%d) > 20, returning -1\n", + device)); + return -1; + } + + *type1 = 0; + addr = (device_fn << 8) | (where); + } else { + /* type 1 configuration cycle: */ + *type1 = 1; + addr = (bus << 16) | (device_fn << 8) | (where); + } + *pci_addr = addr; + DBGC(("mk_conf_addr: returning pci_addr 0x%lx\n", addr)); + return 0; +} + +static unsigned int +conf_read(unsigned long addr, unsigned char type1) +{ + unsigned long flags; + unsigned int stat0, value; + unsigned int haxr2 = 0; + + local_irq_save(flags); /* avoid getting hit by machine check */ + + DBGC(("conf_read(addr=0x%lx, type1=%d)\n", addr, type1)); + + /* Reset status register to avoid losing errors. */ + stat0 = *(vuip)APECS_IOC_DCSR; + *(vuip)APECS_IOC_DCSR = stat0; + mb(); + DBGC(("conf_read: APECS DCSR was 0x%x\n", stat0)); + + /* If Type1 access, must set HAE #2. */ + if (type1) { + haxr2 = *(vuip)APECS_IOC_HAXR2; + mb(); + *(vuip)APECS_IOC_HAXR2 = haxr2 | 1; + DBGC(("conf_read: TYPE1 access\n")); + } + + draina(); + mcheck_expected(0) = 1; + mcheck_taken(0) = 0; + mb(); + + /* Access configuration space. */ + + /* Some SRMs step on these registers during a machine check. */ + asm volatile("ldl %0,%1; mb; mb" : "=r"(value) : "m"(*(vuip)addr) + : "$9", "$10", "$11", "$12", "$13", "$14", "memory"); + + if (mcheck_taken(0)) { + mcheck_taken(0) = 0; + value = 0xffffffffU; + mb(); + } + mcheck_expected(0) = 0; + mb(); + +#if 1 + /* + * david.rusling@reo.mts.dec.com. This code is needed for the + * EB64+ as it does not generate a machine check (why I don't + * know). When we build kernels for one particular platform + * then we can make this conditional on the type. + */ + draina(); + + /* Now look for any errors. */ + stat0 = *(vuip)APECS_IOC_DCSR; + DBGC(("conf_read: APECS DCSR after read 0x%x\n", stat0)); + + /* Is any error bit set? */ + if (stat0 & 0xffe0U) { + /* If not NDEV, print status. */ + if (!(stat0 & 0x0800)) { + printk("apecs.c:conf_read: got stat0=%x\n", stat0); + } + + /* Reset error status. */ + *(vuip)APECS_IOC_DCSR = stat0; + mb(); + wrmces(0x7); /* reset machine check */ + value = 0xffffffff; + } +#endif + + /* If Type1 access, must reset HAE #2 so normal IO space ops work. 
*/ + if (type1) { + *(vuip)APECS_IOC_HAXR2 = haxr2 & ~1; + mb(); + } + local_irq_restore(flags); + + return value; +} + +static void +conf_write(unsigned long addr, unsigned int value, unsigned char type1) +{ + unsigned long flags; + unsigned int stat0; + unsigned int haxr2 = 0; + + local_irq_save(flags); /* avoid getting hit by machine check */ + + /* Reset status register to avoid losing errors. */ + stat0 = *(vuip)APECS_IOC_DCSR; + *(vuip)APECS_IOC_DCSR = stat0; + mb(); + + /* If Type1 access, must set HAE #2. */ + if (type1) { + haxr2 = *(vuip)APECS_IOC_HAXR2; + mb(); + *(vuip)APECS_IOC_HAXR2 = haxr2 | 1; + } + + draina(); + mcheck_expected(0) = 1; + mb(); + + /* Access configuration space. */ + *(vuip)addr = value; + mb(); + mb(); /* magic */ + mcheck_expected(0) = 0; + mb(); + +#if 1 + /* + * david.rusling@reo.mts.dec.com. This code is needed for the + * EB64+ as it does not generate a machine check (why I don't + * know). When we build kernels for one particular platform + * then we can make this conditional on the type. + */ + draina(); + + /* Now look for any errors. */ + stat0 = *(vuip)APECS_IOC_DCSR; + + /* Is any error bit set? */ + if (stat0 & 0xffe0U) { + /* If not NDEV, print status. */ + if (!(stat0 & 0x0800)) { + printk("apecs.c:conf_write: got stat0=%x\n", stat0); + } + + /* Reset error status. */ + *(vuip)APECS_IOC_DCSR = stat0; + mb(); + wrmces(0x7); /* reset machine check */ + } +#endif + + /* If Type1 access, must reset HAE #2 so normal IO space ops work. */ + if (type1) { + *(vuip)APECS_IOC_HAXR2 = haxr2 & ~1; + mb(); + } + local_irq_restore(flags); +} + +static int +apecs_read_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + unsigned long addr, pci_addr; + unsigned char type1; + long mask; + int shift; + + if (mk_conf_addr(bus, devfn, where, &pci_addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + mask = (size - 1) * 8; + shift = (where & 3) * 8; + addr = (pci_addr << 5) + mask + APECS_CONF; + *value = conf_read(addr, type1) >> (shift); + return PCIBIOS_SUCCESSFUL; +} + +static int +apecs_write_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 value) +{ + unsigned long addr, pci_addr; + unsigned char type1; + long mask; + + if (mk_conf_addr(bus, devfn, where, &pci_addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + mask = (size - 1) * 8; + addr = (pci_addr << 5) + mask + APECS_CONF; + conf_write(addr, value << ((where & 3) * 8), type1); + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops apecs_pci_ops = +{ + .read = apecs_read_config, + .write = apecs_write_config, +}; + +void +apecs_pci_tbi(struct pci_controller *hose, dma_addr_t start, dma_addr_t end) +{ + wmb(); + *(vip)APECS_IOC_TBIA = 0; + mb(); +} + +void __init +apecs_init_arch(void) +{ + struct pci_controller *hose; + + /* + * Create our single hose. + */ + + pci_isa_hose = hose = alloc_pci_controller(); + hose->io_space = &ioport_resource; + hose->mem_space = &iomem_resource; + hose->index = 0; + + hose->sparse_mem_base = APECS_SPARSE_MEM - IDENT_ADDR; + hose->dense_mem_base = APECS_DENSE_MEM - IDENT_ADDR; + hose->sparse_io_base = APECS_IO - IDENT_ADDR; + hose->dense_io_base = 0; + + /* + * Set up the PCI to main memory translation windows. 
+ * + * Window 1 is direct access 1GB at 1GB + * Window 2 is scatter-gather 8MB at 8MB (for isa) + */ + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, + SMP_CACHE_BYTES); + hose->sg_pci = NULL; + __direct_map_base = 0x40000000; + __direct_map_size = 0x40000000; + + *(vuip)APECS_IOC_PB1R = __direct_map_base | 0x00080000; + *(vuip)APECS_IOC_PM1R = (__direct_map_size - 1) & 0xfff00000U; + *(vuip)APECS_IOC_TB1R = 0; + + *(vuip)APECS_IOC_PB2R = hose->sg_isa->dma_base | 0x000c0000; + *(vuip)APECS_IOC_PM2R = (hose->sg_isa->size - 1) & 0xfff00000; + *(vuip)APECS_IOC_TB2R = virt_to_phys(hose->sg_isa->ptes) >> 1; + + apecs_pci_tbi(hose, 0, -1); + + /* + * Finally, clear the HAXR2 register, which gets used + * for PCI Config Space accesses. That is the way + * we want to use it, and we do not want to depend on + * what ARC or SRM might have left behind... + */ + *(vuip)APECS_IOC_HAXR2 = 0; + mb(); +} + +void +apecs_pci_clr_err(void) +{ + unsigned int jd; + + jd = *(vuip)APECS_IOC_DCSR; + if (jd & 0xffe0L) { + *(vuip)APECS_IOC_SEAR; + *(vuip)APECS_IOC_DCSR = jd | 0xffe1L; + mb(); + *(vuip)APECS_IOC_DCSR; + } + *(vuip)APECS_IOC_TBIA = (unsigned int)APECS_IOC_TBIA; + mb(); + *(vuip)APECS_IOC_TBIA; +} + +void +apecs_machine_check(unsigned long vector, unsigned long la_ptr) +{ + struct el_common *mchk_header; + struct el_apecs_procdata *mchk_procdata; + struct el_apecs_sysdata_mcheck *mchk_sysdata; + + mchk_header = (struct el_common *)la_ptr; + + mchk_procdata = (struct el_apecs_procdata *) + (la_ptr + mchk_header->proc_offset + - sizeof(mchk_procdata->paltemp)); + + mchk_sysdata = (struct el_apecs_sysdata_mcheck *) + (la_ptr + mchk_header->sys_offset); + + + /* Clear the error before any reporting. */ + mb(); + mb(); /* magic */ + draina(); + apecs_pci_clr_err(); + wrmces(0x7); /* reset machine check pending flag */ + mb(); + + process_mcheck_info(vector, la_ptr, "APECS", + (mcheck_expected(0) + && (mchk_sysdata->epic_dcsr & 0x0c00UL))); +} diff --git a/arch/alpha/kernel/core_cia.c b/arch/alpha/kernel/core_cia.c new file mode 100644 index 0000000000..f489170201 --- /dev/null +++ b/arch/alpha/kernel/core_cia.c @@ -0,0 +1,1216 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/core_cia.c + * + * Written by David A Rusling (david.rusling@reo.mts.dec.com). + * December 1995. + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1997, 1998 Jay Estabrook + * Copyright (C) 1998, 1999, 2000 Richard Henderson + * + * Code common to all CIA core logic chips. + */ + +#define __EXTERN_INLINE inline +#include +#include +#undef __EXTERN_INLINE + +#include +#include +#include +#include +#include + +#include +#include + +#include "proto.h" +#include "pci_impl.h" + + +/* + * NOTE: Herein lie back-to-back mb instructions. They are magic. + * One plausible explanation is that the i/o controller does not properly + * handle the system transaction. Another involves timing. Ho hum. + */ + +#define DEBUG_CONFIG 0 +#if DEBUG_CONFIG +# define DBGC(args) printk args +#else +# define DBGC(args) +#endif + +#define vip volatile int * + +/* + * Given a bus, device, and function number, compute resulting + * configuration space address. It is therefore not safe to have + * concurrent invocations to configuration space access routines, but + * there really shouldn't be any need for this. 
+ * + * Type 0: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | |D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|0| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:11 Device select bit. + * 10:8 Function number + * 7:2 Register number + * + * Type 1: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | | | | | | | | | |B|B|B|B|B|B|B|B|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|1| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:24 reserved + * 23:16 bus number (8 bits = 128 possible buses) + * 15:11 Device number (5 bits) + * 10:8 function number + * 7:2 register number + * + * Notes: + * The function number selects which function of a multi-function device + * (e.g., SCSI and Ethernet). + * + * The register selects a DWORD (32 bit) register offset. Hence it + * doesn't get shifted by 2 bits as we want to "drop" the bottom two + * bits. + */ + +static int +mk_conf_addr(struct pci_bus *bus_dev, unsigned int device_fn, int where, + unsigned long *pci_addr, unsigned char *type1) +{ + u8 bus = bus_dev->number; + + *type1 = (bus != 0); + *pci_addr = (bus << 16) | (device_fn << 8) | where; + + DBGC(("mk_conf_addr(bus=%d ,device_fn=0x%x, where=0x%x," + " returning address 0x%p\n" + bus, device_fn, where, *pci_addr)); + + return 0; +} + +static unsigned int +conf_read(unsigned long addr, unsigned char type1) +{ + unsigned long flags; + int stat0, value; + int cia_cfg = 0; + + DBGC(("conf_read(addr=0x%lx, type1=%d) ", addr, type1)); + local_irq_save(flags); + + /* Reset status register to avoid losing errors. */ + stat0 = *(vip)CIA_IOC_CIA_ERR; + *(vip)CIA_IOC_CIA_ERR = stat0; + mb(); + *(vip)CIA_IOC_CIA_ERR; /* re-read to force write */ + + /* If Type1 access, must set CIA CFG. */ + if (type1) { + cia_cfg = *(vip)CIA_IOC_CFG; + *(vip)CIA_IOC_CFG = (cia_cfg & ~3) | 1; + mb(); + *(vip)CIA_IOC_CFG; + } + + mb(); + draina(); + mcheck_expected(0) = 1; + mcheck_taken(0) = 0; + mb(); + + /* Access configuration space. */ + value = *(vip)addr; + mb(); + mb(); /* magic */ + if (mcheck_taken(0)) { + mcheck_taken(0) = 0; + value = 0xffffffff; + mb(); + } + mcheck_expected(0) = 0; + mb(); + + /* If Type1 access, must reset IOC CFG so normal IO space ops work. */ + if (type1) { + *(vip)CIA_IOC_CFG = cia_cfg; + mb(); + *(vip)CIA_IOC_CFG; + } + + local_irq_restore(flags); + DBGC(("done\n")); + + return value; +} + +static void +conf_write(unsigned long addr, unsigned int value, unsigned char type1) +{ + unsigned long flags; + int stat0, cia_cfg = 0; + + DBGC(("conf_write(addr=0x%lx, type1=%d) ", addr, type1)); + local_irq_save(flags); + + /* Reset status register to avoid losing errors. */ + stat0 = *(vip)CIA_IOC_CIA_ERR; + *(vip)CIA_IOC_CIA_ERR = stat0; + mb(); + *(vip)CIA_IOC_CIA_ERR; /* re-read to force write */ + + /* If Type1 access, must set CIA CFG. */ + if (type1) { + cia_cfg = *(vip)CIA_IOC_CFG; + *(vip)CIA_IOC_CFG = (cia_cfg & ~3) | 1; + mb(); + *(vip)CIA_IOC_CFG; + } + + mb(); + draina(); + mcheck_expected(0) = 1; + mcheck_taken(0) = 0; + mb(); + + /* Access configuration space. 
*/ + *(vip)addr = value; + mb(); + *(vip)addr; /* read back to force the write */ + + mcheck_expected(0) = 0; + mb(); + + /* If Type1 access, must reset IOC CFG so normal IO space ops work. */ + if (type1) { + *(vip)CIA_IOC_CFG = cia_cfg; + mb(); + *(vip)CIA_IOC_CFG; + } + + local_irq_restore(flags); + DBGC(("done\n")); +} + +static int +cia_read_config(struct pci_bus *bus, unsigned int devfn, int where, int size, + u32 *value) +{ + unsigned long addr, pci_addr; + long mask; + unsigned char type1; + int shift; + + if (mk_conf_addr(bus, devfn, where, &pci_addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + mask = (size - 1) * 8; + shift = (where & 3) * 8; + addr = (pci_addr << 5) + mask + CIA_CONF; + *value = conf_read(addr, type1) >> (shift); + return PCIBIOS_SUCCESSFUL; +} + +static int +cia_write_config(struct pci_bus *bus, unsigned int devfn, int where, int size, + u32 value) +{ + unsigned long addr, pci_addr; + long mask; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, &pci_addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + mask = (size - 1) * 8; + addr = (pci_addr << 5) + mask + CIA_CONF; + conf_write(addr, value << ((where & 3) * 8), type1); + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops cia_pci_ops = +{ + .read = cia_read_config, + .write = cia_write_config, +}; + +/* + * CIA Pass 1 and PYXIS Pass 1 and 2 have a broken scatter-gather tlb. + * It cannot be invalidated. Rather than hard code the pass numbers, + * actually try the tbia to see if it works. + */ + +void +cia_pci_tbi(struct pci_controller *hose, dma_addr_t start, dma_addr_t end) +{ + wmb(); + *(vip)CIA_IOC_PCI_TBIA = 3; /* Flush all locked and unlocked. */ + mb(); + *(vip)CIA_IOC_PCI_TBIA; +} + +/* + * On PYXIS, even if the tbia works, we cannot use it. It effectively locks + * the chip (as well as direct write to the tag registers) if there is a + * SG DMA operation in progress. This is true at least for PYXIS rev. 1, + * so always use the method below. + */ +/* + * This is the method NT and NetBSD use. + * + * Allocate mappings, and put the chip into DMA loopback mode to read a + * garbage page. This works by causing TLB misses, causing old entries to + * be purged to make room for the new entries coming in for the garbage page. + */ + +#define CIA_BROKEN_TBIA_BASE 0x30000000 +#define CIA_BROKEN_TBIA_SIZE 1024 + +/* Always called with interrupts disabled */ +void +cia_pci_tbi_try2(struct pci_controller *hose, + dma_addr_t start, dma_addr_t end) +{ + void __iomem *bus_addr; + int ctrl; + + /* Put the chip into PCI loopback mode. */ + mb(); + ctrl = *(vip)CIA_IOC_CIA_CTRL; + *(vip)CIA_IOC_CIA_CTRL = ctrl | CIA_CTRL_PCI_LOOP_EN; + mb(); + *(vip)CIA_IOC_CIA_CTRL; + mb(); + + /* Read from PCI dense memory space at TBI_ADDR, skipping 32k on + each read. This forces SG TLB misses. NetBSD claims that the + TLB entries are not quite LRU, meaning that we need to read more + times than there are actual tags. The 2117x docs claim strict + round-robin. Oh well, we've come this far... */ + /* Even better - as seen on the PYXIS rev 1 the TLB tags 0-3 can + be filled by the TLB misses *only once* after being invalidated + (by tbia or direct write). Next misses won't update them even + though the lock bits are cleared. Tags 4-7 are "quite LRU" though, + so use them and read at window 3 base exactly 4 times. Reading + more sometimes makes the chip crazy. 
-ink */ + + bus_addr = cia_ioremap(CIA_BROKEN_TBIA_BASE, 32768 * 4); + + cia_readl(bus_addr + 0x00000); + cia_readl(bus_addr + 0x08000); + cia_readl(bus_addr + 0x10000); + cia_readl(bus_addr + 0x18000); + + cia_iounmap(bus_addr); + + /* Restore normal PCI operation. */ + mb(); + *(vip)CIA_IOC_CIA_CTRL = ctrl; + mb(); + *(vip)CIA_IOC_CIA_CTRL; + mb(); +} + +static inline void +cia_prepare_tbia_workaround(int window) +{ + unsigned long *ppte, pte; + long i; + + /* Use minimal 1K map. */ + ppte = memblock_alloc(CIA_BROKEN_TBIA_SIZE, 32768); + if (!ppte) + panic("%s: Failed to allocate %u bytes align=0x%x\n", + __func__, CIA_BROKEN_TBIA_SIZE, 32768); + pte = (virt_to_phys(ppte) >> (PAGE_SHIFT - 1)) | 1; + + for (i = 0; i < CIA_BROKEN_TBIA_SIZE / sizeof(unsigned long); ++i) + ppte[i] = pte; + + *(vip)CIA_IOC_PCI_Wn_BASE(window) = CIA_BROKEN_TBIA_BASE | 3; + *(vip)CIA_IOC_PCI_Wn_MASK(window) + = (CIA_BROKEN_TBIA_SIZE*1024 - 1) & 0xfff00000; + *(vip)CIA_IOC_PCI_Tn_BASE(window) = virt_to_phys(ppte) >> 2; +} + +static void __init +verify_tb_operation(void) +{ + static int page[PAGE_SIZE/4] + __attribute__((aligned(PAGE_SIZE))) + __initdata = { 0 }; + + struct pci_iommu_arena *arena = pci_isa_hose->sg_isa; + int ctrl, addr0, tag0, pte0, data0; + int temp, use_tbia_try2 = 0; + void __iomem *bus_addr; + + /* pyxis -- tbia is broken */ + if (pci_isa_hose->dense_io_base) + use_tbia_try2 = 1; + + /* Put the chip into PCI loopback mode. */ + mb(); + ctrl = *(vip)CIA_IOC_CIA_CTRL; + *(vip)CIA_IOC_CIA_CTRL = ctrl | CIA_CTRL_PCI_LOOP_EN; + mb(); + *(vip)CIA_IOC_CIA_CTRL; + mb(); + + /* Write a valid entry directly into the TLB registers. */ + + addr0 = arena->dma_base; + tag0 = addr0 | 1; + pte0 = (virt_to_phys(page) >> (PAGE_SHIFT - 1)) | 1; + + *(vip)CIA_IOC_TB_TAGn(0) = tag0; + *(vip)CIA_IOC_TB_TAGn(1) = 0; + *(vip)CIA_IOC_TB_TAGn(2) = 0; + *(vip)CIA_IOC_TB_TAGn(3) = 0; + *(vip)CIA_IOC_TB_TAGn(4) = 0; + *(vip)CIA_IOC_TB_TAGn(5) = 0; + *(vip)CIA_IOC_TB_TAGn(6) = 0; + *(vip)CIA_IOC_TB_TAGn(7) = 0; + *(vip)CIA_IOC_TBn_PAGEm(0,0) = pte0; + *(vip)CIA_IOC_TBn_PAGEm(0,1) = 0; + *(vip)CIA_IOC_TBn_PAGEm(0,2) = 0; + *(vip)CIA_IOC_TBn_PAGEm(0,3) = 0; + mb(); + + /* Get a usable bus address */ + bus_addr = cia_ioremap(addr0, 8*PAGE_SIZE); + + /* First, verify we can read back what we've written. If + this fails, we can't be sure of any of the other testing + we're going to do, so bail. */ + /* ??? Actually, we could do the work with machine checks. + By passing this register update test, we pretty much + guarantee that cia_pci_tbi_try1 works. If this test + fails, cia_pci_tbi_try2 might still work. */ + + temp = *(vip)CIA_IOC_TB_TAGn(0); + if (temp != tag0) { + printk("pci: failed tb register update test " + "(tag0 %#x != %#x)\n", temp, tag0); + goto failed; + } + temp = *(vip)CIA_IOC_TB_TAGn(1); + if (temp != 0) { + printk("pci: failed tb register update test " + "(tag1 %#x != 0)\n", temp); + goto failed; + } + temp = *(vip)CIA_IOC_TBn_PAGEm(0,0); + if (temp != pte0) { + printk("pci: failed tb register update test " + "(pte0 %#x != %#x)\n", temp, pte0); + goto failed; + } + printk("pci: passed tb register update test\n"); + + /* Second, verify we can actually do I/O through this entry. 
*/ + + data0 = 0xdeadbeef; + page[0] = data0; + mcheck_expected(0) = 1; + mcheck_taken(0) = 0; + mb(); + temp = cia_readl(bus_addr); + mb(); + mcheck_expected(0) = 0; + mb(); + if (mcheck_taken(0)) { + printk("pci: failed sg loopback i/o read test (mcheck)\n"); + goto failed; + } + if (temp != data0) { + printk("pci: failed sg loopback i/o read test " + "(%#x != %#x)\n", temp, data0); + goto failed; + } + printk("pci: passed sg loopback i/o read test\n"); + + /* Third, try to invalidate the TLB. */ + + if (! use_tbia_try2) { + cia_pci_tbi(arena->hose, 0, -1); + temp = *(vip)CIA_IOC_TB_TAGn(0); + if (temp & 1) { + use_tbia_try2 = 1; + printk("pci: failed tbia test; workaround available\n"); + } else { + printk("pci: passed tbia test\n"); + } + } + + /* Fourth, verify the TLB snoops the EV5's caches when + doing a tlb fill. */ + + data0 = 0x5adda15e; + page[0] = data0; + arena->ptes[4] = pte0; + mcheck_expected(0) = 1; + mcheck_taken(0) = 0; + mb(); + temp = cia_readl(bus_addr + 4*PAGE_SIZE); + mb(); + mcheck_expected(0) = 0; + mb(); + if (mcheck_taken(0)) { + printk("pci: failed pte write cache snoop test (mcheck)\n"); + goto failed; + } + if (temp != data0) { + printk("pci: failed pte write cache snoop test " + "(%#x != %#x)\n", temp, data0); + goto failed; + } + printk("pci: passed pte write cache snoop test\n"); + + /* Fifth, verify that a previously invalid PTE entry gets + filled from the page table. */ + + data0 = 0xabcdef12; + page[0] = data0; + arena->ptes[5] = pte0; + mcheck_expected(0) = 1; + mcheck_taken(0) = 0; + mb(); + temp = cia_readl(bus_addr + 5*PAGE_SIZE); + mb(); + mcheck_expected(0) = 0; + mb(); + if (mcheck_taken(0)) { + printk("pci: failed valid tag invalid pte reload test " + "(mcheck; workaround available)\n"); + /* Work around this bug by aligning new allocations + on 4 page boundaries. */ + arena->align_entry = 4; + } else if (temp != data0) { + printk("pci: failed valid tag invalid pte reload test " + "(%#x != %#x)\n", temp, data0); + goto failed; + } else { + printk("pci: passed valid tag invalid pte reload test\n"); + } + + /* Sixth, verify machine checks are working. Test invalid + pte under the same valid tag as we used above. */ + + mcheck_expected(0) = 1; + mcheck_taken(0) = 0; + mb(); + temp = cia_readl(bus_addr + 6*PAGE_SIZE); + mb(); + mcheck_expected(0) = 0; + mb(); + printk("pci: %s pci machine check test\n", + mcheck_taken(0) ? "passed" : "failed"); + + /* Clean up after the tests. */ + arena->ptes[4] = 0; + arena->ptes[5] = 0; + + if (use_tbia_try2) { + alpha_mv.mv_pci_tbi = cia_pci_tbi_try2; + + /* Tags 0-3 must be disabled if we use this workaraund. */ + wmb(); + *(vip)CIA_IOC_TB_TAGn(0) = 2; + *(vip)CIA_IOC_TB_TAGn(1) = 2; + *(vip)CIA_IOC_TB_TAGn(2) = 2; + *(vip)CIA_IOC_TB_TAGn(3) = 2; + + printk("pci: tbia workaround enabled\n"); + } + alpha_mv.mv_pci_tbi(arena->hose, 0, -1); + +exit: + /* unmap the bus addr */ + cia_iounmap(bus_addr); + + /* Restore normal PCI operation. */ + mb(); + *(vip)CIA_IOC_CIA_CTRL = ctrl; + mb(); + *(vip)CIA_IOC_CIA_CTRL; + mb(); + return; + +failed: + printk("pci: disabling sg translation window\n"); + *(vip)CIA_IOC_PCI_W0_BASE = 0; + *(vip)CIA_IOC_PCI_W1_BASE = 0; + pci_isa_hose->sg_isa = NULL; + alpha_mv.mv_pci_tbi = NULL; + goto exit; +} + +#if defined(ALPHA_RESTORE_SRM_SETUP) +/* Save CIA configuration data as the console had it set up. 
*/ +struct +{ + unsigned int hae_mem; + unsigned int hae_io; + unsigned int pci_dac_offset; + unsigned int err_mask; + unsigned int cia_ctrl; + unsigned int cia_cnfg; + struct { + unsigned int w_base; + unsigned int w_mask; + unsigned int t_base; + } window[4]; +} saved_config __attribute((common)); + +void +cia_save_srm_settings(int is_pyxis) +{ + int i; + + /* Save some important registers. */ + saved_config.err_mask = *(vip)CIA_IOC_ERR_MASK; + saved_config.cia_ctrl = *(vip)CIA_IOC_CIA_CTRL; + saved_config.hae_mem = *(vip)CIA_IOC_HAE_MEM; + saved_config.hae_io = *(vip)CIA_IOC_HAE_IO; + saved_config.pci_dac_offset = *(vip)CIA_IOC_PCI_W_DAC; + + if (is_pyxis) + saved_config.cia_cnfg = *(vip)CIA_IOC_CIA_CNFG; + else + saved_config.cia_cnfg = 0; + + /* Save DMA windows configuration. */ + for (i = 0; i < 4; i++) { + saved_config.window[i].w_base = *(vip)CIA_IOC_PCI_Wn_BASE(i); + saved_config.window[i].w_mask = *(vip)CIA_IOC_PCI_Wn_MASK(i); + saved_config.window[i].t_base = *(vip)CIA_IOC_PCI_Tn_BASE(i); + } + mb(); +} + +void +cia_restore_srm_settings(void) +{ + int i; + + for (i = 0; i < 4; i++) { + *(vip)CIA_IOC_PCI_Wn_BASE(i) = saved_config.window[i].w_base; + *(vip)CIA_IOC_PCI_Wn_MASK(i) = saved_config.window[i].w_mask; + *(vip)CIA_IOC_PCI_Tn_BASE(i) = saved_config.window[i].t_base; + } + + *(vip)CIA_IOC_HAE_MEM = saved_config.hae_mem; + *(vip)CIA_IOC_HAE_IO = saved_config.hae_io; + *(vip)CIA_IOC_PCI_W_DAC = saved_config.pci_dac_offset; + *(vip)CIA_IOC_ERR_MASK = saved_config.err_mask; + *(vip)CIA_IOC_CIA_CTRL = saved_config.cia_ctrl; + + if (saved_config.cia_cnfg) /* Must be pyxis. */ + *(vip)CIA_IOC_CIA_CNFG = saved_config.cia_cnfg; + + mb(); +} +#else /* ALPHA_RESTORE_SRM_SETUP */ +#define cia_save_srm_settings(p) do {} while (0) +#define cia_restore_srm_settings() do {} while (0) +#endif /* ALPHA_RESTORE_SRM_SETUP */ + + +static void __init +do_init_arch(int is_pyxis) +{ + struct pci_controller *hose; + int temp, cia_rev, tbia_window; + + cia_rev = *(vip)CIA_IOC_CIA_REV & CIA_REV_MASK; + printk("pci: cia revision %d%s\n", + cia_rev, is_pyxis ? " (pyxis)" : ""); + + if (alpha_using_srm) + cia_save_srm_settings(is_pyxis); + + /* Set up error reporting. */ + temp = *(vip)CIA_IOC_ERR_MASK; + temp &= ~(CIA_ERR_CPU_PE | CIA_ERR_MEM_NEM | CIA_ERR_PA_PTE_INV + | CIA_ERR_RCVD_MAS_ABT | CIA_ERR_RCVD_TAR_ABT); + *(vip)CIA_IOC_ERR_MASK = temp; + + /* Clear all currently pending errors. */ + temp = *(vip)CIA_IOC_CIA_ERR; + *(vip)CIA_IOC_CIA_ERR = temp; + + /* Turn on mchecks. */ + temp = *(vip)CIA_IOC_CIA_CTRL; + temp |= CIA_CTRL_FILL_ERR_EN | CIA_CTRL_MCHK_ERR_EN; + *(vip)CIA_IOC_CIA_CTRL = temp; + + /* Clear the CFG register, which gets used for PCI config space + accesses. That is the way we want to use it, and we do not + want to depend on what ARC or SRM might have left behind. */ + *(vip)CIA_IOC_CFG = 0; + + /* Zero the HAEs. */ + *(vip)CIA_IOC_HAE_MEM = 0; + *(vip)CIA_IOC_HAE_IO = 0; + + /* For PYXIS, we always use BWX bus and i/o accesses. To that end, + make sure they're enabled on the controller. At the same time, + enable the monster window. */ + if (is_pyxis) { + temp = *(vip)CIA_IOC_CIA_CNFG; + temp |= CIA_CNFG_IOA_BWEN | CIA_CNFG_PCI_MWEN; + *(vip)CIA_IOC_CIA_CNFG = temp; + } + + /* Synchronize with all previous changes. */ + mb(); + *(vip)CIA_IOC_CIA_REV; + + /* + * Create our single hose. + */ + + pci_isa_hose = hose = alloc_pci_controller(); + hose->io_space = &ioport_resource; + hose->mem_space = &iomem_resource; + hose->index = 0; + + if (! 
is_pyxis) { + struct resource *hae_mem = alloc_resource(); + hose->mem_space = hae_mem; + + hae_mem->start = 0; + hae_mem->end = CIA_MEM_R1_MASK; + hae_mem->name = pci_hae0_name; + hae_mem->flags = IORESOURCE_MEM; + + if (request_resource(&iomem_resource, hae_mem) < 0) + printk(KERN_ERR "Failed to request HAE_MEM\n"); + + hose->sparse_mem_base = CIA_SPARSE_MEM - IDENT_ADDR; + hose->dense_mem_base = CIA_DENSE_MEM - IDENT_ADDR; + hose->sparse_io_base = CIA_IO - IDENT_ADDR; + hose->dense_io_base = 0; + } else { + hose->sparse_mem_base = 0; + hose->dense_mem_base = CIA_BW_MEM - IDENT_ADDR; + hose->sparse_io_base = 0; + hose->dense_io_base = CIA_BW_IO - IDENT_ADDR; + } + + /* + * Set up the PCI to main memory translation windows. + * + * Window 0 is S/G 8MB at 8MB (for isa) + * Window 1 is S/G 1MB at 768MB (for tbia) (unused for CIA rev 1) + * Window 2 is direct access 2GB at 2GB + * Window 3 is DAC access 4GB at 8GB (or S/G for tbia if CIA rev 1) + * + * ??? NetBSD hints that page tables must be aligned to 32K, + * possibly due to a hardware bug. This is over-aligned + * from the 8K alignment one would expect for an 8MB window. + * No description of what revisions affected. + */ + + hose->sg_pci = NULL; + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 32768); + + __direct_map_base = 0x80000000; + __direct_map_size = 0x80000000; + + *(vip)CIA_IOC_PCI_W0_BASE = hose->sg_isa->dma_base | 3; + *(vip)CIA_IOC_PCI_W0_MASK = (hose->sg_isa->size - 1) & 0xfff00000; + *(vip)CIA_IOC_PCI_T0_BASE = virt_to_phys(hose->sg_isa->ptes) >> 2; + + *(vip)CIA_IOC_PCI_W2_BASE = __direct_map_base | 1; + *(vip)CIA_IOC_PCI_W2_MASK = (__direct_map_size - 1) & 0xfff00000; + *(vip)CIA_IOC_PCI_T2_BASE = 0 >> 2; + + /* On PYXIS we have the monster window, selected by bit 40, so + there is no need for window3 to be enabled. + + On CIA, we don't have true arbitrary addressing -- bits <39:32> + are compared against W_DAC. We can, however, directly map 4GB, + which is better than before. However, due to assumptions made + elsewhere, we should not claim that we support DAC unless that + 4GB covers all of physical memory. + + On CIA rev 1, apparently W1 and W2 can't be used for SG. + At least, there are reports that it doesn't work for Alcor. + In that case, we have no choice but to use W3 for the TBIA + workaround, which means we can't use DAC at all. */ + + tbia_window = 1; + if (is_pyxis) { + *(vip)CIA_IOC_PCI_W3_BASE = 0; + } else if (cia_rev == 1) { + *(vip)CIA_IOC_PCI_W1_BASE = 0; + tbia_window = 3; + } else if (max_low_pfn > (0x100000000UL >> PAGE_SHIFT)) { + *(vip)CIA_IOC_PCI_W3_BASE = 0; + } else { + *(vip)CIA_IOC_PCI_W3_BASE = 0x00000000 | 1 | 8; + *(vip)CIA_IOC_PCI_W3_MASK = 0xfff00000; + *(vip)CIA_IOC_PCI_T3_BASE = 0 >> 2; + + alpha_mv.pci_dac_offset = 0x200000000UL; + *(vip)CIA_IOC_PCI_W_DAC = alpha_mv.pci_dac_offset >> 32; + } + + /* Prepare workaround for apparently broken tbia. */ + cia_prepare_tbia_workaround(tbia_window); +} + +void __init +cia_init_arch(void) +{ + do_init_arch(0); +} + +void __init +pyxis_init_arch(void) +{ + /* On pyxis machines we can precisely calculate the + CPU clock frequency using pyxis real time counter. + It's especially useful for SX164 with broken RTC. + + Both CPU and chipset are driven by the single 16.666M + or 16.667M crystal oscillator. PYXIS_RT_COUNT clock is + 66.66 MHz. 
-ink */ + + unsigned int cc0, cc1; + unsigned long pyxis_cc; + + __asm__ __volatile__ ("rpcc %0" : "=r"(cc0)); + pyxis_cc = *(vulp)PYXIS_RT_COUNT; + do { } while(*(vulp)PYXIS_RT_COUNT - pyxis_cc < 4096); + __asm__ __volatile__ ("rpcc %0" : "=r"(cc1)); + cc1 -= cc0; + hwrpb->cycle_freq = ((cc1 >> 11) * 100000000UL) / 3; + hwrpb_update_checksum(hwrpb); + + do_init_arch(1); +} + +void +cia_kill_arch(int mode) +{ + if (alpha_using_srm) + cia_restore_srm_settings(); +} + +void __init +cia_init_pci(void) +{ + /* Must delay this from init_arch, as we need machine checks. */ + verify_tb_operation(); + common_init_pci(); +} + +static inline void +cia_pci_clr_err(void) +{ + int jd; + + jd = *(vip)CIA_IOC_CIA_ERR; + *(vip)CIA_IOC_CIA_ERR = jd; + mb(); + *(vip)CIA_IOC_CIA_ERR; /* re-read to force write. */ +} + +#ifdef CONFIG_VERBOSE_MCHECK +static void +cia_decode_pci_error(struct el_CIA_sysdata_mcheck *cia, const char *msg) +{ + static const char * const pci_cmd_desc[16] = { + "Interrupt Acknowledge", "Special Cycle", "I/O Read", + "I/O Write", "Reserved 0x4", "Reserved 0x5", "Memory Read", + "Memory Write", "Reserved 0x8", "Reserved 0x9", + "Configuration Read", "Configuration Write", + "Memory Read Multiple", "Dual Address Cycle", + "Memory Read Line", "Memory Write and Invalidate" + }; + + if (cia->cia_err & (CIA_ERR_COR_ERR + | CIA_ERR_UN_COR_ERR + | CIA_ERR_MEM_NEM + | CIA_ERR_PA_PTE_INV)) { + static const char * const window_desc[6] = { + "No window active", "Window 0 hit", "Window 1 hit", + "Window 2 hit", "Window 3 hit", "Monster window hit" + }; + + const char *window; + const char *cmd; + unsigned long addr, tmp; + int lock, dac; + + cmd = pci_cmd_desc[cia->pci_err0 & 0x7]; + lock = (cia->pci_err0 >> 4) & 1; + dac = (cia->pci_err0 >> 5) & 1; + + tmp = (cia->pci_err0 >> 8) & 0x1F; + tmp = ffs(tmp); + window = window_desc[tmp]; + + addr = cia->pci_err1; + if (dac) { + tmp = *(vip)CIA_IOC_PCI_W_DAC & 0xFFUL; + addr |= tmp << 32; + } + + printk(KERN_CRIT "CIA machine check: %s\n", msg); + printk(KERN_CRIT " DMA command: %s\n", cmd); + printk(KERN_CRIT " PCI address: %#010lx\n", addr); + printk(KERN_CRIT " %s, Lock: %d, DAC: %d\n", + window, lock, dac); + } else if (cia->cia_err & (CIA_ERR_PERR + | CIA_ERR_PCI_ADDR_PE + | CIA_ERR_RCVD_MAS_ABT + | CIA_ERR_RCVD_TAR_ABT + | CIA_ERR_IOA_TIMEOUT)) { + static const char * const master_st_desc[16] = { + "Idle", "Drive bus", "Address step cycle", + "Address cycle", "Data cycle", "Last read data cycle", + "Last write data cycle", "Read stop cycle", + "Write stop cycle", "Read turnaround cycle", + "Write turnaround cycle", "Reserved 0xB", + "Reserved 0xC", "Reserved 0xD", "Reserved 0xE", + "Unknown state" + }; + static const char * const target_st_desc[16] = { + "Idle", "Busy", "Read data cycle", "Write data cycle", + "Read stop cycle", "Write stop cycle", + "Read turnaround cycle", "Write turnaround cycle", + "Read wait cycle", "Write wait cycle", + "Reserved 0xA", "Reserved 0xB", "Reserved 0xC", + "Reserved 0xD", "Reserved 0xE", "Unknown state" + }; + + const char *cmd; + const char *master, *target; + unsigned long addr, tmp; + int dac; + + master = master_st_desc[(cia->pci_err0 >> 16) & 0xF]; + target = target_st_desc[(cia->pci_err0 >> 20) & 0xF]; + cmd = pci_cmd_desc[(cia->pci_err0 >> 24) & 0xF]; + dac = (cia->pci_err0 >> 28) & 1; + + addr = cia->pci_err2; + if (dac) { + tmp = *(volatile int *)CIA_IOC_PCI_W_DAC & 0xFFUL; + addr |= tmp << 32; + } + + printk(KERN_CRIT "CIA machine check: %s\n", msg); + printk(KERN_CRIT " PCI command: %s\n", cmd); + 
printk(KERN_CRIT " Master state: %s, Target state: %s\n", + master, target); + printk(KERN_CRIT " PCI address: %#010lx, DAC: %d\n", + addr, dac); + } else { + printk(KERN_CRIT "CIA machine check: %s\n", msg); + printk(KERN_CRIT " Unknown PCI error\n"); + printk(KERN_CRIT " PCI_ERR0 = %#08lx", cia->pci_err0); + printk(KERN_CRIT " PCI_ERR1 = %#08lx", cia->pci_err1); + printk(KERN_CRIT " PCI_ERR2 = %#08lx", cia->pci_err2); + } +} + +static void +cia_decode_mem_error(struct el_CIA_sysdata_mcheck *cia, const char *msg) +{ + unsigned long mem_port_addr; + unsigned long mem_port_mask; + const char *mem_port_cmd; + const char *seq_state; + const char *set_select; + unsigned long tmp; + + /* If this is a DMA command, also decode the PCI bits. */ + if ((cia->mem_err1 >> 20) & 1) + cia_decode_pci_error(cia, msg); + else + printk(KERN_CRIT "CIA machine check: %s\n", msg); + + mem_port_addr = cia->mem_err0 & 0xfffffff0; + mem_port_addr |= (cia->mem_err1 & 0x83UL) << 32; + + mem_port_mask = (cia->mem_err1 >> 12) & 0xF; + + tmp = (cia->mem_err1 >> 8) & 0xF; + tmp |= ((cia->mem_err1 >> 20) & 1) << 4; + if ((tmp & 0x1E) == 0x06) + mem_port_cmd = "WRITE BLOCK or WRITE BLOCK LOCK"; + else if ((tmp & 0x1C) == 0x08) + mem_port_cmd = "READ MISS or READ MISS MODIFY"; + else if (tmp == 0x1C) + mem_port_cmd = "BC VICTIM"; + else if ((tmp & 0x1E) == 0x0E) + mem_port_cmd = "READ MISS MODIFY"; + else if ((tmp & 0x1C) == 0x18) + mem_port_cmd = "DMA READ or DMA READ MODIFY"; + else if ((tmp & 0x1E) == 0x12) + mem_port_cmd = "DMA WRITE"; + else + mem_port_cmd = "Unknown"; + + tmp = (cia->mem_err1 >> 16) & 0xF; + switch (tmp) { + case 0x0: + seq_state = "Idle"; + break; + case 0x1: + seq_state = "DMA READ or DMA WRITE"; + break; + case 0x2: case 0x3: + seq_state = "READ MISS (or READ MISS MODIFY) with victim"; + break; + case 0x4: case 0x5: case 0x6: + seq_state = "READ MISS (or READ MISS MODIFY) with no victim"; + break; + case 0x8: case 0x9: case 0xB: + seq_state = "Refresh"; + break; + case 0xC: + seq_state = "Idle, waiting for DMA pending read"; + break; + case 0xE: case 0xF: + seq_state = "Idle, ras precharge"; + break; + default: + seq_state = "Unknown"; + break; + } + + tmp = (cia->mem_err1 >> 24) & 0x1F; + switch (tmp) { + case 0x00: set_select = "Set 0 selected"; break; + case 0x01: set_select = "Set 1 selected"; break; + case 0x02: set_select = "Set 2 selected"; break; + case 0x03: set_select = "Set 3 selected"; break; + case 0x04: set_select = "Set 4 selected"; break; + case 0x05: set_select = "Set 5 selected"; break; + case 0x06: set_select = "Set 6 selected"; break; + case 0x07: set_select = "Set 7 selected"; break; + case 0x08: set_select = "Set 8 selected"; break; + case 0x09: set_select = "Set 9 selected"; break; + case 0x0A: set_select = "Set A selected"; break; + case 0x0B: set_select = "Set B selected"; break; + case 0x0C: set_select = "Set C selected"; break; + case 0x0D: set_select = "Set D selected"; break; + case 0x0E: set_select = "Set E selected"; break; + case 0x0F: set_select = "Set F selected"; break; + case 0x10: set_select = "No set selected"; break; + case 0x1F: set_select = "Refresh cycle"; break; + default: set_select = "Unknown"; break; + } + + printk(KERN_CRIT " Memory port command: %s\n", mem_port_cmd); + printk(KERN_CRIT " Memory port address: %#010lx, mask: %#lx\n", + mem_port_addr, mem_port_mask); + printk(KERN_CRIT " Memory sequencer state: %s\n", seq_state); + printk(KERN_CRIT " Memory set: %s\n", set_select); +} + +static void +cia_decode_ecc_error(struct el_CIA_sysdata_mcheck 
*cia, const char *msg) +{ + long syn; + long i; + const char *fmt; + + cia_decode_mem_error(cia, msg); + + syn = cia->cia_syn & 0xff; + if (syn == (syn & -syn)) { + fmt = KERN_CRIT " ECC syndrome %#x -- check bit %d\n"; + i = ffs(syn) - 1; + } else { + static unsigned char const data_bit[64] = { + 0xCE, 0xCB, 0xD3, 0xD5, + 0xD6, 0xD9, 0xDA, 0xDC, + 0x23, 0x25, 0x26, 0x29, + 0x2A, 0x2C, 0x31, 0x34, + 0x0E, 0x0B, 0x13, 0x15, + 0x16, 0x19, 0x1A, 0x1C, + 0xE3, 0xE5, 0xE6, 0xE9, + 0xEA, 0xEC, 0xF1, 0xF4, + 0x4F, 0x4A, 0x52, 0x54, + 0x57, 0x58, 0x5B, 0x5D, + 0xA2, 0xA4, 0xA7, 0xA8, + 0xAB, 0xAD, 0xB0, 0xB5, + 0x8F, 0x8A, 0x92, 0x94, + 0x97, 0x98, 0x9B, 0x9D, + 0x62, 0x64, 0x67, 0x68, + 0x6B, 0x6D, 0x70, 0x75 + }; + + for (i = 0; i < 64; ++i) + if (data_bit[i] == syn) + break; + + if (i < 64) + fmt = KERN_CRIT " ECC syndrome %#x -- data bit %d\n"; + else + fmt = KERN_CRIT " ECC syndrome %#x -- unknown bit\n"; + } + + printk (fmt, syn, i); +} + +static void +cia_decode_parity_error(struct el_CIA_sysdata_mcheck *cia) +{ + static const char * const cmd_desc[16] = { + "NOP", "LOCK", "FETCH", "FETCH_M", "MEMORY BARRIER", + "SET DIRTY", "WRITE BLOCK", "WRITE BLOCK LOCK", + "READ MISS0", "READ MISS1", "READ MISS MOD0", + "READ MISS MOD1", "BCACHE VICTIM", "Spare", + "READ MISS MOD STC0", "READ MISS MOD STC1" + }; + + unsigned long addr; + unsigned long mask; + const char *cmd; + int par; + + addr = cia->cpu_err0 & 0xfffffff0; + addr |= (cia->cpu_err1 & 0x83UL) << 32; + cmd = cmd_desc[(cia->cpu_err1 >> 8) & 0xF]; + mask = (cia->cpu_err1 >> 12) & 0xF; + par = (cia->cpu_err1 >> 21) & 1; + + printk(KERN_CRIT "CIA machine check: System bus parity error\n"); + printk(KERN_CRIT " Command: %s, Parity bit: %d\n", cmd, par); + printk(KERN_CRIT " Address: %#010lx, Mask: %#lx\n", addr, mask); +} +#endif /* CONFIG_VERBOSE_MCHECK */ + + +static int +cia_decode_mchk(unsigned long la_ptr) +{ + struct el_common *com; + struct el_CIA_sysdata_mcheck *cia; + + com = (void *)la_ptr; + cia = (void *)(la_ptr + com->sys_offset); + + if ((cia->cia_err & CIA_ERR_VALID) == 0) + return 0; + +#ifdef CONFIG_VERBOSE_MCHECK + if (!alpha_verbose_mcheck) + return 1; + + switch (ffs(cia->cia_err & 0xfff) - 1) { + case 0: /* CIA_ERR_COR_ERR */ + cia_decode_ecc_error(cia, "Corrected ECC error"); + break; + case 1: /* CIA_ERR_UN_COR_ERR */ + cia_decode_ecc_error(cia, "Uncorrected ECC error"); + break; + case 2: /* CIA_ERR_CPU_PE */ + cia_decode_parity_error(cia); + break; + case 3: /* CIA_ERR_MEM_NEM */ + cia_decode_mem_error(cia, "Access to nonexistent memory"); + break; + case 4: /* CIA_ERR_PCI_SERR */ + cia_decode_pci_error(cia, "PCI bus system error"); + break; + case 5: /* CIA_ERR_PERR */ + cia_decode_pci_error(cia, "PCI data parity error"); + break; + case 6: /* CIA_ERR_PCI_ADDR_PE */ + cia_decode_pci_error(cia, "PCI address parity error"); + break; + case 7: /* CIA_ERR_RCVD_MAS_ABT */ + cia_decode_pci_error(cia, "PCI master abort"); + break; + case 8: /* CIA_ERR_RCVD_TAR_ABT */ + cia_decode_pci_error(cia, "PCI target abort"); + break; + case 9: /* CIA_ERR_PA_PTE_INV */ + cia_decode_pci_error(cia, "PCI invalid PTE"); + break; + case 10: /* CIA_ERR_FROM_WRT_ERR */ + cia_decode_mem_error(cia, "Write to flash ROM attempted"); + break; + case 11: /* CIA_ERR_IOA_TIMEOUT */ + cia_decode_pci_error(cia, "I/O timeout"); + break; + } + + if (cia->cia_err & CIA_ERR_LOST_CORR_ERR) + printk(KERN_CRIT "CIA lost machine check: " + "Correctable ECC error\n"); + if (cia->cia_err & CIA_ERR_LOST_UN_CORR_ERR) + printk(KERN_CRIT "CIA lost machine check: " 
+ "Uncorrectable ECC error\n"); + if (cia->cia_err & CIA_ERR_LOST_CPU_PE) + printk(KERN_CRIT "CIA lost machine check: " + "System bus parity error\n"); + if (cia->cia_err & CIA_ERR_LOST_MEM_NEM) + printk(KERN_CRIT "CIA lost machine check: " + "Access to nonexistent memory\n"); + if (cia->cia_err & CIA_ERR_LOST_PERR) + printk(KERN_CRIT "CIA lost machine check: " + "PCI data parity error\n"); + if (cia->cia_err & CIA_ERR_LOST_PCI_ADDR_PE) + printk(KERN_CRIT "CIA lost machine check: " + "PCI address parity error\n"); + if (cia->cia_err & CIA_ERR_LOST_RCVD_MAS_ABT) + printk(KERN_CRIT "CIA lost machine check: " + "PCI master abort\n"); + if (cia->cia_err & CIA_ERR_LOST_RCVD_TAR_ABT) + printk(KERN_CRIT "CIA lost machine check: " + "PCI target abort\n"); + if (cia->cia_err & CIA_ERR_LOST_PA_PTE_INV) + printk(KERN_CRIT "CIA lost machine check: " + "PCI invalid PTE\n"); + if (cia->cia_err & CIA_ERR_LOST_FROM_WRT_ERR) + printk(KERN_CRIT "CIA lost machine check: " + "Write to flash ROM attempted\n"); + if (cia->cia_err & CIA_ERR_LOST_IOA_TIMEOUT) + printk(KERN_CRIT "CIA lost machine check: " + "I/O timeout\n"); +#endif /* CONFIG_VERBOSE_MCHECK */ + + return 1; +} + +void +cia_machine_check(unsigned long vector, unsigned long la_ptr) +{ + int expected; + + /* Clear the error before any reporting. */ + mb(); + mb(); /* magic */ + draina(); + cia_pci_clr_err(); + wrmces(rdmces()); /* reset machine check pending flag. */ + mb(); + + expected = mcheck_expected(0); + if (!expected && vector == 0x660) + expected = cia_decode_mchk(la_ptr); + process_mcheck_info(vector, la_ptr, "CIA", expected); +} diff --git a/arch/alpha/kernel/core_irongate.c b/arch/alpha/kernel/core_irongate.c new file mode 100644 index 0000000000..6b8ed12936 --- /dev/null +++ b/arch/alpha/kernel/core_irongate.c @@ -0,0 +1,417 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/core_irongate.c + * + * Based on code written by David A. Rusling (david.rusling@reo.mts.dec.com). + * + * Copyright (C) 1999 Alpha Processor, Inc., + * (David Daniel, Stig Telfer, Soohoon Lee) + * + * Code common to all IRONGATE core logic chips. + */ + +#define __EXTERN_INLINE inline +#include +#include +#undef __EXTERN_INLINE + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "proto.h" +#include "pci_impl.h" + +/* + * BIOS32-style PCI interface: + */ + +#define DEBUG_CONFIG 0 + +#if DEBUG_CONFIG +# define DBG_CFG(args) printk args +#else +# define DBG_CFG(args) +#endif + +igcsr32 *IronECC; + +/* + * Given a bus, device, and function number, compute resulting + * configuration space address accordingly. It is therefore not safe + * to have concurrent invocations to configuration space access + * routines, but there really shouldn't be any need for this. + * + * addr[31:24] reserved + * addr[23:16] bus number (8 bits = 128 possible buses) + * addr[15:11] Device number (5 bits) + * addr[10: 8] function number + * addr[ 7: 2] register number + * + * For IRONGATE: + * if (bus = addr[23:16]) == 0 + * then + * type 0 config cycle: + * addr_on_pci[31:11] = id selection for device = addr[15:11] + * addr_on_pci[10: 2] = addr[10: 2] ??? + * addr_on_pci[ 1: 0] = 00 + * else + * type 1 config cycle (pass on with no decoding): + * addr_on_pci[31:24] = 0 + * addr_on_pci[23: 2] = addr[23: 2] + * addr_on_pci[ 1: 0] = 01 + * fi + * + * Notes: + * The function number selects which function of a multi-function device + * (e.g., SCSI and Ethernet). + * + * The register selects a DWORD (32 bit) register offset. 
Hence it + * doesn't get shifted by 2 bits as we want to "drop" the bottom two + * bits. + */ + +static int +mk_conf_addr(struct pci_bus *pbus, unsigned int device_fn, int where, + unsigned long *pci_addr, unsigned char *type1) +{ + unsigned long addr; + u8 bus = pbus->number; + + DBG_CFG(("mk_conf_addr(bus=%d ,device_fn=0x%x, where=0x%x, " + "pci_addr=0x%p, type1=0x%p)\n", + bus, device_fn, where, pci_addr, type1)); + + *type1 = (bus != 0); + + addr = (bus << 16) | (device_fn << 8) | where; + addr |= IRONGATE_CONF; + + *pci_addr = addr; + DBG_CFG(("mk_conf_addr: returning pci_addr 0x%lx\n", addr)); + return 0; +} + +static int +irongate_read_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + unsigned long addr; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (size) { + case 1: + *value = __kernel_ldbu(*(vucp)addr); + break; + case 2: + *value = __kernel_ldwu(*(vusp)addr); + break; + case 4: + *value = *(vuip)addr; + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +static int +irongate_write_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 value) +{ + unsigned long addr; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (size) { + case 1: + __kernel_stb(value, *(vucp)addr); + mb(); + __kernel_ldbu(*(vucp)addr); + break; + case 2: + __kernel_stw(value, *(vusp)addr); + mb(); + __kernel_ldwu(*(vusp)addr); + break; + case 4: + *(vuip)addr = value; + mb(); + *(vuip)addr; + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops irongate_pci_ops = +{ + .read = irongate_read_config, + .write = irongate_write_config, +}; + +int +irongate_pci_clr_err(void) +{ + unsigned int nmi_ctl=0; + unsigned int IRONGATE_jd; + +again: + IRONGATE_jd = IRONGATE0->stat_cmd; + printk("Iron stat_cmd %x\n", IRONGATE_jd); + IRONGATE0->stat_cmd = IRONGATE_jd; /* write again clears error bits */ + mb(); + IRONGATE_jd = IRONGATE0->stat_cmd; /* re-read to force write */ + + IRONGATE_jd = *IronECC; + printk("Iron ECC %x\n", IRONGATE_jd); + *IronECC = IRONGATE_jd; /* write again clears error bits */ + mb(); + IRONGATE_jd = *IronECC; /* re-read to force write */ + + /* Clear ALI NMI */ + nmi_ctl = inb(0x61); + nmi_ctl |= 0x0c; + outb(nmi_ctl, 0x61); + nmi_ctl &= ~0x0c; + outb(nmi_ctl, 0x61); + + IRONGATE_jd = *IronECC; + if (IRONGATE_jd & 0x300) goto again; + + return 0; +} + +#define IRONGATE_3GB 0xc0000000UL + +/* On Albacore (aka UP1500) with 4Gb of RAM we have to reserve some + memory for PCI. At this point we just reserve memory above 3Gb. Most + of this memory will be freed after PCI setup is done. */ +static void __init +albacore_init_arch(void) +{ + unsigned long memtop = max_low_pfn << PAGE_SHIFT; + unsigned long pci_mem = (memtop + 0x1000000UL) & ~0xffffffUL; + struct percpu_struct *cpu; + int pal_rev, pal_var; + + cpu = (struct percpu_struct*)((char*)hwrpb + hwrpb->processor_offset); + pal_rev = cpu->pal_revision & 0xffff; + pal_var = (cpu->pal_revision >> 16) & 0xff; + + /* Consoles earlier than A5.6-18 (OSF PALcode v1.62-2) set up + the CPU incorrectly (leave speculative stores enabled), + which causes memory corruption under certain conditions. + Issue a warning for such consoles. */ + if (alpha_using_srm && + (pal_rev < 0x13e || (pal_rev == 0x13e && pal_var < 2))) + printk(KERN_WARNING "WARNING! 
Upgrade to SRM A5.6-19 " + "or later\n"); + + if (pci_mem > IRONGATE_3GB) + pci_mem = IRONGATE_3GB; + IRONGATE0->pci_mem = pci_mem; + alpha_mv.min_mem_address = pci_mem; + if (memtop > pci_mem) { +#ifdef CONFIG_BLK_DEV_INITRD + extern unsigned long initrd_start, initrd_end; + extern void *move_initrd(unsigned long); + + /* Move the initrd out of the way. */ + if (initrd_end && __pa(initrd_end) > pci_mem) { + unsigned long size; + + size = initrd_end - initrd_start; + memblock_free((void *)initrd_start, PAGE_ALIGN(size)); + if (!move_initrd(pci_mem)) + printk("irongate_init_arch: initrd too big " + "(%ldK)\ndisabling initrd\n", + size / 1024); + } +#endif + memblock_reserve(pci_mem, memtop - pci_mem); + printk("irongate_init_arch: temporarily reserving " + "region %08lx-%08lx for PCI\n", pci_mem, memtop - 1); + } +} + +static void __init +irongate_setup_agp(void) +{ + /* Disable the GART window. AGPGART doesn't work due to yet + unresolved memory coherency issues... */ + IRONGATE0->agpva = IRONGATE0->agpva & ~0xf; + alpha_agpgart_size = 0; +} + +void __init +irongate_init_arch(void) +{ + struct pci_controller *hose; + int amd761 = (IRONGATE0->dev_vendor >> 16) > 0x7006; /* Albacore? */ + + IronECC = amd761 ? &IRONGATE0->bacsr54_eccms761 : &IRONGATE0->dramms; + + irongate_pci_clr_err(); + + if (amd761) + albacore_init_arch(); + + irongate_setup_agp(); + + /* + * Create our single hose. + */ + + pci_isa_hose = hose = alloc_pci_controller(); + hose->io_space = &ioport_resource; + hose->mem_space = &iomem_resource; + hose->index = 0; + + /* This is for userland consumption. For some reason, the 40-bit + PIO bias that we use in the kernel through KSEG didn't work for + the page table based user mappings. So make sure we get the + 43-bit PIO bias. */ + hose->sparse_mem_base = 0; + hose->sparse_io_base = 0; + hose->dense_mem_base + = (IRONGATE_MEM & 0xffffffffffUL) | 0x80000000000UL; + hose->dense_io_base + = (IRONGATE_IO & 0xffffffffffUL) | 0x80000000000UL; + + hose->sg_isa = hose->sg_pci = NULL; + __direct_map_base = 0; + __direct_map_size = 0xffffffff; +} + +/* + * IO map and AGP support + */ +#include +#include +#include +#include + +#define GET_PAGE_DIR_OFF(addr) (addr >> 22) +#define GET_PAGE_DIR_IDX(addr) (GET_PAGE_DIR_OFF(addr)) + +#define GET_GATT_OFF(addr) ((addr & 0x003ff000) >> 12) +#define GET_GATT(addr) (gatt_pages[GET_PAGE_DIR_IDX(addr)]) + +void __iomem * +irongate_ioremap(unsigned long addr, unsigned long size) +{ + struct vm_struct *area; + unsigned long vaddr; + unsigned long baddr, last; + u32 *mmio_regs, *gatt_pages, *cur_gatt, pte; + unsigned long gart_bus_addr; + + if (!alpha_agpgart_size) + return (void __iomem *)(addr + IRONGATE_MEM); + + gart_bus_addr = (unsigned long)IRONGATE0->bar0 & + PCI_BASE_ADDRESS_MEM_MASK; + + /* + * Check for within the AGP aperture... + */ + do { + /* + * Check the AGP area + */ + if (addr >= gart_bus_addr && addr + size - 1 < + gart_bus_addr + alpha_agpgart_size) + break; + + /* + * Not found - assume legacy ioremap + */ + return (void __iomem *)(addr + IRONGATE_MEM); + } while(0); + + mmio_regs = (u32 *)(((unsigned long)IRONGATE0->bar1 & + PCI_BASE_ADDRESS_MEM_MASK) + IRONGATE_MEM); + + gatt_pages = (u32 *)(phys_to_virt(mmio_regs[1])); /* FIXME */ + + /* + * Adjust the limits (mappings must be page aligned) + */ + if (addr & ~PAGE_MASK) { + printk("AGP ioremap failed... 
addr not page aligned (0x%lx)\n", + addr); + return (void __iomem *)(addr + IRONGATE_MEM); + } + last = addr + size - 1; + size = PAGE_ALIGN(last) - addr; + +#if 0 + printk("irongate_ioremap(0x%lx, 0x%lx)\n", addr, size); + printk("irongate_ioremap: gart_bus_addr 0x%lx\n", gart_bus_addr); + printk("irongate_ioremap: gart_aper_size 0x%lx\n", gart_aper_size); + printk("irongate_ioremap: mmio_regs %p\n", mmio_regs); + printk("irongate_ioremap: gatt_pages %p\n", gatt_pages); + + for(baddr = addr; baddr <= last; baddr += PAGE_SIZE) + { + cur_gatt = phys_to_virt(GET_GATT(baddr) & ~1); + pte = cur_gatt[GET_GATT_OFF(baddr)] & ~1; + printk("irongate_ioremap: cur_gatt %p pte 0x%x\n", + cur_gatt, pte); + } +#endif + + /* + * Map it + */ + area = get_vm_area(size, VM_IOREMAP); + if (!area) return NULL; + + for(baddr = addr, vaddr = (unsigned long)area->addr; + baddr <= last; + baddr += PAGE_SIZE, vaddr += PAGE_SIZE) + { + cur_gatt = phys_to_virt(GET_GATT(baddr) & ~1); + pte = cur_gatt[GET_GATT_OFF(baddr)] & ~1; + + if (__alpha_remap_area_pages(vaddr, + pte, PAGE_SIZE, 0)) { + printk("AGP ioremap: FAILED to map...\n"); + vfree(area->addr); + return NULL; + } + } + + flush_tlb_all(); + + vaddr = (unsigned long)area->addr + (addr & ~PAGE_MASK); +#if 0 + printk("irongate_ioremap(0x%lx, 0x%lx) returning 0x%lx\n", + addr, size, vaddr); +#endif + return (void __iomem *)vaddr; +} +EXPORT_SYMBOL(irongate_ioremap); + +void +irongate_iounmap(volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + if (((long)addr >> 41) == -2) + return; /* kseg map, nothing to do */ + if (addr) + return vfree((void *)(PAGE_MASK & addr)); +} +EXPORT_SYMBOL(irongate_iounmap); diff --git a/arch/alpha/kernel/core_lca.c b/arch/alpha/kernel/core_lca.c new file mode 100644 index 0000000000..57e0750419 --- /dev/null +++ b/arch/alpha/kernel/core_lca.c @@ -0,0 +1,517 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/core_lca.c + * + * Written by David Mosberger (davidm@cs.arizona.edu) with some code + * taken from Dave Rusling's (david.rusling@reo.mts.dec.com) 32-bit + * bios code. + * + * Code common to all LCA core logic chips. + */ + +#define __EXTERN_INLINE inline +#include +#include +#undef __EXTERN_INLINE + +#include +#include +#include +#include + +#include +#include +#include + +#include "proto.h" +#include "pci_impl.h" + + +/* + * BIOS32-style PCI interface: + */ + +/* + * Machine check reasons. Defined according to PALcode sources + * (osf.h and platform.h). + */ +#define MCHK_K_TPERR 0x0080 +#define MCHK_K_TCPERR 0x0082 +#define MCHK_K_HERR 0x0084 +#define MCHK_K_ECC_C 0x0086 +#define MCHK_K_ECC_NC 0x0088 +#define MCHK_K_UNKNOWN 0x008A +#define MCHK_K_CACKSOFT 0x008C +#define MCHK_K_BUGCHECK 0x008E +#define MCHK_K_OS_BUGCHECK 0x0090 +#define MCHK_K_DCPERR 0x0092 +#define MCHK_K_ICPERR 0x0094 + + +/* + * Platform-specific machine-check reasons: + */ +#define MCHK_K_SIO_SERR 0x204 /* all platforms so far */ +#define MCHK_K_SIO_IOCHK 0x206 /* all platforms so far */ +#define MCHK_K_DCSR 0x208 /* all but Noname */ + + +/* + * Given a bus, device, and function number, compute resulting + * configuration space address and setup the LCA_IOC_CONF register + * accordingly. It is therefore not safe to have concurrent + * invocations to configuration space access routines, but there + * really shouldn't be any need for this. 
+ * + * Type 0: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | | | | | | | | | | | | | | | | | | | | | | |F|F|F|R|R|R|R|R|R|0|0| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:11 Device select bit. + * 10:8 Function number + * 7:2 Register number + * + * Type 1: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | | | | | | | | | |B|B|B|B|B|B|B|B|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|1| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:24 reserved + * 23:16 bus number (8 bits = 128 possible buses) + * 15:11 Device number (5 bits) + * 10:8 function number + * 7:2 register number + * + * Notes: + * The function number selects which function of a multi-function device + * (e.g., SCSI and Ethernet). + * + * The register selects a DWORD (32 bit) register offset. Hence it + * doesn't get shifted by 2 bits as we want to "drop" the bottom two + * bits. + */ + +static int +mk_conf_addr(struct pci_bus *pbus, unsigned int device_fn, int where, + unsigned long *pci_addr) +{ + unsigned long addr; + u8 bus = pbus->number; + + if (bus == 0) { + int device = device_fn >> 3; + int func = device_fn & 0x7; + + /* Type 0 configuration cycle. */ + + if (device > 12) { + return -1; + } + + *(vulp)LCA_IOC_CONF = 0; + addr = (1 << (11 + device)) | (func << 8) | where; + } else { + /* Type 1 configuration cycle. */ + *(vulp)LCA_IOC_CONF = 1; + addr = (bus << 16) | (device_fn << 8) | where; + } + *pci_addr = addr; + return 0; +} + +static unsigned int +conf_read(unsigned long addr) +{ + unsigned long flags, code, stat0; + unsigned int value; + + local_irq_save(flags); + + /* Reset status register to avoid losing errors. */ + stat0 = *(vulp)LCA_IOC_STAT0; + *(vulp)LCA_IOC_STAT0 = stat0; + mb(); + + /* Access configuration space. */ + value = *(vuip)addr; + draina(); + + stat0 = *(vulp)LCA_IOC_STAT0; + if (stat0 & LCA_IOC_STAT0_ERR) { + code = ((stat0 >> LCA_IOC_STAT0_CODE_SHIFT) + & LCA_IOC_STAT0_CODE_MASK); + if (code != 1) { + printk("lca.c:conf_read: got stat0=%lx\n", stat0); + } + + /* Reset error status. */ + *(vulp)LCA_IOC_STAT0 = stat0; + mb(); + + /* Reset machine check. */ + wrmces(0x7); + + value = 0xffffffff; + } + local_irq_restore(flags); + return value; +} + +static void +conf_write(unsigned long addr, unsigned int value) +{ + unsigned long flags, code, stat0; + + local_irq_save(flags); /* avoid getting hit by machine check */ + + /* Reset status register to avoid losing errors. */ + stat0 = *(vulp)LCA_IOC_STAT0; + *(vulp)LCA_IOC_STAT0 = stat0; + mb(); + + /* Access configuration space. */ + *(vuip)addr = value; + draina(); + + stat0 = *(vulp)LCA_IOC_STAT0; + if (stat0 & LCA_IOC_STAT0_ERR) { + code = ((stat0 >> LCA_IOC_STAT0_CODE_SHIFT) + & LCA_IOC_STAT0_CODE_MASK); + if (code != 1) { + printk("lca.c:conf_write: got stat0=%lx\n", stat0); + } + + /* Reset error status. */ + *(vulp)LCA_IOC_STAT0 = stat0; + mb(); + + /* Reset machine check. 
*/ + wrmces(0x7); + } + local_irq_restore(flags); +} + +static int +lca_read_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + unsigned long addr, pci_addr; + long mask; + int shift; + + if (mk_conf_addr(bus, devfn, where, &pci_addr)) + return PCIBIOS_DEVICE_NOT_FOUND; + + shift = (where & 3) * 8; + mask = (size - 1) * 8; + addr = (pci_addr << 5) + mask + LCA_CONF; + *value = conf_read(addr) >> (shift); + return PCIBIOS_SUCCESSFUL; +} + +static int +lca_write_config(struct pci_bus *bus, unsigned int devfn, int where, int size, + u32 value) +{ + unsigned long addr, pci_addr; + long mask; + + if (mk_conf_addr(bus, devfn, where, &pci_addr)) + return PCIBIOS_DEVICE_NOT_FOUND; + + mask = (size - 1) * 8; + addr = (pci_addr << 5) + mask + LCA_CONF; + conf_write(addr, value << ((where & 3) * 8)); + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops lca_pci_ops = +{ + .read = lca_read_config, + .write = lca_write_config, +}; + +void +lca_pci_tbi(struct pci_controller *hose, dma_addr_t start, dma_addr_t end) +{ + wmb(); + *(vulp)LCA_IOC_TBIA = 0; + mb(); +} + +void __init +lca_init_arch(void) +{ + struct pci_controller *hose; + + /* + * Create our single hose. + */ + + pci_isa_hose = hose = alloc_pci_controller(); + hose->io_space = &ioport_resource; + hose->mem_space = &iomem_resource; + hose->index = 0; + + hose->sparse_mem_base = LCA_SPARSE_MEM - IDENT_ADDR; + hose->dense_mem_base = LCA_DENSE_MEM - IDENT_ADDR; + hose->sparse_io_base = LCA_IO - IDENT_ADDR; + hose->dense_io_base = 0; + + /* + * Set up the PCI to main memory translation windows. + * + * Mimic the SRM settings for the direct-map window. + * Window 0 is scatter-gather 8MB at 8MB (for isa). + * Window 1 is direct access 1GB at 1GB. + * + * Note that we do not try to save any of the DMA window CSRs + * before setting them, since we cannot read those CSRs on LCA. + */ + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, + SMP_CACHE_BYTES); + hose->sg_pci = NULL; + __direct_map_base = 0x40000000; + __direct_map_size = 0x40000000; + + *(vulp)LCA_IOC_W_BASE0 = hose->sg_isa->dma_base | (3UL << 32); + *(vulp)LCA_IOC_W_MASK0 = (hose->sg_isa->size - 1) & 0xfff00000; + *(vulp)LCA_IOC_T_BASE0 = virt_to_phys(hose->sg_isa->ptes); + + *(vulp)LCA_IOC_W_BASE1 = __direct_map_base | (2UL << 32); + *(vulp)LCA_IOC_W_MASK1 = (__direct_map_size - 1) & 0xfff00000; + *(vulp)LCA_IOC_T_BASE1 = 0; + + *(vulp)LCA_IOC_TB_ENA = 0x80; + + lca_pci_tbi(hose, 0, -1); + + /* + * Disable PCI parity for now. The NCR53c810 chip has + * troubles meeting the PCI spec which results in + * data parity errors. + */ + *(vulp)LCA_IOC_PAR_DIS = 1UL<<5; + + /* + * Finally, set up for restoring the correct HAE if using SRM. + * Again, since we cannot read many of the CSRs on the LCA, + * one of which happens to be the HAE, we save the value that + * the SRM will expect... + */ + if (alpha_using_srm) + srm_hae = 0x80000000UL; +} + +/* + * Constants used during machine-check handling. I suppose these + * could be moved into lca.h but I don't see much reason why anybody + * else would want to use them. 
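+ *
+ * lca_machine_check() below only decodes the failing memory address
+ * (via mem_error) when ESR_EAV is set, and only reports an I/O
+ * controller error (via ioc_error) when IOC_ERR is set in ioc_stat0.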
+ */ + +#define ESR_EAV (1UL<< 0) /* error address valid */ +#define ESR_CEE (1UL<< 1) /* correctable error */ +#define ESR_UEE (1UL<< 2) /* uncorrectable error */ +#define ESR_WRE (1UL<< 3) /* write-error */ +#define ESR_SOR (1UL<< 4) /* error source */ +#define ESR_CTE (1UL<< 7) /* cache-tag error */ +#define ESR_MSE (1UL<< 9) /* multiple soft errors */ +#define ESR_MHE (1UL<<10) /* multiple hard errors */ +#define ESR_NXM (1UL<<12) /* non-existent memory */ + +#define IOC_ERR ( 1<<4) /* ioc logs an error */ +#define IOC_CMD_SHIFT 0 +#define IOC_CMD (0xf<> IOC_CODE_SHIFT; + unsigned cmd = (stat0 & IOC_CMD) >> IOC_CMD_SHIFT; + + printk(" %s initiated PCI %s cycle to address %x" + " failed due to %s.\n", + code > 3 ? "PCI" : "CPU", pci_cmd[cmd], stat1, err_name[code]); + + if (code == 5 || code == 6) { + printk(" (Error occurred at PCI memory address %x.)\n", + (stat0 & ~IOC_P_NBR)); + } + if (stat0 & IOC_LOST) { + printk(" Other PCI errors occurred simultaneously.\n"); + } +} + +void +lca_machine_check(unsigned long vector, unsigned long la_ptr) +{ + const char * reason; + union el_lca el; + + el.c = (struct el_common *) la_ptr; + + wrmces(rdmces()); /* reset machine check pending flag */ + + printk(KERN_CRIT "LCA machine check: vector=%#lx pc=%#lx code=%#x\n", + vector, get_irq_regs()->pc, (unsigned int) el.c->code); + + /* + * The first quadword after the common header always seems to + * be the machine check reason---don't know why this isn't + * part of the common header instead. In the case of a long + * logout frame, the upper 32 bits is the machine check + * revision level, which we ignore for now. + */ + switch ((unsigned int) el.c->code) { + case MCHK_K_TPERR: reason = "tag parity error"; break; + case MCHK_K_TCPERR: reason = "tag control parity error"; break; + case MCHK_K_HERR: reason = "access to non-existent memory"; break; + case MCHK_K_ECC_C: reason = "correctable ECC error"; break; + case MCHK_K_ECC_NC: reason = "non-correctable ECC error"; break; + case MCHK_K_CACKSOFT: reason = "MCHK_K_CACKSOFT"; break; + case MCHK_K_BUGCHECK: reason = "illegal exception in PAL mode"; break; + case MCHK_K_OS_BUGCHECK: reason = "callsys in kernel mode"; break; + case MCHK_K_DCPERR: reason = "d-cache parity error"; break; + case MCHK_K_ICPERR: reason = "i-cache parity error"; break; + case MCHK_K_SIO_SERR: reason = "SIO SERR occurred on PCI bus"; break; + case MCHK_K_SIO_IOCHK: reason = "SIO IOCHK occurred on ISA bus"; break; + case MCHK_K_DCSR: reason = "MCHK_K_DCSR"; break; + case MCHK_K_UNKNOWN: + default: reason = "unknown"; break; + } + + switch (el.c->size) { + case sizeof(struct el_lca_mcheck_short): + printk(KERN_CRIT + " Reason: %s (short frame%s, dc_stat=%#lx):\n", + reason, el.c->retry ? ", retryable" : "", + el.s->dc_stat); + if (el.s->esr & ESR_EAV) { + mem_error(el.s->esr, el.s->ear); + } + if (el.s->ioc_stat0 & IOC_ERR) { + ioc_error(el.s->ioc_stat0, el.s->ioc_stat1); + } + break; + + case sizeof(struct el_lca_mcheck_long): + printk(KERN_CRIT " Reason: %s (long frame%s):\n", + reason, el.c->retry ? ", retryable" : ""); + printk(KERN_CRIT + " reason: %#lx exc_addr: %#lx dc_stat: %#lx\n", + el.l->pt[0], el.l->exc_addr, el.l->dc_stat); + printk(KERN_CRIT " car: %#lx\n", el.l->car); + if (el.l->esr & ESR_EAV) { + mem_error(el.l->esr, el.l->ear); + } + if (el.l->ioc_stat0 & IOC_ERR) { + ioc_error(el.l->ioc_stat0, el.l->ioc_stat1); + } + break; + + default: + printk(KERN_CRIT " Unknown errorlog size %d\n", el.c->size); + } + + /* Dump the logout area to give all info. 
*/ +#ifdef CONFIG_VERBOSE_MCHECK + if (alpha_verbose_mcheck > 1) { + unsigned long * ptr = (unsigned long *) la_ptr; + long i; + for (i = 0; i < el.c->size / sizeof(long); i += 2) { + printk(KERN_CRIT " +%8lx %016lx %016lx\n", + i*sizeof(long), ptr[i], ptr[i+1]); + } + } +#endif /* CONFIG_VERBOSE_MCHECK */ +} + +/* + * The following routines are needed to support the SPEED changing + * necessary to successfully manage the thermal problem on the AlphaBook1. + */ + +void +lca_clock_print(void) +{ + long pmr_reg; + + pmr_reg = LCA_READ_PMR; + + printk("Status of clock control:\n"); + printk("\tPrimary clock divisor\t0x%lx\n", LCA_GET_PRIMARY(pmr_reg)); + printk("\tOverride clock divisor\t0x%lx\n", LCA_GET_OVERRIDE(pmr_reg)); + printk("\tInterrupt override is %s\n", + (pmr_reg & LCA_PMR_INTO) ? "on" : "off"); + printk("\tDMA override is %s\n", + (pmr_reg & LCA_PMR_DMAO) ? "on" : "off"); + +} + +int +lca_get_clock(void) +{ + long pmr_reg; + + pmr_reg = LCA_READ_PMR; + return(LCA_GET_PRIMARY(pmr_reg)); + +} + +void +lca_clock_fiddle(int divisor) +{ + long pmr_reg; + + pmr_reg = LCA_READ_PMR; + LCA_SET_PRIMARY_CLOCK(pmr_reg, divisor); + /* lca_norm_clock = divisor; */ + LCA_WRITE_PMR(pmr_reg); + mb(); +} diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c new file mode 100644 index 0000000000..1efca79ac8 --- /dev/null +++ b/arch/alpha/kernel/core_marvel.c @@ -0,0 +1,1099 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/core_marvel.c + * + * Code common to all Marvel based systems. + */ + +#define __EXTERN_INLINE inline +#include +#include +#undef __EXTERN_INLINE + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "proto.h" +#include "pci_impl.h" + + +/* + * Debug helpers + */ +#define DEBUG_CONFIG 0 + +#if DEBUG_CONFIG +# define DBG_CFG(args) printk args +#else +# define DBG_CFG(args) +#endif + + +/* + * Private data + */ +static struct io7 *io7_head = NULL; + + +/* + * Helper functions + */ +static unsigned long __attribute__ ((unused)) +read_ev7_csr(int pe, unsigned long offset) +{ + ev7_csr *ev7csr = EV7_CSR_KERN(pe, offset); + unsigned long q; + + mb(); + q = ev7csr->csr; + mb(); + + return q; +} + +static void __attribute__ ((unused)) +write_ev7_csr(int pe, unsigned long offset, unsigned long q) +{ + ev7_csr *ev7csr = EV7_CSR_KERN(pe, offset); + + mb(); + ev7csr->csr = q; + mb(); +} + +static char * __init +mk_resource_name(int pe, int port, char *str) +{ + char tmp[80]; + char *name; + + sprintf(tmp, "PCI %s PE %d PORT %d", str, pe, port); + name = memblock_alloc(strlen(tmp) + 1, SMP_CACHE_BYTES); + if (!name) + panic("%s: Failed to allocate %zu bytes\n", __func__, + strlen(tmp) + 1); + strcpy(name, tmp); + + return name; +} + +inline struct io7 * +marvel_next_io7(struct io7 *prev) +{ + return (prev ? 
prev->next : io7_head); +} + +struct io7 * +marvel_find_io7(int pe) +{ + struct io7 *io7; + + for (io7 = io7_head; io7 && io7->pe != pe; io7 = io7->next) + continue; + + return io7; +} + +static struct io7 * __init +alloc_io7(unsigned int pe) +{ + struct io7 *io7; + struct io7 *insp; + int h; + + if (marvel_find_io7(pe)) { + printk(KERN_WARNING "IO7 at PE %d already allocated!\n", pe); + return NULL; + } + + io7 = memblock_alloc(sizeof(*io7), SMP_CACHE_BYTES); + if (!io7) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(*io7)); + io7->pe = pe; + raw_spin_lock_init(&io7->irq_lock); + + for (h = 0; h < 4; h++) { + io7->ports[h].io7 = io7; + io7->ports[h].port = h; + io7->ports[h].enabled = 0; /* default to disabled */ + } + + /* + * Insert in pe sorted order. + */ + if (NULL == io7_head) /* empty list */ + io7_head = io7; + else if (io7_head->pe > io7->pe) { /* insert at head */ + io7->next = io7_head; + io7_head = io7; + } else { /* insert at position */ + for (insp = io7_head; insp; insp = insp->next) { + if (insp->pe == io7->pe) { + printk(KERN_ERR "Too many IO7s at PE %d\n", + io7->pe); + return NULL; + } + + if (NULL == insp->next || + insp->next->pe > io7->pe) { /* insert here */ + io7->next = insp->next; + insp->next = io7; + break; + } + } + + if (NULL == insp) { /* couldn't insert ?!? */ + printk(KERN_WARNING "Failed to insert IO7 at PE %d " + " - adding at head of list\n", io7->pe); + io7->next = io7_head; + io7_head = io7; + } + } + + return io7; +} + +void +io7_clear_errors(struct io7 *io7) +{ + io7_port7_csrs *p7csrs; + io7_ioport_csrs *csrs; + int port; + + + /* + * First the IO ports. + */ + for (port = 0; port < 4; port++) { + csrs = IO7_CSRS_KERN(io7->pe, port); + + csrs->POx_ERR_SUM.csr = -1UL; + csrs->POx_TLB_ERR.csr = -1UL; + csrs->POx_SPL_COMPLT.csr = -1UL; + csrs->POx_TRANS_SUM.csr = -1UL; + } + + /* + * Then the common ones. + */ + p7csrs = IO7_PORT7_CSRS_KERN(io7->pe); + + p7csrs->PO7_ERROR_SUM.csr = -1UL; + p7csrs->PO7_UNCRR_SYM.csr = -1UL; + p7csrs->PO7_CRRCT_SYM.csr = -1UL; +} + + +/* + * IO7 PCI, PCI/X, AGP configuration. + */ +static void __init +io7_init_hose(struct io7 *io7, int port) +{ + static int hose_index = 0; + + struct pci_controller *hose = alloc_pci_controller(); + struct io7_port *io7_port = &io7->ports[port]; + io7_ioport_csrs *csrs = IO7_CSRS_KERN(io7->pe, port); + int i; + + hose->index = hose_index++; /* arbitrary */ + + /* + * We don't have an isa or legacy hose, but glibc expects to be + * able to use the bus == 0 / dev == 0 form of the iobase syscall + * to determine information about the i/o system. Since XFree86 + * relies on glibc's determination to tell whether or not to use + * sparse access, we need to point the pci_isa_hose at a real hose + * so at least that determination is correct. + */ + if (hose->index == 0) + pci_isa_hose = hose; + + io7_port->csrs = csrs; + io7_port->hose = hose; + hose->sysdata = io7_port; + + hose->io_space = alloc_resource(); + hose->mem_space = alloc_resource(); + + /* + * Base addresses for userland consumption. Since these are going + * to be mapped, they are pure physical addresses. + */ + hose->sparse_mem_base = hose->sparse_io_base = 0; + hose->dense_mem_base = IO7_MEM_PHYS(io7->pe, port); + hose->dense_io_base = IO7_IO_PHYS(io7->pe, port); + + /* + * Base addresses and resource ranges for kernel consumption. 
+ */ + hose->config_space_base = (unsigned long)IO7_CONF_KERN(io7->pe, port); + + hose->io_space->start = (unsigned long)IO7_IO_KERN(io7->pe, port); + hose->io_space->end = hose->io_space->start + IO7_IO_SPACE - 1; + hose->io_space->name = mk_resource_name(io7->pe, port, "IO"); + hose->io_space->flags = IORESOURCE_IO; + + hose->mem_space->start = (unsigned long)IO7_MEM_KERN(io7->pe, port); + hose->mem_space->end = hose->mem_space->start + IO7_MEM_SPACE - 1; + hose->mem_space->name = mk_resource_name(io7->pe, port, "MEM"); + hose->mem_space->flags = IORESOURCE_MEM; + + if (request_resource(&ioport_resource, hose->io_space) < 0) + printk(KERN_ERR "Failed to request IO on hose %d\n", + hose->index); + if (request_resource(&iomem_resource, hose->mem_space) < 0) + printk(KERN_ERR "Failed to request MEM on hose %d\n", + hose->index); + + /* + * Save the existing DMA window settings for later restoration. + */ + for (i = 0; i < 4; i++) { + io7_port->saved_wbase[i] = csrs->POx_WBASE[i].csr; + io7_port->saved_wmask[i] = csrs->POx_WMASK[i].csr; + io7_port->saved_tbase[i] = csrs->POx_TBASE[i].csr; + } + + /* + * Set up the PCI to main memory translation windows. + * + * Window 0 is scatter-gather 8MB at 8MB + * Window 1 is direct access 1GB at 2GB + * Window 2 is scatter-gather (up-to) 1GB at 3GB + * Window 3 is disabled + */ + + /* + * TBIA before modifying windows. + */ + marvel_pci_tbi(hose, 0, -1); + + /* + * Set up window 0 for scatter-gather 8MB at 8MB. + */ + hose->sg_isa = iommu_arena_new_node(0, hose, 0x00800000, 0x00800000, 0); + hose->sg_isa->align_entry = 8; /* cache line boundary */ + csrs->POx_WBASE[0].csr = + hose->sg_isa->dma_base | wbase_m_ena | wbase_m_sg; + csrs->POx_WMASK[0].csr = (hose->sg_isa->size - 1) & wbase_m_addr; + csrs->POx_TBASE[0].csr = virt_to_phys(hose->sg_isa->ptes); + + /* + * Set up window 1 for direct-mapped 1GB at 2GB. + */ + csrs->POx_WBASE[1].csr = __direct_map_base | wbase_m_ena; + csrs->POx_WMASK[1].csr = (__direct_map_size - 1) & wbase_m_addr; + csrs->POx_TBASE[1].csr = 0; + + /* + * Set up window 2 for scatter-gather (up-to) 1GB at 3GB. + */ + hose->sg_pci = iommu_arena_new_node(0, hose, 0xc0000000, 0x40000000, 0); + hose->sg_pci->align_entry = 8; /* cache line boundary */ + csrs->POx_WBASE[2].csr = + hose->sg_pci->dma_base | wbase_m_ena | wbase_m_sg; + csrs->POx_WMASK[2].csr = (hose->sg_pci->size - 1) & wbase_m_addr; + csrs->POx_TBASE[2].csr = virt_to_phys(hose->sg_pci->ptes); + + /* + * Disable window 3. + */ + csrs->POx_WBASE[3].csr = 0; + + /* + * Make sure that the AGP Monster Window is disabled. + */ + csrs->POx_CTRL.csr &= ~(1UL << 61); + +#if 1 + printk("FIXME: disabling master aborts\n"); + csrs->POx_MSK_HEI.csr &= ~(3UL << 14); +#endif + /* + * TBIA after modifying windows. + */ + marvel_pci_tbi(hose, 0, -1); +} + +static void __init +marvel_init_io7(struct io7 *io7) +{ + int i; + + printk("Initializing IO7 at PID %d\n", io7->pe); + + /* + * Get the Port 7 CSR pointer. + */ + io7->csrs = IO7_PORT7_CSRS_KERN(io7->pe); + + /* + * Init this IO7's hoses. 
+ */ + for (i = 0; i < IO7_NUM_PORTS; i++) { + io7_ioport_csrs *csrs = IO7_CSRS_KERN(io7->pe, i); + if (csrs->POx_CACHE_CTL.csr == 8) { + io7->ports[i].enabled = 1; + io7_init_hose(io7, i); + } + } +} + +void __init +marvel_io7_present(gct6_node *node) +{ + int pe; + + if (node->type != GCT_TYPE_HOSE || + node->subtype != GCT_SUBTYPE_IO_PORT_MODULE) + return; + + pe = (node->id >> 8) & 0xff; + printk("Found an IO7 at PID %d\n", pe); + + alloc_io7(pe); +} + +static void __init +marvel_find_console_vga_hose(void) +{ +#ifdef CONFIG_VGA_HOSE + u64 *pu64 = (u64 *)((u64)hwrpb + hwrpb->ctbt_offset); + + if (pu64[7] == 3) { /* TERM_TYPE == graphics */ + struct pci_controller *hose = NULL; + int h = (pu64[30] >> 24) & 0xff; /* TERM_OUT_LOC, hose # */ + struct io7 *io7; + int pid, port; + + /* FIXME - encoding is going to have to change for Marvel + * since hose will be able to overflow a byte... + * need to fix this decode when the console + * changes its encoding + */ + printk("console graphics is on hose %d (console)\n", h); + + /* + * The console's hose numbering is: + * + * hose: PID + * hose<1:0>: PORT + * + * We need to find the hose at that pid and port + */ + pid = h >> 2; + port = h & 3; + if ((io7 = marvel_find_io7(pid))) + hose = io7->ports[port].hose; + + if (hose) { + printk("Console graphics on hose %d\n", hose->index); + pci_vga_hose = hose; + } + } +#endif +} + +gct6_search_struct gct_wanted_node_list[] __initdata = { + { GCT_TYPE_HOSE, GCT_SUBTYPE_IO_PORT_MODULE, marvel_io7_present }, + { 0, 0, NULL } +}; + +/* + * In case the GCT is not complete, let the user specify PIDs with IO7s + * at boot time. Syntax is 'io7=a,b,c,...,n' where a-n are the PIDs (decimal) + * where IO7s are connected + */ +static int __init +marvel_specify_io7(char *str) +{ + unsigned long pid; + struct io7 *io7; + char *pchar; + + do { + pid = simple_strtoul(str, &pchar, 0); + if (pchar != str) { + printk("User-specified IO7 at PID %lu\n", pid); + io7 = alloc_io7(pid); + if (io7) marvel_init_io7(io7); + } + + if (pchar == str) pchar++; + str = pchar; + } while(*str); + + return 1; +} +__setup("io7=", marvel_specify_io7); + +void __init +marvel_init_arch(void) +{ + struct io7 *io7; + + /* With multiple PCI busses, we play with I/O as physical addrs. */ + ioport_resource.end = ~0UL; + + /* PCI DMA Direct Mapping is 1GB at 2GB. */ + __direct_map_base = 0x80000000; + __direct_map_size = 0x40000000; + + /* Parse the config tree. */ + gct6_find_nodes(GCT_NODE_PTR(0), gct_wanted_node_list); + + /* Init the io7s. */ + for (io7 = NULL; NULL != (io7 = marvel_next_io7(io7)); ) + marvel_init_io7(io7); + + /* Check for graphic console location (if any). */ + marvel_find_console_vga_hose(); +} + +void +marvel_kill_arch(int mode) +{ +} + + +/* + * PCI Configuration Space access functions + * + * Configuration space addresses have the following format: + * + * |2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * |3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |B|B|B|B|B|B|B|B|D|D|D|D|D|F|F|F|R|R|R|R|R|R|R|R| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * n:24 reserved for hose base + * 23:16 bus number (8 bits = 128 possible buses) + * 15:11 Device number (5 bits) + * 10:8 function number + * 7:2 register number + * + * Notes: + * IO7 determines whether to use a type 0 or type 1 config cycle + * based on the bus number. Therefore the bus number must be set + * to 0 for the root bus on any hose. 
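+ *
+ * For example, with a hose whose config_space_base is C, an access to
+ * bus 1, device 5, function 0, register 0x04 uses the address
+ * C | (1 << 16) | (0x28 << 8) | 0x04; see build_conf_addr() below.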
+ * + * The function number selects which function of a multi-function device + * (e.g., SCSI and Ethernet). + * + */ + +static inline unsigned long +build_conf_addr(struct pci_controller *hose, u8 bus, + unsigned int devfn, int where) +{ + return (hose->config_space_base | (bus << 16) | (devfn << 8) | where); +} + +static unsigned long +mk_conf_addr(struct pci_bus *pbus, unsigned int devfn, int where) +{ + struct pci_controller *hose = pbus->sysdata; + struct io7_port *io7_port; + unsigned long addr = 0; + u8 bus = pbus->number; + + if (!hose) + return addr; + + /* Check for enabled. */ + io7_port = hose->sysdata; + if (!io7_port->enabled) + return addr; + + if (!pbus->parent) { /* No parent means peer PCI bus. */ + /* Don't support idsel > 20 on primary bus. */ + if (devfn >= PCI_DEVFN(21, 0)) + return addr; + bus = 0; + } + + addr = build_conf_addr(hose, bus, devfn, where); + + DBG_CFG(("mk_conf_addr: returning pci_addr 0x%lx\n", addr)); + return addr; +} + +static int +marvel_read_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + unsigned long addr; + + if (0 == (addr = mk_conf_addr(bus, devfn, where))) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch(size) { + case 1: + *value = __kernel_ldbu(*(vucp)addr); + break; + case 2: + *value = __kernel_ldwu(*(vusp)addr); + break; + case 4: + *value = *(vuip)addr; + break; + default: + return PCIBIOS_FUNC_NOT_SUPPORTED; + } + + return PCIBIOS_SUCCESSFUL; +} + +static int +marvel_write_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 value) +{ + unsigned long addr; + + if (0 == (addr = mk_conf_addr(bus, devfn, where))) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (size) { + case 1: + __kernel_stb(value, *(vucp)addr); + mb(); + __kernel_ldbu(*(vucp)addr); + break; + case 2: + __kernel_stw(value, *(vusp)addr); + mb(); + __kernel_ldwu(*(vusp)addr); + break; + case 4: + *(vuip)addr = value; + mb(); + *(vuip)addr; + break; + default: + return PCIBIOS_FUNC_NOT_SUPPORTED; + } + + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops marvel_pci_ops = +{ + .read = marvel_read_config, + .write = marvel_write_config, +}; + + +/* + * Other PCI helper functions. 
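+ *
+ * marvel_pci_tbi() below invalidates the IO7 scatter-gather TLB by
+ * writing POx_SG_TBIA and then reading it back to force the write
+ * out to the chip.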
+ */ +void +marvel_pci_tbi(struct pci_controller *hose, dma_addr_t start, dma_addr_t end) +{ + io7_ioport_csrs *csrs = ((struct io7_port *)hose->sysdata)->csrs; + + wmb(); + csrs->POx_SG_TBIA.csr = 0; + mb(); + csrs->POx_SG_TBIA.csr; +} + + + +/* + * RTC Support + */ +struct marvel_rtc_access_info { + unsigned long function; + unsigned long index; + unsigned long data; +}; + +static void +__marvel_access_rtc(void *info) +{ + struct marvel_rtc_access_info *rtc_access = info; + + register unsigned long __r0 __asm__("$0"); + register unsigned long __r16 __asm__("$16") = rtc_access->function; + register unsigned long __r17 __asm__("$17") = rtc_access->index; + register unsigned long __r18 __asm__("$18") = rtc_access->data; + + __asm__ __volatile__( + "call_pal %4 # cserve rtc" + : "=r"(__r16), "=r"(__r17), "=r"(__r18), "=r"(__r0) + : "i"(PAL_cserve), "0"(__r16), "1"(__r17), "2"(__r18) + : "$1", "$22", "$23", "$24", "$25"); + + rtc_access->data = __r0; +} + +static u8 +__marvel_rtc_io(u8 b, unsigned long addr, int write) +{ + static u8 index = 0; + + struct marvel_rtc_access_info rtc_access; + u8 ret = 0; + + switch(addr) { + case 0x70: /* RTC_PORT(0) */ + if (write) index = b; + ret = index; + break; + + case 0x71: /* RTC_PORT(1) */ + rtc_access.index = index; + rtc_access.data = bcd2bin(b); + rtc_access.function = 0x48 + !write; /* GET/PUT_TOY */ + + __marvel_access_rtc(&rtc_access); + + ret = bin2bcd(rtc_access.data); + break; + + default: + printk(KERN_WARNING "Illegal RTC port %lx\n", addr); + break; + } + + return ret; +} + + +/* + * IO map support. + */ +void __iomem * +marvel_ioremap(unsigned long addr, unsigned long size) +{ + struct pci_controller *hose; + unsigned long baddr, last; + struct vm_struct *area; + unsigned long vaddr; + unsigned long *ptes; + unsigned long pfn; + + /* + * Adjust the address. + */ + FIXUP_MEMADDR_VGA(addr); + + /* + * Find the hose. + */ + for (hose = hose_head; hose; hose = hose->next) { + if ((addr >> 32) == (hose->mem_space->start >> 32)) + break; + } + if (!hose) + return NULL; + + /* + * We have the hose - calculate the bus limits. + */ + baddr = addr - hose->mem_space->start; + last = baddr + size - 1; + + /* + * Is it direct-mapped? + */ + if ((baddr >= __direct_map_base) && + ((baddr + size - 1) < __direct_map_base + __direct_map_size)) { + addr = IDENT_ADDR | (baddr - __direct_map_base); + return (void __iomem *) addr; + } + + /* + * Check the scatter-gather arena. + */ + if (hose->sg_pci && + baddr >= (unsigned long)hose->sg_pci->dma_base && + last < (unsigned long)hose->sg_pci->dma_base + hose->sg_pci->size) { + + /* + * Adjust the limits (mappings must be page aligned) + */ + baddr -= hose->sg_pci->dma_base; + last -= hose->sg_pci->dma_base; + baddr &= PAGE_MASK; + size = PAGE_ALIGN(last) - baddr; + + /* + * Map it. + */ + area = get_vm_area(size, VM_IOREMAP); + if (!area) + return NULL; + + ptes = hose->sg_pci->ptes; + for (vaddr = (unsigned long)area->addr; + baddr <= last; + baddr += PAGE_SIZE, vaddr += PAGE_SIZE) { + pfn = ptes[baddr >> PAGE_SHIFT]; + if (!(pfn & 1)) { + printk("ioremap failed... 
pte not valid...\n"); + vfree(area->addr); + return NULL; + } + pfn >>= 1; /* make it a true pfn */ + + if (__alpha_remap_area_pages(vaddr, + pfn << PAGE_SHIFT, + PAGE_SIZE, 0)) { + printk("FAILED to map...\n"); + vfree(area->addr); + return NULL; + } + } + + flush_tlb_all(); + + vaddr = (unsigned long)area->addr + (addr & ~PAGE_MASK); + + return (void __iomem *) vaddr; + } + + /* Assume it was already a reasonable address */ + vaddr = baddr + hose->mem_space->start; + return (void __iomem *) vaddr; +} + +void +marvel_iounmap(volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + if (addr >= VMALLOC_START) + vfree((void *)(PAGE_MASK & addr)); +} + +int +marvel_is_mmio(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + + if (addr >= VMALLOC_START) + return 1; + else + return (addr & 0xFF000000UL) == 0; +} + +#define __marvel_is_port_kbd(a) (((a) == 0x60) || ((a) == 0x64)) +#define __marvel_is_port_rtc(a) (((a) == 0x70) || ((a) == 0x71)) + +void __iomem *marvel_ioportmap (unsigned long addr) +{ + FIXUP_IOADDR_VGA(addr); + return (void __iomem *)addr; +} + +unsigned int +marvel_ioread8(const void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + if (__marvel_is_port_kbd(addr)) + return 0; + else if (__marvel_is_port_rtc(addr)) + return __marvel_rtc_io(0, addr, 0); + else if (marvel_is_ioaddr(addr)) + return __kernel_ldbu(*(vucp)addr); + else + /* this should catch other legacy addresses + that would normally fail on MARVEL, + because there really is nothing there... + */ + return ~0; +} + +void +marvel_iowrite8(u8 b, void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + if (__marvel_is_port_kbd(addr)) + return; + else if (__marvel_is_port_rtc(addr)) + __marvel_rtc_io(b, addr, 1); + else if (marvel_is_ioaddr(addr)) + __kernel_stb(b, *(vucp)addr); +} + +#ifndef CONFIG_ALPHA_GENERIC +EXPORT_SYMBOL(marvel_ioremap); +EXPORT_SYMBOL(marvel_iounmap); +EXPORT_SYMBOL(marvel_is_mmio); +EXPORT_SYMBOL(marvel_ioportmap); +EXPORT_SYMBOL(marvel_ioread8); +EXPORT_SYMBOL(marvel_iowrite8); +#endif + +/* + * AGP GART Support. 
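+ *
+ * The AGP aperture is carved out of the hose's PCI scatter-gather
+ * arena with iommu_reserve() (see marvel_agp_setup() below), so AGP
+ * pages are bound and unbound through the same IOMMU page tables.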
+ */ +#include +#include +#include +#include + +struct marvel_agp_aperture { + struct pci_iommu_arena *arena; + long pg_start; + long pg_count; +}; + +static int +marvel_agp_setup(alpha_agp_info *agp) +{ + struct marvel_agp_aperture *aper; + + if (!alpha_agpgart_size) + return -ENOMEM; + + aper = kmalloc(sizeof(*aper), GFP_KERNEL); + if (aper == NULL) return -ENOMEM; + + aper->arena = agp->hose->sg_pci; + aper->pg_count = alpha_agpgart_size / PAGE_SIZE; + aper->pg_start = iommu_reserve(aper->arena, aper->pg_count, + aper->pg_count - 1); + + if (aper->pg_start < 0) { + printk(KERN_ERR "Failed to reserve AGP memory\n"); + kfree(aper); + return -ENOMEM; + } + + agp->aperture.bus_base = + aper->arena->dma_base + aper->pg_start * PAGE_SIZE; + agp->aperture.size = aper->pg_count * PAGE_SIZE; + agp->aperture.sysdata = aper; + + return 0; +} + +static void +marvel_agp_cleanup(alpha_agp_info *agp) +{ + struct marvel_agp_aperture *aper = agp->aperture.sysdata; + int status; + + status = iommu_release(aper->arena, aper->pg_start, aper->pg_count); + if (status == -EBUSY) { + printk(KERN_WARNING + "Attempted to release bound AGP memory - unbinding\n"); + iommu_unbind(aper->arena, aper->pg_start, aper->pg_count); + status = iommu_release(aper->arena, aper->pg_start, + aper->pg_count); + } + if (status < 0) + printk(KERN_ERR "Failed to release AGP memory\n"); + + kfree(aper); + kfree(agp); +} + +static int +marvel_agp_configure(alpha_agp_info *agp) +{ + io7_ioport_csrs *csrs = ((struct io7_port *)agp->hose->sysdata)->csrs; + struct io7 *io7 = ((struct io7_port *)agp->hose->sysdata)->io7; + unsigned int new_rate = 0; + unsigned long agp_pll; + + /* + * Check the requested mode against the PLL setting. + * The agpgart_be code has not programmed the card yet, + * so we can still tweak mode here. + */ + agp_pll = io7->csrs->POx_RST[IO7_AGP_PORT].csr; + switch(IO7_PLL_RNGB(agp_pll)) { + case 0x4: /* 2x only */ + /* + * The PLL is only programmed for 2x, so adjust the + * rate to 2x, if necessary. + */ + if (agp->mode.bits.rate != 2) + new_rate = 2; + break; + + case 0x6: /* 1x / 4x */ + /* + * The PLL is programmed for 1x or 4x. Don't go faster + * than requested, so if the requested rate is 2x, use 1x. + */ + if (agp->mode.bits.rate == 2) + new_rate = 1; + break; + + default: /* ??????? */ + /* + * Don't know what this PLL setting is, take the requested + * rate, but warn the user. + */ + printk("%s: unknown PLL setting RNGB=%lx (PLL6_CTL=%016lx)\n", + __func__, IO7_PLL_RNGB(agp_pll), agp_pll); + break; + } + + /* + * Set the new rate, if necessary. + */ + if (new_rate) { + printk("Requested AGP Rate %dX not compatible " + "with PLL setting - using %dX\n", + agp->mode.bits.rate, + new_rate); + + agp->mode.bits.rate = new_rate; + } + + printk("Enabling AGP on hose %d: %dX%s RQ %d\n", + agp->hose->index, agp->mode.bits.rate, + agp->mode.bits.sba ? 
" - SBA" : "", agp->mode.bits.rq); + + csrs->AGP_CMD.csr = agp->mode.lw; + + return 0; +} + +static int +marvel_agp_bind_memory(alpha_agp_info *agp, off_t pg_start, struct agp_memory *mem) +{ + struct marvel_agp_aperture *aper = agp->aperture.sysdata; + return iommu_bind(aper->arena, aper->pg_start + pg_start, + mem->page_count, mem->pages); +} + +static int +marvel_agp_unbind_memory(alpha_agp_info *agp, off_t pg_start, struct agp_memory *mem) +{ + struct marvel_agp_aperture *aper = agp->aperture.sysdata; + return iommu_unbind(aper->arena, aper->pg_start + pg_start, + mem->page_count); +} + +static unsigned long +marvel_agp_translate(alpha_agp_info *agp, dma_addr_t addr) +{ + struct marvel_agp_aperture *aper = agp->aperture.sysdata; + unsigned long baddr = addr - aper->arena->dma_base; + unsigned long pte; + + if (addr < agp->aperture.bus_base || + addr >= agp->aperture.bus_base + agp->aperture.size) { + printk("%s: addr out of range\n", __func__); + return -EINVAL; + } + + pte = aper->arena->ptes[baddr >> PAGE_SHIFT]; + if (!(pte & 1)) { + printk("%s: pte not valid\n", __func__); + return -EINVAL; + } + return (pte >> 1) << PAGE_SHIFT; +} + +struct alpha_agp_ops marvel_agp_ops = +{ + .setup = marvel_agp_setup, + .cleanup = marvel_agp_cleanup, + .configure = marvel_agp_configure, + .bind = marvel_agp_bind_memory, + .unbind = marvel_agp_unbind_memory, + .translate = marvel_agp_translate +}; + +alpha_agp_info * +marvel_agp_info(void) +{ + struct pci_controller *hose; + io7_ioport_csrs *csrs; + alpha_agp_info *agp; + struct io7 *io7; + + /* + * Find the first IO7 with an AGP card. + * + * FIXME -- there should be a better way (we want to be able to + * specify and what if the agp card is not video???) + */ + hose = NULL; + for (io7 = NULL; (io7 = marvel_next_io7(io7)) != NULL; ) { + struct pci_controller *h; + vuip addr; + + if (!io7->ports[IO7_AGP_PORT].enabled) + continue; + + h = io7->ports[IO7_AGP_PORT].hose; + addr = (vuip)build_conf_addr(h, 0, PCI_DEVFN(5, 0), 0); + + if (*addr != 0xffffffffu) { + hose = h; + break; + } + } + + if (!hose || !hose->sg_pci) + return NULL; + + printk("MARVEL - using hose %d as AGP\n", hose->index); + + /* + * Get the csrs from the hose. + */ + csrs = ((struct io7_port *)hose->sysdata)->csrs; + + /* + * Allocate the info structure. + */ + agp = kmalloc(sizeof(*agp), GFP_KERNEL); + if (!agp) + return NULL; + + /* + * Fill it in. + */ + agp->hose = hose; + agp->private = NULL; + agp->ops = &marvel_agp_ops; + + /* + * Aperture - not configured until ops.setup(). + */ + agp->aperture.bus_base = 0; + agp->aperture.size = 0; + agp->aperture.sysdata = NULL; + + /* + * Capabilities. + * + * NOTE: IO7 reports through AGP_STAT that it can support a read queue + * depth of 17 (rq = 0x10). It actually only supports a depth of + * 16 (rq = 0xf). + */ + agp->capability.lw = csrs->AGP_STAT.csr; + agp->capability.bits.rq = 0xf; + + /* + * Mode. + */ + agp->mode.lw = csrs->AGP_CMD.csr; + + return agp; +} diff --git a/arch/alpha/kernel/core_mcpcia.c b/arch/alpha/kernel/core_mcpcia.c new file mode 100644 index 0000000000..74b1d01812 --- /dev/null +++ b/arch/alpha/kernel/core_mcpcia.c @@ -0,0 +1,616 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/core_mcpcia.c + * + * Based on code written by David A Rusling (david.rusling@reo.mts.dec.com). 
+ * + * Code common to all MCbus-PCI Adaptor core logic chipsets + */ + +#define __EXTERN_INLINE inline +#include +#include +#undef __EXTERN_INLINE + +#include +#include +#include +#include +#include + +#include + +#include "proto.h" +#include "pci_impl.h" + +/* + * NOTE: Herein lie back-to-back mb instructions. They are magic. + * One plausible explanation is that the i/o controller does not properly + * handle the system transaction. Another involves timing. Ho hum. + */ + +/* + * BIOS32-style PCI interface: + */ + +#define DEBUG_CFG 0 + +#if DEBUG_CFG +# define DBG_CFG(args) printk args +#else +# define DBG_CFG(args) +#endif + +/* + * Given a bus, device, and function number, compute resulting + * configuration space address and setup the MCPCIA_HAXR2 register + * accordingly. It is therefore not safe to have concurrent + * invocations to configuration space access routines, but there + * really shouldn't be any need for this. + * + * Type 0: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | |D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|0| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:11 Device select bit. + * 10:8 Function number + * 7:2 Register number + * + * Type 1: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | | | | | | | | | |B|B|B|B|B|B|B|B|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|1| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:24 reserved + * 23:16 bus number (8 bits = 128 possible buses) + * 15:11 Device number (5 bits) + * 10:8 function number + * 7:2 register number + * + * Notes: + * The function number selects which function of a multi-function device + * (e.g., SCSI and Ethernet). + * + * The register selects a DWORD (32 bit) register offset. Hence it + * doesn't get shifted by 2 bits as we want to "drop" the bottom two + * bits. + */ + +static unsigned int +conf_read(unsigned long addr, unsigned char type1, + struct pci_controller *hose) +{ + unsigned long flags; + unsigned long mid = MCPCIA_HOSE2MID(hose->index); + unsigned int stat0, value, cpu; + + cpu = smp_processor_id(); + + local_irq_save(flags); + + DBG_CFG(("conf_read(addr=0x%lx, type1=%d, hose=%d)\n", + addr, type1, mid)); + + /* Reset status register to avoid losing errors. */ + stat0 = *(vuip)MCPCIA_CAP_ERR(mid); + *(vuip)MCPCIA_CAP_ERR(mid) = stat0; + mb(); + *(vuip)MCPCIA_CAP_ERR(mid); + DBG_CFG(("conf_read: MCPCIA_CAP_ERR(%d) was 0x%x\n", mid, stat0)); + + mb(); + draina(); + mcheck_expected(cpu) = 1; + mcheck_taken(cpu) = 0; + mcheck_extra(cpu) = mid; + mb(); + + /* Access configuration space. 
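+ * This load can machine-check if no device answers; because
+ * mcheck_expected(cpu) was set above, the handler records the event
+ * and the read simply returns 0xffffffff.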
*/ + value = *((vuip)addr); + mb(); + mb(); /* magic */ + + if (mcheck_taken(cpu)) { + mcheck_taken(cpu) = 0; + value = 0xffffffffU; + mb(); + } + mcheck_expected(cpu) = 0; + mb(); + + DBG_CFG(("conf_read(): finished\n")); + + local_irq_restore(flags); + return value; +} + +static void +conf_write(unsigned long addr, unsigned int value, unsigned char type1, + struct pci_controller *hose) +{ + unsigned long flags; + unsigned long mid = MCPCIA_HOSE2MID(hose->index); + unsigned int stat0, cpu; + + cpu = smp_processor_id(); + + local_irq_save(flags); /* avoid getting hit by machine check */ + + /* Reset status register to avoid losing errors. */ + stat0 = *(vuip)MCPCIA_CAP_ERR(mid); + *(vuip)MCPCIA_CAP_ERR(mid) = stat0; mb(); + *(vuip)MCPCIA_CAP_ERR(mid); + DBG_CFG(("conf_write: MCPCIA CAP_ERR(%d) was 0x%x\n", mid, stat0)); + + draina(); + mcheck_expected(cpu) = 1; + mcheck_extra(cpu) = mid; + mb(); + + /* Access configuration space. */ + *((vuip)addr) = value; + mb(); + mb(); /* magic */ + *(vuip)MCPCIA_CAP_ERR(mid); /* read to force the write */ + mcheck_expected(cpu) = 0; + mb(); + + DBG_CFG(("conf_write(): finished\n")); + local_irq_restore(flags); +} + +static int +mk_conf_addr(struct pci_bus *pbus, unsigned int devfn, int where, + struct pci_controller *hose, unsigned long *pci_addr, + unsigned char *type1) +{ + u8 bus = pbus->number; + unsigned long addr; + + DBG_CFG(("mk_conf_addr(bus=%d,devfn=0x%x,hose=%d,where=0x%x," + " pci_addr=0x%p, type1=0x%p)\n", + bus, devfn, hose->index, where, pci_addr, type1)); + + /* Type 1 configuration cycle for *ALL* busses. */ + *type1 = 1; + + if (!pbus->parent) /* No parent means peer PCI bus. */ + bus = 0; + addr = (bus << 16) | (devfn << 8) | (where); + addr <<= 5; /* swizzle for SPARSE */ + addr |= hose->config_space_base; + + *pci_addr = addr; + DBG_CFG(("mk_conf_addr: returning pci_addr 0x%lx\n", addr)); + return 0; +} + +static int +mcpcia_read_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + struct pci_controller *hose = bus->sysdata; + unsigned long addr, w; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, hose, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr |= (size - 1) * 8; + w = conf_read(addr, type1, hose); + switch (size) { + case 1: + *value = __kernel_extbl(w, where & 3); + break; + case 2: + *value = __kernel_extwl(w, where & 3); + break; + case 4: + *value = w; + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int +mcpcia_write_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 value) +{ + struct pci_controller *hose = bus->sysdata; + unsigned long addr; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, hose, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr |= (size - 1) * 8; + value = __kernel_insql(value, where & 3); + conf_write(addr, value, type1, hose); + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops mcpcia_pci_ops = +{ + .read = mcpcia_read_config, + .write = mcpcia_write_config, +}; + +void +mcpcia_pci_tbi(struct pci_controller *hose, dma_addr_t start, dma_addr_t end) +{ + wmb(); + *(vuip)MCPCIA_SG_TBIA(MCPCIA_HOSE2MID(hose->index)) = 0; + mb(); +} + +static int __init +mcpcia_probe_hose(int h) +{ + int cpu = smp_processor_id(); + int mid = MCPCIA_HOSE2MID(h); + unsigned int pci_rev; + + /* Gotta be REAL careful. If hose is absent, we get an mcheck. 
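+ *
+ * mcheck_expected() is set to 2 below so that mcpcia_machine_check()
+ * can tell a probe-induced machine check from a real one and treat
+ * it as expected.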
*/ + + mb(); + mb(); + draina(); + wrmces(7); + + mcheck_expected(cpu) = 2; /* indicates probing */ + mcheck_taken(cpu) = 0; + mcheck_extra(cpu) = mid; + mb(); + + /* Access the bus revision word. */ + pci_rev = *(vuip)MCPCIA_REV(mid); + + mb(); + mb(); /* magic */ + if (mcheck_taken(cpu)) { + mcheck_taken(cpu) = 0; + pci_rev = 0xffffffff; + mb(); + } + mcheck_expected(cpu) = 0; + mb(); + + return (pci_rev >> 16) == PCI_CLASS_BRIDGE_HOST; +} + +static void __init +mcpcia_new_hose(int h) +{ + struct pci_controller *hose; + struct resource *io, *mem, *hae_mem; + int mid = MCPCIA_HOSE2MID(h); + + hose = alloc_pci_controller(); + if (h == 0) + pci_isa_hose = hose; + io = alloc_resource(); + mem = alloc_resource(); + hae_mem = alloc_resource(); + + hose->io_space = io; + hose->mem_space = hae_mem; + hose->sparse_mem_base = MCPCIA_SPARSE(mid) - IDENT_ADDR; + hose->dense_mem_base = MCPCIA_DENSE(mid) - IDENT_ADDR; + hose->sparse_io_base = MCPCIA_IO(mid) - IDENT_ADDR; + hose->dense_io_base = 0; + hose->config_space_base = MCPCIA_CONF(mid); + hose->index = h; + + io->start = MCPCIA_IO(mid) - MCPCIA_IO_BIAS; + io->end = io->start + 0xffff; + io->name = pci_io_names[h]; + io->flags = IORESOURCE_IO; + + mem->start = MCPCIA_DENSE(mid) - MCPCIA_MEM_BIAS; + mem->end = mem->start + 0xffffffff; + mem->name = pci_mem_names[h]; + mem->flags = IORESOURCE_MEM; + + hae_mem->start = mem->start; + hae_mem->end = mem->start + MCPCIA_MEM_MASK; + hae_mem->name = pci_hae0_name; + hae_mem->flags = IORESOURCE_MEM; + + if (request_resource(&ioport_resource, io) < 0) + printk(KERN_ERR "Failed to request IO on hose %d\n", h); + if (request_resource(&iomem_resource, mem) < 0) + printk(KERN_ERR "Failed to request MEM on hose %d\n", h); + if (request_resource(mem, hae_mem) < 0) + printk(KERN_ERR "Failed to request HAE_MEM on hose %d\n", h); +} + +static void +mcpcia_pci_clr_err(int mid) +{ + *(vuip)MCPCIA_CAP_ERR(mid); + *(vuip)MCPCIA_CAP_ERR(mid) = 0xffffffff; /* Clear them all. */ + mb(); + *(vuip)MCPCIA_CAP_ERR(mid); /* Re-read for force write. */ +} + +static void __init +mcpcia_startup_hose(struct pci_controller *hose) +{ + int mid = MCPCIA_HOSE2MID(hose->index); + unsigned int tmp; + + mcpcia_pci_clr_err(mid); + + /* + * Set up error reporting. + */ + tmp = *(vuip)MCPCIA_CAP_ERR(mid); + tmp |= 0x0006; /* master/target abort */ + *(vuip)MCPCIA_CAP_ERR(mid) = tmp; + mb(); + tmp = *(vuip)MCPCIA_CAP_ERR(mid); + + /* + * Set up the PCI->physical memory translation windows. 
+ * + * Window 0 is scatter-gather 8MB at 8MB (for isa) + * Window 1 is scatter-gather (up to) 1GB at 1GB (for pci) + * Window 2 is direct access 2GB at 2GB + */ + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, + SMP_CACHE_BYTES); + hose->sg_pci = iommu_arena_new(hose, 0x40000000, + size_for_memory(0x40000000), + SMP_CACHE_BYTES); + + __direct_map_base = 0x80000000; + __direct_map_size = 0x80000000; + + *(vuip)MCPCIA_W0_BASE(mid) = hose->sg_isa->dma_base | 3; + *(vuip)MCPCIA_W0_MASK(mid) = (hose->sg_isa->size - 1) & 0xfff00000; + *(vuip)MCPCIA_T0_BASE(mid) = virt_to_phys(hose->sg_isa->ptes) >> 8; + + *(vuip)MCPCIA_W1_BASE(mid) = hose->sg_pci->dma_base | 3; + *(vuip)MCPCIA_W1_MASK(mid) = (hose->sg_pci->size - 1) & 0xfff00000; + *(vuip)MCPCIA_T1_BASE(mid) = virt_to_phys(hose->sg_pci->ptes) >> 8; + + *(vuip)MCPCIA_W2_BASE(mid) = __direct_map_base | 1; + *(vuip)MCPCIA_W2_MASK(mid) = (__direct_map_size - 1) & 0xfff00000; + *(vuip)MCPCIA_T2_BASE(mid) = 0; + + *(vuip)MCPCIA_W3_BASE(mid) = 0x0; + + mcpcia_pci_tbi(hose, 0, -1); + + *(vuip)MCPCIA_HBASE(mid) = 0x0; + mb(); + + *(vuip)MCPCIA_HAE_MEM(mid) = 0U; + mb(); + *(vuip)MCPCIA_HAE_MEM(mid); /* read it back. */ + *(vuip)MCPCIA_HAE_IO(mid) = 0; + mb(); + *(vuip)MCPCIA_HAE_IO(mid); /* read it back. */ +} + +void __init +mcpcia_init_arch(void) +{ + /* With multiple PCI busses, we play with I/O as physical addrs. */ + ioport_resource.end = ~0UL; + + /* Allocate hose 0. That's the one that all the ISA junk hangs + off of, from which we'll be registering stuff here in a bit. + Other hose detection is done in mcpcia_init_hoses, which is + called from init_IRQ. */ + + mcpcia_new_hose(0); +} + +/* This is called from init_IRQ, since we cannot take interrupts + before then. Which means we cannot do this in init_arch. */ + +void __init +mcpcia_init_hoses(void) +{ + struct pci_controller *hose; + int hose_count; + int h; + + /* First, find how many hoses we have. */ + hose_count = 0; + for (h = 0; h < MCPCIA_MAX_HOSES; ++h) { + if (mcpcia_probe_hose(h)) { + if (h != 0) + mcpcia_new_hose(h); + hose_count++; + } + } + + printk("mcpcia_init_hoses: found %d hoses\n", hose_count); + + /* Now do init for each hose. */ + for (hose = hose_head; hose; hose = hose->next) + mcpcia_startup_hose(hose); +} + +static void +mcpcia_print_uncorrectable(struct el_MCPCIA_uncorrected_frame_mcheck *logout) +{ + struct el_common_EV5_uncorrectable_mcheck *frame; + int i; + + frame = &logout->procdata; + + /* Print PAL fields */ + for (i = 0; i < 24; i += 2) { + printk(" paltmp[%d-%d] = %16lx %16lx\n", + i, i+1, frame->paltemp[i], frame->paltemp[i+1]); + } + for (i = 0; i < 8; i += 2) { + printk(" shadow[%d-%d] = %16lx %16lx\n", + i, i+1, frame->shadow[i], + frame->shadow[i+1]); + } + printk(" Addr of excepting instruction = %16lx\n", + frame->exc_addr); + printk(" Summary of arithmetic traps = %16lx\n", + frame->exc_sum); + printk(" Exception mask = %16lx\n", + frame->exc_mask); + printk(" Base address for PALcode = %16lx\n", + frame->pal_base); + printk(" Interrupt Status Reg = %16lx\n", + frame->isr); + printk(" CURRENT SETUP OF EV5 IBOX = %16lx\n", + frame->icsr); + printk(" I-CACHE Reg %s parity error = %16lx\n", + (frame->ic_perr_stat & 0x800L) ? 
+ "Data" : "Tag", + frame->ic_perr_stat); + printk(" D-CACHE error Reg = %16lx\n", + frame->dc_perr_stat); + if (frame->dc_perr_stat & 0x2) { + switch (frame->dc_perr_stat & 0x03c) { + case 8: + printk(" Data error in bank 1\n"); + break; + case 4: + printk(" Data error in bank 0\n"); + break; + case 20: + printk(" Tag error in bank 1\n"); + break; + case 10: + printk(" Tag error in bank 0\n"); + break; + } + } + printk(" Effective VA = %16lx\n", + frame->va); + printk(" Reason for D-stream = %16lx\n", + frame->mm_stat); + printk(" EV5 SCache address = %16lx\n", + frame->sc_addr); + printk(" EV5 SCache TAG/Data parity = %16lx\n", + frame->sc_stat); + printk(" EV5 BC_TAG_ADDR = %16lx\n", + frame->bc_tag_addr); + printk(" EV5 EI_ADDR: Phys addr of Xfer = %16lx\n", + frame->ei_addr); + printk(" Fill Syndrome = %16lx\n", + frame->fill_syndrome); + printk(" EI_STAT reg = %16lx\n", + frame->ei_stat); + printk(" LD_LOCK = %16lx\n", + frame->ld_lock); +} + +static void +mcpcia_print_system_area(unsigned long la_ptr) +{ + struct el_common *frame; + struct pci_controller *hose; + + struct IOD_subpacket { + unsigned long base; + unsigned int whoami; + unsigned int rsvd1; + unsigned int pci_rev; + unsigned int cap_ctrl; + unsigned int hae_mem; + unsigned int hae_io; + unsigned int int_ctl; + unsigned int int_reg; + unsigned int int_mask0; + unsigned int int_mask1; + unsigned int mc_err0; + unsigned int mc_err1; + unsigned int cap_err; + unsigned int rsvd2; + unsigned int pci_err1; + unsigned int mdpa_stat; + unsigned int mdpa_syn; + unsigned int mdpb_stat; + unsigned int mdpb_syn; + unsigned int rsvd3; + unsigned int rsvd4; + unsigned int rsvd5; + } *iodpp; + + frame = (struct el_common *)la_ptr; + iodpp = (struct IOD_subpacket *) (la_ptr + frame->sys_offset); + + for (hose = hose_head; hose; hose = hose->next, iodpp++) { + + printk("IOD %d Register Subpacket - Bridge Base Address %16lx\n", + hose->index, iodpp->base); + printk(" WHOAMI = %8x\n", iodpp->whoami); + printk(" PCI_REV = %8x\n", iodpp->pci_rev); + printk(" CAP_CTRL = %8x\n", iodpp->cap_ctrl); + printk(" HAE_MEM = %8x\n", iodpp->hae_mem); + printk(" HAE_IO = %8x\n", iodpp->hae_io); + printk(" INT_CTL = %8x\n", iodpp->int_ctl); + printk(" INT_REG = %8x\n", iodpp->int_reg); + printk(" INT_MASK0 = %8x\n", iodpp->int_mask0); + printk(" INT_MASK1 = %8x\n", iodpp->int_mask1); + printk(" MC_ERR0 = %8x\n", iodpp->mc_err0); + printk(" MC_ERR1 = %8x\n", iodpp->mc_err1); + printk(" CAP_ERR = %8x\n", iodpp->cap_err); + printk(" PCI_ERR1 = %8x\n", iodpp->pci_err1); + printk(" MDPA_STAT = %8x\n", iodpp->mdpa_stat); + printk(" MDPA_SYN = %8x\n", iodpp->mdpa_syn); + printk(" MDPB_STAT = %8x\n", iodpp->mdpb_stat); + printk(" MDPB_SYN = %8x\n", iodpp->mdpb_syn); + } +} + +void +mcpcia_machine_check(unsigned long vector, unsigned long la_ptr) +{ + struct el_MCPCIA_uncorrected_frame_mcheck *mchk_logout; + unsigned int cpu = smp_processor_id(); + int expected; + + mchk_logout = (struct el_MCPCIA_uncorrected_frame_mcheck *)la_ptr; + expected = mcheck_expected(cpu); + + mb(); + mb(); /* magic */ + draina(); + + switch (expected) { + case 0: + { + /* FIXME: how do we figure out which hose the + error was on? */ + struct pci_controller *hose; + for (hose = hose_head; hose; hose = hose->next) + mcpcia_pci_clr_err(MCPCIA_HOSE2MID(hose->index)); + break; + } + case 1: + mcpcia_pci_clr_err(mcheck_extra(cpu)); + break; + default: + /* Otherwise, we're being called from mcpcia_probe_hose + and there's no hose clear an error from. 
*/ + break; + } + + wrmces(0x7); + mb(); + + process_mcheck_info(vector, la_ptr, "MCPCIA", expected != 0); + if (!expected && vector != 0x620 && vector != 0x630) { + mcpcia_print_uncorrectable(mchk_logout); + mcpcia_print_system_area(la_ptr); + } +} diff --git a/arch/alpha/kernel/core_polaris.c b/arch/alpha/kernel/core_polaris.c new file mode 100644 index 0000000000..75d622d96f --- /dev/null +++ b/arch/alpha/kernel/core_polaris.c @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/core_polaris.c + * + * POLARIS chip-specific code + */ + +#define __EXTERN_INLINE inline +#include +#include +#undef __EXTERN_INLINE + +#include +#include +#include +#include + +#include + +#include "proto.h" +#include "pci_impl.h" + +/* + * BIOS32-style PCI interface: + */ + +#define DEBUG_CONFIG 0 + +#if DEBUG_CONFIG +# define DBG_CFG(args) printk args +#else +# define DBG_CFG(args) +#endif + + +/* + * Given a bus, device, and function number, compute resulting + * configuration space address. This is fairly straightforward + * on POLARIS, since the chip itself generates Type 0 or Type 1 + * cycles automatically depending on the bus number (Bus 0 is + * hardwired to Type 0, all others are Type 1. Peer bridges + * are not supported). + * + * All types: + * + * 3 3 3 3|3 3 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |1|1|1|1|1|0|0|1|1|1|1|1|1|1|1|0|B|B|B|B|B|B|B|B|D|D|D|D|D|F|F|F|R|R|R|R|R|R|x|x| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 23:16 bus number (8 bits = 128 possible buses) + * 15:11 Device number (5 bits) + * 10:8 function number + * 7:2 register number + * + * Notes: + * The function number selects which function of a multi-function device + * (e.g., scsi and ethernet). + * + * The register selects a DWORD (32 bit) register offset. Hence it + * doesn't get shifted by 2 bits as we want to "drop" the bottom two + * bits. + */ + +static int +mk_conf_addr(struct pci_bus *pbus, unsigned int device_fn, int where, + unsigned long *pci_addr, u8 *type1) +{ + u8 bus = pbus->number; + + *type1 = (bus == 0) ? 
0 : 1; + *pci_addr = (bus << 16) | (device_fn << 8) | (where) | + POLARIS_DENSE_CONFIG_BASE; + + DBG_CFG(("mk_conf_addr(bus=%d ,device_fn=0x%x, where=0x%x," + " returning address 0x%p\n" + bus, device_fn, where, *pci_addr)); + + return 0; +} + +static int +polaris_read_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + unsigned long addr; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (size) { + case 1: + *value = __kernel_ldbu(*(vucp)addr); + break; + case 2: + *value = __kernel_ldwu(*(vusp)addr); + break; + case 4: + *value = *(vuip)addr; + break; + } + + return PCIBIOS_SUCCESSFUL; +} + + +static int +polaris_write_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 value) +{ + unsigned long addr; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (size) { + case 1: + __kernel_stb(value, *(vucp)addr); + mb(); + __kernel_ldbu(*(vucp)addr); + break; + case 2: + __kernel_stw(value, *(vusp)addr); + mb(); + __kernel_ldwu(*(vusp)addr); + break; + case 4: + *(vuip)addr = value; + mb(); + *(vuip)addr; + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops polaris_pci_ops = +{ + .read = polaris_read_config, + .write = polaris_write_config, +}; + +void __init +polaris_init_arch(void) +{ + struct pci_controller *hose; + + /* May need to initialize error reporting (see PCICTL0/1), but + * for now assume that the firmware has done the right thing + * already. + */ +#if 0 + printk("polaris_init_arch(): trusting firmware for setup\n"); +#endif + + /* + * Create our single hose. + */ + + pci_isa_hose = hose = alloc_pci_controller(); + hose->io_space = &ioport_resource; + hose->mem_space = &iomem_resource; + hose->index = 0; + + hose->sparse_mem_base = 0; + hose->dense_mem_base = POLARIS_DENSE_MEM_BASE - IDENT_ADDR; + hose->sparse_io_base = 0; + hose->dense_io_base = POLARIS_DENSE_IO_BASE - IDENT_ADDR; + + hose->sg_isa = hose->sg_pci = NULL; + + /* The I/O window is fixed at 2G @ 2G. */ + __direct_map_base = 0x80000000; + __direct_map_size = 0x80000000; +} + +static inline void +polaris_pci_clr_err(void) +{ + *(vusp)POLARIS_W_STATUS; + /* Write 1's to settable bits to clear errors */ + *(vusp)POLARIS_W_STATUS = 0x7800; + mb(); + *(vusp)POLARIS_W_STATUS; +} + +void +polaris_machine_check(unsigned long vector, unsigned long la_ptr) +{ + /* Clear the error before any reporting. */ + mb(); + mb(); + draina(); + polaris_pci_clr_err(); + wrmces(0x7); + mb(); + + process_mcheck_info(vector, la_ptr, "POLARIS", + mcheck_expected(0)); +} diff --git a/arch/alpha/kernel/core_t2.c b/arch/alpha/kernel/core_t2.c new file mode 100644 index 0000000000..98d5b6ff8a --- /dev/null +++ b/arch/alpha/kernel/core_t2.c @@ -0,0 +1,624 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/core_t2.c + * + * Written by Jay A Estabrook (jestabro@amt.tay1.dec.com). + * December 1996. + * + * based on CIA code by David A Rusling (david.rusling@reo.mts.dec.com) + * + * Code common to all T2 core logic chips. + */ + +#define __EXTERN_INLINE +#include +#include +#undef __EXTERN_INLINE + +#include +#include +#include +#include + +#include +#include +#include + +#include "proto.h" +#include "pci_impl.h" + +/* For dumping initial DMA window settings. */ +#define DEBUG_PRINT_INITIAL_SETTINGS 0 + +/* For dumping final DMA window settings. 
*/ +#define DEBUG_PRINT_FINAL_SETTINGS 0 + +/* + * By default, we direct-map starting at 2GB, in order to allow the + * maximum size direct-map window (2GB) to match the maximum amount of + * memory (2GB) that can be present on SABLEs. But that limits the + * floppy to DMA only via the scatter/gather window set up for 8MB + * ISA DMA, since the maximum ISA DMA address is 2GB-1. + * + * For now, this seems a reasonable trade-off: even though most SABLEs + * have less than 1GB of memory, floppy usage/performance will not + * really be affected by forcing it to go via scatter/gather... + */ +#define T2_DIRECTMAP_2G 1 + +#if T2_DIRECTMAP_2G +# define T2_DIRECTMAP_START 0x80000000UL +# define T2_DIRECTMAP_LENGTH 0x80000000UL +#else +# define T2_DIRECTMAP_START 0x40000000UL +# define T2_DIRECTMAP_LENGTH 0x40000000UL +#endif + +/* The ISA scatter/gather window settings. */ +#define T2_ISA_SG_START 0x00800000UL +#define T2_ISA_SG_LENGTH 0x00800000UL + +/* + * NOTE: Herein lie back-to-back mb instructions. They are magic. + * One plausible explanation is that the i/o controller does not properly + * handle the system transaction. Another involves timing. Ho hum. + */ + +/* + * BIOS32-style PCI interface: + */ + +#define DEBUG_CONFIG 0 + +#if DEBUG_CONFIG +# define DBG(args) printk args +#else +# define DBG(args) +#endif + +static volatile unsigned int t2_mcheck_any_expected; +static volatile unsigned int t2_mcheck_last_taken; + +/* Place to save the DMA Window registers as set up by SRM + for restoration during shutdown. */ +static struct +{ + struct { + unsigned long wbase; + unsigned long wmask; + unsigned long tbase; + } window[2]; + unsigned long hae_1; + unsigned long hae_2; + unsigned long hae_3; + unsigned long hae_4; + unsigned long hbase; +} t2_saved_config __attribute((common)); + +/* + * Given a bus, device, and function number, compute resulting + * configuration space address and setup the T2_HAXR2 register + * accordingly. It is therefore not safe to have concurrent + * invocations to configuration space access routines, but there + * really shouldn't be any need for this. + * + * Type 0: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | |D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|0| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:11 Device select bit. + * 10:8 Function number + * 7:2 Register number + * + * Type 1: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | | | | | | | | | |B|B|B|B|B|B|B|B|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|1| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:24 reserved + * 23:16 bus number (8 bits = 128 possible buses) + * 15:11 Device number (5 bits) + * 10:8 function number + * 7:2 register number + * + * Notes: + * The function number selects which function of a multi-function device + * (e.g., SCSI and Ethernet). + * + * The register selects a DWORD (32 bit) register offset. Hence it + * doesn't get shifted by 2 bits as we want to "drop" the bottom two + * bits. 
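+ *
+ * For example, a Type 0 access to device 3, function 0, register 0
+ * on bus 0 yields (0x0800 << 3) = 0x4000; t2_read_config() then
+ * shifts the address left by 5 and offsets it into T2_CONF sparse
+ * space before dereferencing it.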
+ */ + +static int +mk_conf_addr(struct pci_bus *pbus, unsigned int device_fn, int where, + unsigned long *pci_addr, unsigned char *type1) +{ + unsigned long addr; + u8 bus = pbus->number; + + DBG(("mk_conf_addr(bus=%d, dfn=0x%x, where=0x%x," + " addr=0x%lx, type1=0x%x)\n", + bus, device_fn, where, pci_addr, type1)); + + if (bus == 0) { + int device = device_fn >> 3; + + /* Type 0 configuration cycle. */ + + if (device > 8) { + DBG(("mk_conf_addr: device (%d)>20, returning -1\n", + device)); + return -1; + } + + *type1 = 0; + addr = (0x0800L << device) | ((device_fn & 7) << 8) | (where); + } else { + /* Type 1 configuration cycle. */ + *type1 = 1; + addr = (bus << 16) | (device_fn << 8) | (where); + } + *pci_addr = addr; + DBG(("mk_conf_addr: returning pci_addr 0x%lx\n", addr)); + return 0; +} + +/* + * NOTE: both conf_read() and conf_write() may set HAE_3 when needing + * to do type1 access. This is protected by the use of spinlock IRQ + * primitives in the wrapper functions pci_{read,write}_config_*() + * defined in drivers/pci/pci.c. + */ +static unsigned int +conf_read(unsigned long addr, unsigned char type1) +{ + unsigned int value, cpu, taken; + unsigned long t2_cfg = 0; + + cpu = smp_processor_id(); + + DBG(("conf_read(addr=0x%lx, type1=%d)\n", addr, type1)); + + /* If Type1 access, must set T2 CFG. */ + if (type1) { + t2_cfg = *(vulp)T2_HAE_3 & ~0xc0000000UL; + *(vulp)T2_HAE_3 = 0x40000000UL | t2_cfg; + mb(); + } + mb(); + draina(); + + mcheck_expected(cpu) = 1; + mcheck_taken(cpu) = 0; + t2_mcheck_any_expected |= (1 << cpu); + mb(); + + /* Access configuration space. */ + value = *(vuip)addr; + mb(); + mb(); /* magic */ + + /* Wait for possible mcheck. Also, this lets other CPUs clear + their mchecks as well, as they can reliably tell when + another CPU is in the midst of handling a real mcheck via + the "taken" function. */ + udelay(100); + + if ((taken = mcheck_taken(cpu))) { + mcheck_taken(cpu) = 0; + t2_mcheck_last_taken |= (1 << cpu); + value = 0xffffffffU; + mb(); + } + mcheck_expected(cpu) = 0; + t2_mcheck_any_expected = 0; + mb(); + + /* If Type1 access, must reset T2 CFG so normal IO space ops work. */ + if (type1) { + *(vulp)T2_HAE_3 = t2_cfg; + mb(); + } + + return value; +} + +static void +conf_write(unsigned long addr, unsigned int value, unsigned char type1) +{ + unsigned int cpu, taken; + unsigned long t2_cfg = 0; + + cpu = smp_processor_id(); + + /* If Type1 access, must set T2 CFG. */ + if (type1) { + t2_cfg = *(vulp)T2_HAE_3 & ~0xc0000000UL; + *(vulp)T2_HAE_3 = t2_cfg | 0x40000000UL; + mb(); + } + mb(); + draina(); + + mcheck_expected(cpu) = 1; + mcheck_taken(cpu) = 0; + t2_mcheck_any_expected |= (1 << cpu); + mb(); + + /* Access configuration space. */ + *(vuip)addr = value; + mb(); + mb(); /* magic */ + + /* Wait for possible mcheck. Also, this lets other CPUs clear + their mchecks as well, as they can reliably tell when + this CPU is in the midst of handling a real mcheck via + the "taken" function. */ + udelay(100); + + if ((taken = mcheck_taken(cpu))) { + mcheck_taken(cpu) = 0; + t2_mcheck_last_taken |= (1 << cpu); + mb(); + } + mcheck_expected(cpu) = 0; + t2_mcheck_any_expected = 0; + mb(); + + /* If Type1 access, must reset T2 CFG so normal IO space ops work. 
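+ *
+ * (conf_read()/conf_write() forced bits <31:30> of HAE_3 to 01 above
+ * to generate the type 1 cycle; writing back the value saved at entry
+ * undoes that, so ordinary sparse-space references decode as before.)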
*/ + if (type1) { + *(vulp)T2_HAE_3 = t2_cfg; + mb(); + } +} + +static int +t2_read_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + unsigned long addr, pci_addr; + unsigned char type1; + int shift; + long mask; + + if (mk_conf_addr(bus, devfn, where, &pci_addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + mask = (size - 1) * 8; + shift = (where & 3) * 8; + addr = (pci_addr << 5) + mask + T2_CONF; + *value = conf_read(addr, type1) >> (shift); + return PCIBIOS_SUCCESSFUL; +} + +static int +t2_write_config(struct pci_bus *bus, unsigned int devfn, int where, int size, + u32 value) +{ + unsigned long addr, pci_addr; + unsigned char type1; + long mask; + + if (mk_conf_addr(bus, devfn, where, &pci_addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + mask = (size - 1) * 8; + addr = (pci_addr << 5) + mask + T2_CONF; + conf_write(addr, value << ((where & 3) * 8), type1); + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops t2_pci_ops = +{ + .read = t2_read_config, + .write = t2_write_config, +}; + +static void __init +t2_direct_map_window1(unsigned long base, unsigned long length) +{ + unsigned long temp; + + __direct_map_base = base; + __direct_map_size = length; + + temp = (base & 0xfff00000UL) | ((base + length - 1) >> 20); + *(vulp)T2_WBASE1 = temp | 0x80000UL; /* OR in ENABLE bit */ + temp = (length - 1) & 0xfff00000UL; + *(vulp)T2_WMASK1 = temp; + *(vulp)T2_TBASE1 = 0; + +#if DEBUG_PRINT_FINAL_SETTINGS + printk("%s: setting WBASE1=0x%lx WMASK1=0x%lx TBASE1=0x%lx\n", + __func__, *(vulp)T2_WBASE1, *(vulp)T2_WMASK1, *(vulp)T2_TBASE1); +#endif +} + +static void __init +t2_sg_map_window2(struct pci_controller *hose, + unsigned long base, + unsigned long length) +{ + unsigned long temp; + + /* Note we can only do 1 SG window, as the other is for direct, so + do an ISA SG area, especially for the floppy. */ + hose->sg_isa = iommu_arena_new(hose, base, length, SMP_CACHE_BYTES); + hose->sg_pci = NULL; + + temp = (base & 0xfff00000UL) | ((base + length - 1) >> 20); + *(vulp)T2_WBASE2 = temp | 0xc0000UL; /* OR in ENABLE/SG bits */ + temp = (length - 1) & 0xfff00000UL; + *(vulp)T2_WMASK2 = temp; + *(vulp)T2_TBASE2 = virt_to_phys(hose->sg_isa->ptes) >> 1; + mb(); + + t2_pci_tbi(hose, 0, -1); /* flush TLB all */ + +#if DEBUG_PRINT_FINAL_SETTINGS + printk("%s: setting WBASE2=0x%lx WMASK2=0x%lx TBASE2=0x%lx\n", + __func__, *(vulp)T2_WBASE2, *(vulp)T2_WMASK2, *(vulp)T2_TBASE2); +#endif +} + +static void __init +t2_save_configuration(void) +{ +#if DEBUG_PRINT_INITIAL_SETTINGS + printk("%s: HAE_1 was 0x%lx\n", __func__, srm_hae); /* HW is 0 */ + printk("%s: HAE_2 was 0x%lx\n", __func__, *(vulp)T2_HAE_2); + printk("%s: HAE_3 was 0x%lx\n", __func__, *(vulp)T2_HAE_3); + printk("%s: HAE_4 was 0x%lx\n", __func__, *(vulp)T2_HAE_4); + printk("%s: HBASE was 0x%lx\n", __func__, *(vulp)T2_HBASE); + + printk("%s: WBASE1=0x%lx WMASK1=0x%lx TBASE1=0x%lx\n", __func__, + *(vulp)T2_WBASE1, *(vulp)T2_WMASK1, *(vulp)T2_TBASE1); + printk("%s: WBASE2=0x%lx WMASK2=0x%lx TBASE2=0x%lx\n", __func__, + *(vulp)T2_WBASE2, *(vulp)T2_WMASK2, *(vulp)T2_TBASE2); +#endif + + /* + * Save the DMA Window registers. 
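+ *
+ * Saved here, before t2_init_arch() reprograms the windows, and
+ * restored verbatim by t2_kill_arch(), so SRM sees its own setup
+ * again at shutdown/reboot.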
+ */ + t2_saved_config.window[0].wbase = *(vulp)T2_WBASE1; + t2_saved_config.window[0].wmask = *(vulp)T2_WMASK1; + t2_saved_config.window[0].tbase = *(vulp)T2_TBASE1; + t2_saved_config.window[1].wbase = *(vulp)T2_WBASE2; + t2_saved_config.window[1].wmask = *(vulp)T2_WMASK2; + t2_saved_config.window[1].tbase = *(vulp)T2_TBASE2; + + t2_saved_config.hae_1 = srm_hae; /* HW is already set to 0 */ + t2_saved_config.hae_2 = *(vulp)T2_HAE_2; + t2_saved_config.hae_3 = *(vulp)T2_HAE_3; + t2_saved_config.hae_4 = *(vulp)T2_HAE_4; + t2_saved_config.hbase = *(vulp)T2_HBASE; +} + +void __init +t2_init_arch(void) +{ + struct pci_controller *hose; + struct resource *hae_mem; + unsigned long temp; + unsigned int i; + + for (i = 0; i < NR_CPUS; i++) { + mcheck_expected(i) = 0; + mcheck_taken(i) = 0; + } + t2_mcheck_any_expected = 0; + t2_mcheck_last_taken = 0; + + /* Enable scatter/gather TLB use. */ + temp = *(vulp)T2_IOCSR; + if (!(temp & (0x1UL << 26))) { + printk("t2_init_arch: enabling SG TLB, IOCSR was 0x%lx\n", + temp); + *(vulp)T2_IOCSR = temp | (0x1UL << 26); + mb(); + *(vulp)T2_IOCSR; /* read it back to make sure */ + } + + t2_save_configuration(); + + /* + * Create our single hose. + */ + pci_isa_hose = hose = alloc_pci_controller(); + hose->io_space = &ioport_resource; + hae_mem = alloc_resource(); + hae_mem->start = 0; + hae_mem->end = T2_MEM_R1_MASK; + hae_mem->name = pci_hae0_name; + if (request_resource(&iomem_resource, hae_mem) < 0) + printk(KERN_ERR "Failed to request HAE_MEM\n"); + hose->mem_space = hae_mem; + hose->index = 0; + + hose->sparse_mem_base = T2_SPARSE_MEM - IDENT_ADDR; + hose->dense_mem_base = T2_DENSE_MEM - IDENT_ADDR; + hose->sparse_io_base = T2_IO - IDENT_ADDR; + hose->dense_io_base = 0; + + /* + * Set up the PCI->physical memory translation windows. + * + * Window 1 is direct mapped. + * Window 2 is scatter/gather (for ISA). + */ + + t2_direct_map_window1(T2_DIRECTMAP_START, T2_DIRECTMAP_LENGTH); + + /* Always make an ISA DMA window. */ + t2_sg_map_window2(hose, T2_ISA_SG_START, T2_ISA_SG_LENGTH); + + *(vulp)T2_HBASE = 0x0; /* Disable HOLES. */ + + /* Zero HAE. */ + *(vulp)T2_HAE_1 = 0; mb(); /* Sparse MEM HAE */ + *(vulp)T2_HAE_2 = 0; mb(); /* Sparse I/O HAE */ + *(vulp)T2_HAE_3 = 0; mb(); /* Config Space HAE */ + + /* + * We also now zero out HAE_4, the dense memory HAE, so that + * we need not account for its "offset" when accessing dense + * memory resources which we allocated in our normal way. This + * HAE would need to stay untouched were we to keep the SRM + * resource settings. + * + * Thus we can now run standard X servers on SABLE/LYNX. :-) + */ + *(vulp)T2_HAE_4 = 0; mb(); +} + +void +t2_kill_arch(int mode) +{ + /* + * Restore the DMA Window registers. + */ + *(vulp)T2_WBASE1 = t2_saved_config.window[0].wbase; + *(vulp)T2_WMASK1 = t2_saved_config.window[0].wmask; + *(vulp)T2_TBASE1 = t2_saved_config.window[0].tbase; + *(vulp)T2_WBASE2 = t2_saved_config.window[1].wbase; + *(vulp)T2_WMASK2 = t2_saved_config.window[1].wmask; + *(vulp)T2_TBASE2 = t2_saved_config.window[1].tbase; + mb(); + + *(vulp)T2_HAE_1 = srm_hae; + *(vulp)T2_HAE_2 = t2_saved_config.hae_2; + *(vulp)T2_HAE_3 = t2_saved_config.hae_3; + *(vulp)T2_HAE_4 = t2_saved_config.hae_4; + *(vulp)T2_HBASE = t2_saved_config.hbase; + mb(); + *(vulp)T2_HBASE; /* READ it back to ensure WRITE occurred. 
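+ *
+ * (The mb() orders the CSR writes above, and the uncached read-back
+ * cannot complete until those writes have actually reached the T2,
+ * so the console gets the fully restored state.)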
*/ +} + +void +t2_pci_tbi(struct pci_controller *hose, dma_addr_t start, dma_addr_t end) +{ + unsigned long t2_iocsr; + + t2_iocsr = *(vulp)T2_IOCSR; + + /* set the TLB Clear bit */ + *(vulp)T2_IOCSR = t2_iocsr | (0x1UL << 28); + mb(); + *(vulp)T2_IOCSR; /* read it back to make sure */ + + /* clear the TLB Clear bit */ + *(vulp)T2_IOCSR = t2_iocsr & ~(0x1UL << 28); + mb(); + *(vulp)T2_IOCSR; /* read it back to make sure */ +} + +#define SIC_SEIC (1UL << 33) /* System Event Clear */ + +static void +t2_clear_errors(int cpu) +{ + struct sable_cpu_csr *cpu_regs; + + cpu_regs = (struct sable_cpu_csr *)T2_CPUn_BASE(cpu); + + cpu_regs->sic &= ~SIC_SEIC; + + /* Clear CPU errors. */ + cpu_regs->bcce |= cpu_regs->bcce; + cpu_regs->cbe |= cpu_regs->cbe; + cpu_regs->bcue |= cpu_regs->bcue; + cpu_regs->dter |= cpu_regs->dter; + + *(vulp)T2_CERR1 |= *(vulp)T2_CERR1; + *(vulp)T2_PERR1 |= *(vulp)T2_PERR1; + + mb(); + mb(); /* magic */ +} + +/* + * SABLE seems to have a "broadcast" style machine check, in that all + * CPUs receive it. And, the issuing CPU, in the case of PCI Config + * space read/write faults, will also receive a second mcheck, upon + * lowering IPL during completion processing in pci_read_config_byte() + * et al. + * + * Hence all the taken/expected/any_expected/last_taken stuff... + */ +void +t2_machine_check(unsigned long vector, unsigned long la_ptr) +{ + int cpu = smp_processor_id(); +#ifdef CONFIG_VERBOSE_MCHECK + struct el_common *mchk_header = (struct el_common *)la_ptr; +#endif + + /* Clear the error before any reporting. */ + mb(); + mb(); /* magic */ + draina(); + t2_clear_errors(cpu); + + /* This should not actually be done until the logout frame is + examined, but, since we don't do that, go on and do this... */ + wrmces(0x7); + mb(); + + /* Now, do testing for the anomalous conditions. */ + if (!mcheck_expected(cpu) && t2_mcheck_any_expected) { + /* + * FUNKY: Received mcheck on a CPU and not + * expecting it, but another CPU is expecting one. + * + * Just dismiss it for now on this CPU... + */ +#ifdef CONFIG_VERBOSE_MCHECK + if (alpha_verbose_mcheck > 1) { + printk("t2_machine_check(cpu%d): any_expected 0x%x -" + " (assumed) spurious -" + " code 0x%x\n", cpu, t2_mcheck_any_expected, + (unsigned int)mchk_header->code); + } +#endif + return; + } + + if (!mcheck_expected(cpu) && !t2_mcheck_any_expected) { + if (t2_mcheck_last_taken & (1 << cpu)) { +#ifdef CONFIG_VERBOSE_MCHECK + if (alpha_verbose_mcheck > 1) { + printk("t2_machine_check(cpu%d): last_taken 0x%x - " + "unexpected mcheck - code 0x%x\n", + cpu, t2_mcheck_last_taken, + (unsigned int)mchk_header->code); + } +#endif + t2_mcheck_last_taken = 0; + mb(); + return; + } else { + t2_mcheck_last_taken = 0; + mb(); + } + } + +#ifdef CONFIG_VERBOSE_MCHECK + if (alpha_verbose_mcheck > 1) { + printk("%s t2_mcheck(cpu%d): last_taken 0x%x - " + "any_expected 0x%x - code 0x%x\n", + (mcheck_expected(cpu) ? "EX" : "UN"), cpu, + t2_mcheck_last_taken, t2_mcheck_any_expected, + (unsigned int)mchk_header->code); + } +#endif + + process_mcheck_info(vector, la_ptr, "T2", mcheck_expected(cpu)); +} diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c new file mode 100644 index 0000000000..77f5d68ed0 --- /dev/null +++ b/arch/alpha/kernel/core_titan.c @@ -0,0 +1,802 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/core_titan.c + * + * Code common to all TITAN core logic chips. 
+ */ + +#define __EXTERN_INLINE inline +#include +#include +#undef __EXTERN_INLINE + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "proto.h" +#include "pci_impl.h" + +/* Save Titan configuration data as the console had it set up. */ + +struct +{ + unsigned long wsba[4]; + unsigned long wsm[4]; + unsigned long tba[4]; +} saved_config[4] __attribute__((common)); + +/* + * Is PChip 1 present? No need to query it more than once. + */ +static int titan_pchip1_present; + +/* + * BIOS32-style PCI interface: + */ + +#define DEBUG_CONFIG 0 + +#if DEBUG_CONFIG +# define DBG_CFG(args) printk args +#else +# define DBG_CFG(args) +#endif + + +/* + * Routines to access TIG registers. + */ +static inline volatile unsigned long * +mk_tig_addr(int offset) +{ + return (volatile unsigned long *)(TITAN_TIG_SPACE + (offset << 6)); +} + +static inline u8 +titan_read_tig(int offset, u8 value) +{ + volatile unsigned long *tig_addr = mk_tig_addr(offset); + return (u8)(*tig_addr & 0xff); +} + +static inline void +titan_write_tig(int offset, u8 value) +{ + volatile unsigned long *tig_addr = mk_tig_addr(offset); + *tig_addr = (unsigned long)value; +} + + +/* + * Given a bus, device, and function number, compute resulting + * configuration space address + * accordingly. It is therefore not safe to have concurrent + * invocations to configuration space access routines, but there + * really shouldn't be any need for this. + * + * Note that all config space accesses use Type 1 address format. + * + * Note also that type 1 is determined by non-zero bus number. + * + * Type 1: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | | | | | | | | | |B|B|B|B|B|B|B|B|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|1| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:24 reserved + * 23:16 bus number (8 bits = 128 possible buses) + * 15:11 Device number (5 bits) + * 10:8 function number + * 7:2 register number + * + * Notes: + * The function number selects which function of a multi-function device + * (e.g., SCSI and Ethernet). + * + * The register selects a DWORD (32 bit) register offset. Hence it + * doesn't get shifted by 2 bits as we want to "drop" the bottom two + * bits. + */ + +static int +mk_conf_addr(struct pci_bus *pbus, unsigned int device_fn, int where, + unsigned long *pci_addr, unsigned char *type1) +{ + struct pci_controller *hose = pbus->sysdata; + unsigned long addr; + u8 bus = pbus->number; + + DBG_CFG(("mk_conf_addr(bus=%d ,device_fn=0x%x, where=0x%x, " + "pci_addr=0x%p, type1=0x%p)\n", + bus, device_fn, where, pci_addr, type1)); + + if (!pbus->parent) /* No parent means peer PCI bus. 
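+ *
+ * (Bus numbers are assigned globally across all hoses, so the root
+ * bus of this hose may be non-zero; encoding it as bus 0 keeps the
+ * cycle local to the hose instead of forwarding it to a subordinate
+ * bus.)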
*/ + bus = 0; + *type1 = (bus != 0); + + addr = (bus << 16) | (device_fn << 8) | where; + addr |= hose->config_space_base; + + *pci_addr = addr; + DBG_CFG(("mk_conf_addr: returning pci_addr 0x%lx\n", addr)); + return 0; +} + +static int +titan_read_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + unsigned long addr; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (size) { + case 1: + *value = __kernel_ldbu(*(vucp)addr); + break; + case 2: + *value = __kernel_ldwu(*(vusp)addr); + break; + case 4: + *value = *(vuip)addr; + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +static int +titan_write_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 value) +{ + unsigned long addr; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (size) { + case 1: + __kernel_stb(value, *(vucp)addr); + mb(); + __kernel_ldbu(*(vucp)addr); + break; + case 2: + __kernel_stw(value, *(vusp)addr); + mb(); + __kernel_ldwu(*(vusp)addr); + break; + case 4: + *(vuip)addr = value; + mb(); + *(vuip)addr; + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops titan_pci_ops = +{ + .read = titan_read_config, + .write = titan_write_config, +}; + + +void +titan_pci_tbi(struct pci_controller *hose, dma_addr_t start, dma_addr_t end) +{ + titan_pachip *pachip = + (hose->index & 1) ? TITAN_pachip1 : TITAN_pachip0; + titan_pachip_port *port; + volatile unsigned long *csr; + unsigned long value; + + /* Get the right hose. */ + port = &pachip->g_port; + if (hose->index & 2) + port = &pachip->a_port; + + /* We can invalidate up to 8 tlb entries in a go. The flush + matches against <31:16> in the pci address. + Note that gtlbi* and atlbi* are in the same place in the g_port + and a_port, respectively, so the g_port offset can be used + even if hose is an a_port */ + csr = &port->port_specific.g.gtlbia.csr; + if (((start ^ end) & 0xffff0000) == 0) + csr = &port->port_specific.g.gtlbiv.csr; + + /* For TBIA, it doesn't matter what value we write. For TBI, + it's the shifted tag bits. */ + value = (start & 0xffff0000) >> 12; + + wmb(); + *csr = value; + mb(); + *csr; +} + +static int +titan_query_agp(titan_pachip_port *port) +{ + union TPAchipPCTL pctl; + + /* set up APCTL */ + pctl.pctl_q_whole = port->pctl.csr; + + return pctl.pctl_r_bits.apctl_v_agp_present; + +} + +static void __init +titan_init_one_pachip_port(titan_pachip_port *port, int index) +{ + struct pci_controller *hose; + + hose = alloc_pci_controller(); + if (index == 0) + pci_isa_hose = hose; + hose->io_space = alloc_resource(); + hose->mem_space = alloc_resource(); + + /* + * This is for userland consumption. The 40-bit PIO bias that we + * use in the kernel through KSEG doesn't work in the page table + * based user mappings. (43-bit KSEG sign extends the physical + * address from bit 40 to hit the I/O bit - mapped addresses don't). + * So make sure we get the 43-bit PIO bias. 
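+ *
+ * Concretely: keep the low 40 address bits of TITAN_MEM()/TITAN_IO()
+ * and OR in bit 43 (0x80000000000UL), which is exactly what the
+ * dense_{mem,io}_base assignments below do.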
+ */ + hose->sparse_mem_base = 0; + hose->sparse_io_base = 0; + hose->dense_mem_base + = (TITAN_MEM(index) & 0xffffffffffUL) | 0x80000000000UL; + hose->dense_io_base + = (TITAN_IO(index) & 0xffffffffffUL) | 0x80000000000UL; + + hose->config_space_base = TITAN_CONF(index); + hose->index = index; + + hose->io_space->start = TITAN_IO(index) - TITAN_IO_BIAS; + hose->io_space->end = hose->io_space->start + TITAN_IO_SPACE - 1; + hose->io_space->name = pci_io_names[index]; + hose->io_space->flags = IORESOURCE_IO; + + hose->mem_space->start = TITAN_MEM(index) - TITAN_MEM_BIAS; + hose->mem_space->end = hose->mem_space->start + 0xffffffff; + hose->mem_space->name = pci_mem_names[index]; + hose->mem_space->flags = IORESOURCE_MEM; + + if (request_resource(&ioport_resource, hose->io_space) < 0) + printk(KERN_ERR "Failed to request IO on hose %d\n", index); + if (request_resource(&iomem_resource, hose->mem_space) < 0) + printk(KERN_ERR "Failed to request MEM on hose %d\n", index); + + /* + * Save the existing PCI window translations. SRM will + * need them when we go to reboot. + */ + saved_config[index].wsba[0] = port->wsba[0].csr; + saved_config[index].wsm[0] = port->wsm[0].csr; + saved_config[index].tba[0] = port->tba[0].csr; + + saved_config[index].wsba[1] = port->wsba[1].csr; + saved_config[index].wsm[1] = port->wsm[1].csr; + saved_config[index].tba[1] = port->tba[1].csr; + + saved_config[index].wsba[2] = port->wsba[2].csr; + saved_config[index].wsm[2] = port->wsm[2].csr; + saved_config[index].tba[2] = port->tba[2].csr; + + saved_config[index].wsba[3] = port->wsba[3].csr; + saved_config[index].wsm[3] = port->wsm[3].csr; + saved_config[index].tba[3] = port->tba[3].csr; + + /* + * Set up the PCI to main memory translation windows. + * + * Note: Window 3 on Titan is Scatter-Gather ONLY. + * + * Window 0 is scatter-gather 8MB at 8MB (for isa) + * Window 1 is direct access 1GB at 2GB + * Window 2 is scatter-gather 1GB at 3GB + */ + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, + SMP_CACHE_BYTES); + hose->sg_isa->align_entry = 8; /* 64KB for ISA */ + + hose->sg_pci = iommu_arena_new(hose, 0xc0000000, 0x40000000, + SMP_CACHE_BYTES); + hose->sg_pci->align_entry = 4; /* Titan caches 4 PTEs at a time */ + + port->wsba[0].csr = hose->sg_isa->dma_base | 3; + port->wsm[0].csr = (hose->sg_isa->size - 1) & 0xfff00000; + port->tba[0].csr = virt_to_phys(hose->sg_isa->ptes); + + port->wsba[1].csr = __direct_map_base | 1; + port->wsm[1].csr = (__direct_map_size - 1) & 0xfff00000; + port->tba[1].csr = 0; + + port->wsba[2].csr = hose->sg_pci->dma_base | 3; + port->wsm[2].csr = (hose->sg_pci->size - 1) & 0xfff00000; + port->tba[2].csr = virt_to_phys(hose->sg_pci->ptes); + + port->wsba[3].csr = 0; + + /* Enable the Monster Window to make DAC pci64 possible. */ + port->pctl.csr |= pctl_m_mwin; + + /* + * If it's an AGP port, initialize agplastwr. + */ + if (titan_query_agp(port)) + port->port_specific.a.agplastwr.csr = __direct_map_base; + + titan_pci_tbi(hose, 0, -1); +} + +static void __init +titan_init_pachips(titan_pachip *pachip0, titan_pachip *pachip1) +{ + titan_pchip1_present = TITAN_cchip->csc.csr & 1L<<14; + + /* Init the ports in hose order... 
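+ *
+ * The first port initialized gets index 0 and therefore becomes
+ * pci_isa_hose; pachip0's G and A ports come up as hoses 0 and 2,
+ * pachip1's (when present) as hoses 1 and 3.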
*/ + titan_init_one_pachip_port(&pachip0->g_port, 0); /* hose 0 */ + if (titan_pchip1_present) + titan_init_one_pachip_port(&pachip1->g_port, 1);/* hose 1 */ + titan_init_one_pachip_port(&pachip0->a_port, 2); /* hose 2 */ + if (titan_pchip1_present) + titan_init_one_pachip_port(&pachip1->a_port, 3);/* hose 3 */ +} + +void __init +titan_init_arch(void) +{ +#if 0 + printk("%s: titan_init_arch()\n", __func__); + printk("%s: CChip registers:\n", __func__); + printk("%s: CSR_CSC 0x%lx\n", __func__, TITAN_cchip->csc.csr); + printk("%s: CSR_MTR 0x%lx\n", __func__, TITAN_cchip->mtr.csr); + printk("%s: CSR_MISC 0x%lx\n", __func__, TITAN_cchip->misc.csr); + printk("%s: CSR_DIM0 0x%lx\n", __func__, TITAN_cchip->dim0.csr); + printk("%s: CSR_DIM1 0x%lx\n", __func__, TITAN_cchip->dim1.csr); + printk("%s: CSR_DIR0 0x%lx\n", __func__, TITAN_cchip->dir0.csr); + printk("%s: CSR_DIR1 0x%lx\n", __func__, TITAN_cchip->dir1.csr); + printk("%s: CSR_DRIR 0x%lx\n", __func__, TITAN_cchip->drir.csr); + + printk("%s: DChip registers:\n", __func__); + printk("%s: CSR_DSC 0x%lx\n", __func__, TITAN_dchip->dsc.csr); + printk("%s: CSR_STR 0x%lx\n", __func__, TITAN_dchip->str.csr); + printk("%s: CSR_DREV 0x%lx\n", __func__, TITAN_dchip->drev.csr); +#endif + + boot_cpuid = __hard_smp_processor_id(); + + /* With multiple PCI busses, we play with I/O as physical addrs. */ + ioport_resource.end = ~0UL; + iomem_resource.end = ~0UL; + + /* PCI DMA Direct Mapping is 1GB at 2GB. */ + __direct_map_base = 0x80000000; + __direct_map_size = 0x40000000; + + /* Init the PA chip(s). */ + titan_init_pachips(TITAN_pachip0, TITAN_pachip1); + + /* Check for graphic console location (if any). */ + find_console_vga_hose(); +} + +static void +titan_kill_one_pachip_port(titan_pachip_port *port, int index) +{ + port->wsba[0].csr = saved_config[index].wsba[0]; + port->wsm[0].csr = saved_config[index].wsm[0]; + port->tba[0].csr = saved_config[index].tba[0]; + + port->wsba[1].csr = saved_config[index].wsba[1]; + port->wsm[1].csr = saved_config[index].wsm[1]; + port->tba[1].csr = saved_config[index].tba[1]; + + port->wsba[2].csr = saved_config[index].wsba[2]; + port->wsm[2].csr = saved_config[index].wsm[2]; + port->tba[2].csr = saved_config[index].tba[2]; + + port->wsba[3].csr = saved_config[index].wsba[3]; + port->wsm[3].csr = saved_config[index].wsm[3]; + port->tba[3].csr = saved_config[index].tba[3]; +} + +static void +titan_kill_pachips(titan_pachip *pachip0, titan_pachip *pachip1) +{ + if (titan_pchip1_present) { + titan_kill_one_pachip_port(&pachip1->g_port, 1); + titan_kill_one_pachip_port(&pachip1->a_port, 3); + } + titan_kill_one_pachip_port(&pachip0->g_port, 0); + titan_kill_one_pachip_port(&pachip0->a_port, 2); +} + +void +titan_kill_arch(int mode) +{ + titan_kill_pachips(TITAN_pachip0, TITAN_pachip1); +} + + +/* + * IO map support. + */ + +void __iomem * +titan_ioportmap(unsigned long addr) +{ + FIXUP_IOADDR_VGA(addr); + return (void __iomem *)(addr + TITAN_IO_BIAS); +} + + +void __iomem * +titan_ioremap(unsigned long addr, unsigned long size) +{ + int h = (addr & TITAN_HOSE_MASK) >> TITAN_HOSE_SHIFT; + unsigned long baddr = addr & ~TITAN_HOSE_MASK; + unsigned long last = baddr + size - 1; + struct pci_controller *hose; + struct vm_struct *area; + unsigned long vaddr; + unsigned long *ptes; + unsigned long pfn; + +#ifdef CONFIG_VGA_HOSE + /* + * Adjust the address and hose, if necessary. 
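+ *
+ * That is, if this mapping falls in the legacy VGA range, retarget it
+ * at the hose that owns the console VGA device and rebase the address
+ * into that hose's memory space, which is what the two assignments
+ * below do.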
+ */ + if (pci_vga_hose && __is_mem_vga(addr)) { + h = pci_vga_hose->index; + addr += pci_vga_hose->mem_space->start; + } +#endif + + /* + * Find the hose. + */ + for (hose = hose_head; hose; hose = hose->next) + if (hose->index == h) + break; + if (!hose) + return NULL; + + /* + * Is it direct-mapped? + */ + if ((baddr >= __direct_map_base) && + ((baddr + size - 1) < __direct_map_base + __direct_map_size)) { + vaddr = addr - __direct_map_base + TITAN_MEM_BIAS; + return (void __iomem *) vaddr; + } + + /* + * Check the scatter-gather arena. + */ + if (hose->sg_pci && + baddr >= (unsigned long)hose->sg_pci->dma_base && + last < (unsigned long)hose->sg_pci->dma_base + hose->sg_pci->size){ + + /* + * Adjust the limits (mappings must be page aligned) + */ + baddr -= hose->sg_pci->dma_base; + last -= hose->sg_pci->dma_base; + baddr &= PAGE_MASK; + size = PAGE_ALIGN(last) - baddr; + + /* + * Map it + */ + area = get_vm_area(size, VM_IOREMAP); + if (!area) { + printk("ioremap failed... no vm_area...\n"); + return NULL; + } + + ptes = hose->sg_pci->ptes; + for (vaddr = (unsigned long)area->addr; + baddr <= last; + baddr += PAGE_SIZE, vaddr += PAGE_SIZE) { + pfn = ptes[baddr >> PAGE_SHIFT]; + if (!(pfn & 1)) { + printk("ioremap failed... pte not valid...\n"); + vfree(area->addr); + return NULL; + } + pfn >>= 1; /* make it a true pfn */ + + if (__alpha_remap_area_pages(vaddr, + pfn << PAGE_SHIFT, + PAGE_SIZE, 0)) { + printk("FAILED to remap_area_pages...\n"); + vfree(area->addr); + return NULL; + } + } + + flush_tlb_all(); + + vaddr = (unsigned long)area->addr + (addr & ~PAGE_MASK); + return (void __iomem *) vaddr; + } + + /* Assume a legacy (read: VGA) address, and return appropriately. */ + return (void __iomem *)(addr + TITAN_MEM_BIAS); +} + +void +titan_iounmap(volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + if (addr >= VMALLOC_START) + vfree((void *)(PAGE_MASK & addr)); +} + +int +titan_is_mmio(const volatile void __iomem *xaddr) +{ + unsigned long addr = (unsigned long) xaddr; + + if (addr >= VMALLOC_START) + return 1; + else + return (addr & 0x100000000UL) == 0; +} + +#ifndef CONFIG_ALPHA_GENERIC +EXPORT_SYMBOL(titan_ioportmap); +EXPORT_SYMBOL(titan_ioremap); +EXPORT_SYMBOL(titan_iounmap); +EXPORT_SYMBOL(titan_is_mmio); +#endif + +/* + * AGP GART Support. 
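+ *
+ * The aperture is carved out of the AGP hose's PCI scatter-gather
+ * arena (hose->sg_pci) with iommu_reserve(), so AGP and ordinary PCI
+ * DMA share one window.  Rough call sequence, for illustration only:
+ *
+ *	alpha_agp_info *agp = titan_agp_info();
+ *	if (agp && agp->ops->setup(agp) == 0)
+ *		agp->ops->configure(agp);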
+ */ +#include +#include +#include +#include + +struct titan_agp_aperture { + struct pci_iommu_arena *arena; + long pg_start; + long pg_count; +}; + +static int +titan_agp_setup(alpha_agp_info *agp) +{ + struct titan_agp_aperture *aper; + + if (!alpha_agpgart_size) + return -ENOMEM; + + aper = kmalloc(sizeof(struct titan_agp_aperture), GFP_KERNEL); + if (aper == NULL) + return -ENOMEM; + + aper->arena = agp->hose->sg_pci; + aper->pg_count = alpha_agpgart_size / PAGE_SIZE; + aper->pg_start = iommu_reserve(aper->arena, aper->pg_count, + aper->pg_count - 1); + if (aper->pg_start < 0) { + printk(KERN_ERR "Failed to reserve AGP memory\n"); + kfree(aper); + return -ENOMEM; + } + + agp->aperture.bus_base = + aper->arena->dma_base + aper->pg_start * PAGE_SIZE; + agp->aperture.size = aper->pg_count * PAGE_SIZE; + agp->aperture.sysdata = aper; + + return 0; +} + +static void +titan_agp_cleanup(alpha_agp_info *agp) +{ + struct titan_agp_aperture *aper = agp->aperture.sysdata; + int status; + + status = iommu_release(aper->arena, aper->pg_start, aper->pg_count); + if (status == -EBUSY) { + printk(KERN_WARNING + "Attempted to release bound AGP memory - unbinding\n"); + iommu_unbind(aper->arena, aper->pg_start, aper->pg_count); + status = iommu_release(aper->arena, aper->pg_start, + aper->pg_count); + } + if (status < 0) + printk(KERN_ERR "Failed to release AGP memory\n"); + + kfree(aper); + kfree(agp); +} + +static int +titan_agp_configure(alpha_agp_info *agp) +{ + union TPAchipPCTL pctl; + titan_pachip_port *port = agp->private; + pctl.pctl_q_whole = port->pctl.csr; + + /* Side-Band Addressing? */ + pctl.pctl_r_bits.apctl_v_agp_sba_en = agp->mode.bits.sba; + + /* AGP Rate? */ + pctl.pctl_r_bits.apctl_v_agp_rate = 0; /* 1x */ + if (agp->mode.bits.rate & 2) + pctl.pctl_r_bits.apctl_v_agp_rate = 1; /* 2x */ +#if 0 + if (agp->mode.bits.rate & 4) + pctl.pctl_r_bits.apctl_v_agp_rate = 2; /* 4x */ +#endif + + /* RQ Depth? */ + pctl.pctl_r_bits.apctl_v_agp_hp_rd = 2; + pctl.pctl_r_bits.apctl_v_agp_lp_rd = 7; + + /* + * AGP Enable. + */ + pctl.pctl_r_bits.apctl_v_agp_en = agp->mode.bits.enable; + + /* Tell the user. */ + printk("Enabling AGP: %dX%s\n", + 1 << pctl.pctl_r_bits.apctl_v_agp_rate, + pctl.pctl_r_bits.apctl_v_agp_sba_en ? " - SBA" : ""); + + /* Write it. */ + port->pctl.csr = pctl.pctl_q_whole; + + /* And wait at least 5000 66MHz cycles (per Titan spec). 
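+ *
+ * (5000 cycles at 66 MHz is roughly 76 usec, so the 100 usec delay
+ * below leaves a little margin.)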
*/ + udelay(100); + + return 0; +} + +static int +titan_agp_bind_memory(alpha_agp_info *agp, off_t pg_start, struct agp_memory *mem) +{ + struct titan_agp_aperture *aper = agp->aperture.sysdata; + return iommu_bind(aper->arena, aper->pg_start + pg_start, + mem->page_count, mem->pages); +} + +static int +titan_agp_unbind_memory(alpha_agp_info *agp, off_t pg_start, struct agp_memory *mem) +{ + struct titan_agp_aperture *aper = agp->aperture.sysdata; + return iommu_unbind(aper->arena, aper->pg_start + pg_start, + mem->page_count); +} + +static unsigned long +titan_agp_translate(alpha_agp_info *agp, dma_addr_t addr) +{ + struct titan_agp_aperture *aper = agp->aperture.sysdata; + unsigned long baddr = addr - aper->arena->dma_base; + unsigned long pte; + + if (addr < agp->aperture.bus_base || + addr >= agp->aperture.bus_base + agp->aperture.size) { + printk("%s: addr out of range\n", __func__); + return -EINVAL; + } + + pte = aper->arena->ptes[baddr >> PAGE_SHIFT]; + if (!(pte & 1)) { + printk("%s: pte not valid\n", __func__); + return -EINVAL; + } + + return (pte >> 1) << PAGE_SHIFT; +} + +struct alpha_agp_ops titan_agp_ops = +{ + .setup = titan_agp_setup, + .cleanup = titan_agp_cleanup, + .configure = titan_agp_configure, + .bind = titan_agp_bind_memory, + .unbind = titan_agp_unbind_memory, + .translate = titan_agp_translate +}; + +alpha_agp_info * +titan_agp_info(void) +{ + alpha_agp_info *agp; + struct pci_controller *hose; + titan_pachip_port *port; + int hosenum = -1; + union TPAchipPCTL pctl; + + /* + * Find the AGP port. + */ + port = &TITAN_pachip0->a_port; + if (titan_query_agp(port)) + hosenum = 2; + if (hosenum < 0 && + titan_pchip1_present && + titan_query_agp(port = &TITAN_pachip1->a_port)) + hosenum = 3; + + /* + * Find the hose the port is on. + */ + for (hose = hose_head; hose; hose = hose->next) + if (hose->index == hosenum) + break; + + if (!hose || !hose->sg_pci) + return NULL; + + /* + * Allocate the info structure. + */ + agp = kmalloc(sizeof(*agp), GFP_KERNEL); + if (!agp) + return NULL; + + /* + * Fill it in. + */ + agp->hose = hose; + agp->private = port; + agp->ops = &titan_agp_ops; + + /* + * Aperture - not configured until ops.setup(). + * + * FIXME - should we go ahead and allocate it here? + */ + agp->aperture.bus_base = 0; + agp->aperture.size = 0; + agp->aperture.sysdata = NULL; + + /* + * Capabilities. + */ + agp->capability.lw = 0; + agp->capability.bits.rate = 3; /* 2x, 1x */ + agp->capability.bits.sba = 1; + agp->capability.bits.rq = 7; /* 8 - 1 */ + + /* + * Mode. + */ + pctl.pctl_q_whole = port->pctl.csr; + agp->mode.lw = 0; + agp->mode.bits.rate = 1 << pctl.pctl_r_bits.apctl_v_agp_rate; + agp->mode.bits.sba = pctl.pctl_r_bits.apctl_v_agp_sba_en; + agp->mode.bits.rq = 7; /* RQ Depth? */ + agp->mode.bits.enable = pctl.pctl_r_bits.apctl_v_agp_en; + + return agp; +} diff --git a/arch/alpha/kernel/core_tsunami.c b/arch/alpha/kernel/core_tsunami.c new file mode 100644 index 0000000000..fc1ab73f23 --- /dev/null +++ b/arch/alpha/kernel/core_tsunami.c @@ -0,0 +1,484 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/core_tsunami.c + * + * Based on code written by David A. Rusling (david.rusling@reo.mts.dec.com). + * + * Code common to all TSUNAMI core logic chips. 
+ */ + +#define __EXTERN_INLINE inline +#include +#include +#undef __EXTERN_INLINE + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "proto.h" +#include "pci_impl.h" + +/* Save Tsunami configuration data as the console had it set up. */ + +struct +{ + unsigned long wsba[4]; + unsigned long wsm[4]; + unsigned long tba[4]; +} saved_config[2] __attribute__((common)); + +/* + * NOTE: Herein lie back-to-back mb instructions. They are magic. + * One plausible explanation is that the I/O controller does not properly + * handle the system transaction. Another involves timing. Ho hum. + */ + +/* + * BIOS32-style PCI interface: + */ + +#define DEBUG_CONFIG 0 + +#if DEBUG_CONFIG +# define DBG_CFG(args) printk args +#else +# define DBG_CFG(args) +#endif + + +/* + * Given a bus, device, and function number, compute resulting + * configuration space address + * accordingly. It is therefore not safe to have concurrent + * invocations to configuration space access routines, but there + * really shouldn't be any need for this. + * + * Note that all config space accesses use Type 1 address format. + * + * Note also that type 1 is determined by non-zero bus number. + * + * Type 1: + * + * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1 + * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | | | | | | | | | |B|B|B|B|B|B|B|B|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|1| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 31:24 reserved + * 23:16 bus number (8 bits = 128 possible buses) + * 15:11 Device number (5 bits) + * 10:8 function number + * 7:2 register number + * + * Notes: + * The function number selects which function of a multi-function device + * (e.g., SCSI and Ethernet). + * + * The register selects a DWORD (32 bit) register offset. Hence it + * doesn't get shifted by 2 bits as we want to "drop" the bottom two + * bits. + */ + +static int +mk_conf_addr(struct pci_bus *pbus, unsigned int device_fn, int where, + unsigned long *pci_addr, unsigned char *type1) +{ + struct pci_controller *hose = pbus->sysdata; + unsigned long addr; + u8 bus = pbus->number; + + DBG_CFG(("mk_conf_addr(bus=%d ,device_fn=0x%x, where=0x%x, " + "pci_addr=0x%p, type1=0x%p)\n", + bus, device_fn, where, pci_addr, type1)); + + if (!pbus->parent) /* No parent means peer PCI bus. 
*/ + bus = 0; + *type1 = (bus != 0); + + addr = (bus << 16) | (device_fn << 8) | where; + addr |= hose->config_space_base; + + *pci_addr = addr; + DBG_CFG(("mk_conf_addr: returning pci_addr 0x%lx\n", addr)); + return 0; +} + +static int +tsunami_read_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + unsigned long addr; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (size) { + case 1: + *value = __kernel_ldbu(*(vucp)addr); + break; + case 2: + *value = __kernel_ldwu(*(vusp)addr); + break; + case 4: + *value = *(vuip)addr; + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +static int +tsunami_write_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 value) +{ + unsigned long addr; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (size) { + case 1: + __kernel_stb(value, *(vucp)addr); + mb(); + __kernel_ldbu(*(vucp)addr); + break; + case 2: + __kernel_stw(value, *(vusp)addr); + mb(); + __kernel_ldwu(*(vusp)addr); + break; + case 4: + *(vuip)addr = value; + mb(); + *(vuip)addr; + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops tsunami_pci_ops = +{ + .read = tsunami_read_config, + .write = tsunami_write_config, +}; + +void +tsunami_pci_tbi(struct pci_controller *hose, dma_addr_t start, dma_addr_t end) +{ + tsunami_pchip *pchip = hose->index ? TSUNAMI_pchip1 : TSUNAMI_pchip0; + volatile unsigned long *csr; + unsigned long value; + + /* We can invalidate up to 8 tlb entries in a go. The flush + matches against <31:16> in the pci address. */ + csr = &pchip->tlbia.csr; + if (((start ^ end) & 0xffff0000) == 0) + csr = &pchip->tlbiv.csr; + + /* For TBIA, it doesn't matter what value we write. For TBI, + it's the shifted tag bits. */ + value = (start & 0xffff0000) >> 12; + + *csr = value; + mb(); + *csr; +} + +#ifdef NXM_MACHINE_CHECKS_ON_TSUNAMI +static long __init +tsunami_probe_read(volatile unsigned long *vaddr) +{ + long dont_care, probe_result; + int cpu = smp_processor_id(); + int s = swpipl(IPL_MCHECK - 1); + + mcheck_taken(cpu) = 0; + mcheck_expected(cpu) = 1; + mb(); + dont_care = *vaddr; + draina(); + mcheck_expected(cpu) = 0; + probe_result = !mcheck_taken(cpu); + mcheck_taken(cpu) = 0; + setipl(s); + + printk("dont_care == 0x%lx\n", dont_care); + + return probe_result; +} + +static long __init +tsunami_probe_write(volatile unsigned long *vaddr) +{ + long true_contents, probe_result = 1; + + TSUNAMI_cchip->misc.csr |= (1L << 28); /* clear NXM... */ + true_contents = *vaddr; + *vaddr = 0; + draina(); + if (TSUNAMI_cchip->misc.csr & (1L << 28)) { + int source = (TSUNAMI_cchip->misc.csr >> 29) & 7; + TSUNAMI_cchip->misc.csr |= (1L << 28); /* ...and unlock NXS. */ + probe_result = 0; + printk("tsunami_probe_write: unit %d at 0x%016lx\n", source, + (unsigned long)vaddr); + } + if (probe_result) + *vaddr = true_contents; + return probe_result; +} +#else +#define tsunami_probe_read(ADDR) 1 +#endif /* NXM_MACHINE_CHECKS_ON_TSUNAMI */ + +static void __init +tsunami_init_one_pchip(tsunami_pchip *pchip, int index) +{ + struct pci_controller *hose; + + if (tsunami_probe_read(&pchip->pctl.csr) == 0) + return; + + hose = alloc_pci_controller(); + if (index == 0) + pci_isa_hose = hose; + hose->io_space = alloc_resource(); + hose->mem_space = alloc_resource(); + + /* This is for userland consumption. 
For some reason, the 40-bit + PIO bias that we use in the kernel through KSEG didn't work for + the page table based user mappings. So make sure we get the + 43-bit PIO bias. */ + hose->sparse_mem_base = 0; + hose->sparse_io_base = 0; + hose->dense_mem_base + = (TSUNAMI_MEM(index) & 0xffffffffffL) | 0x80000000000L; + hose->dense_io_base + = (TSUNAMI_IO(index) & 0xffffffffffL) | 0x80000000000L; + + hose->config_space_base = TSUNAMI_CONF(index); + hose->index = index; + + hose->io_space->start = TSUNAMI_IO(index) - TSUNAMI_IO_BIAS; + hose->io_space->end = hose->io_space->start + TSUNAMI_IO_SPACE - 1; + hose->io_space->name = pci_io_names[index]; + hose->io_space->flags = IORESOURCE_IO; + + hose->mem_space->start = TSUNAMI_MEM(index) - TSUNAMI_MEM_BIAS; + hose->mem_space->end = hose->mem_space->start + 0xffffffff; + hose->mem_space->name = pci_mem_names[index]; + hose->mem_space->flags = IORESOURCE_MEM; + + if (request_resource(&ioport_resource, hose->io_space) < 0) + printk(KERN_ERR "Failed to request IO on hose %d\n", index); + if (request_resource(&iomem_resource, hose->mem_space) < 0) + printk(KERN_ERR "Failed to request MEM on hose %d\n", index); + + /* + * Save the existing PCI window translations. SRM will + * need them when we go to reboot. + */ + + saved_config[index].wsba[0] = pchip->wsba[0].csr; + saved_config[index].wsm[0] = pchip->wsm[0].csr; + saved_config[index].tba[0] = pchip->tba[0].csr; + + saved_config[index].wsba[1] = pchip->wsba[1].csr; + saved_config[index].wsm[1] = pchip->wsm[1].csr; + saved_config[index].tba[1] = pchip->tba[1].csr; + + saved_config[index].wsba[2] = pchip->wsba[2].csr; + saved_config[index].wsm[2] = pchip->wsm[2].csr; + saved_config[index].tba[2] = pchip->tba[2].csr; + + saved_config[index].wsba[3] = pchip->wsba[3].csr; + saved_config[index].wsm[3] = pchip->wsm[3].csr; + saved_config[index].tba[3] = pchip->tba[3].csr; + + /* + * Set up the PCI to main memory translation windows. + * + * Note: Window 3 is scatter-gather only + * + * Window 0 is scatter-gather 8MB at 8MB (for isa) + * Window 1 is scatter-gather (up to) 1GB at 1GB + * Window 2 is direct access 2GB at 2GB + * + * NOTE: we need the align_entry settings for Acer devices on ES40, + * specifically floppy and IDE when memory is larger than 2GB. + */ + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, + SMP_CACHE_BYTES); + /* Initially set for 4 PTEs, but will be overridden to 64K for ISA. */ + hose->sg_isa->align_entry = 4; + + hose->sg_pci = iommu_arena_new(hose, 0x40000000, + size_for_memory(0x40000000), + SMP_CACHE_BYTES); + hose->sg_pci->align_entry = 4; /* Tsunami caches 4 PTEs at a time */ + + __direct_map_base = 0x80000000; + __direct_map_size = 0x80000000; + + pchip->wsba[0].csr = hose->sg_isa->dma_base | 3; + pchip->wsm[0].csr = (hose->sg_isa->size - 1) & 0xfff00000; + pchip->tba[0].csr = virt_to_phys(hose->sg_isa->ptes); + + pchip->wsba[1].csr = hose->sg_pci->dma_base | 3; + pchip->wsm[1].csr = (hose->sg_pci->size - 1) & 0xfff00000; + pchip->tba[1].csr = virt_to_phys(hose->sg_pci->ptes); + + pchip->wsba[2].csr = 0x80000000 | 1; + pchip->wsm[2].csr = (0x80000000 - 1) & 0xfff00000; + pchip->tba[2].csr = 0; + + pchip->wsba[3].csr = 0; + + /* Enable the Monster Window to make DAC pci64 possible. 
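+ *
+ * (For reference: with the settings just above, window 2 maps PCI
+ * addresses 0x80000000-0xffffffff one-to-one onto physical memory
+ * 0x0-0x7fffffff, matching the 2GB-at-2GB __direct_map_base and
+ * __direct_map_size.)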
*/ + pchip->pctl.csr |= pctl_m_mwin; + + tsunami_pci_tbi(hose, 0, -1); +} + + +void __iomem * +tsunami_ioportmap(unsigned long addr) +{ + FIXUP_IOADDR_VGA(addr); + return (void __iomem *)(addr + TSUNAMI_IO_BIAS); +} + +void __iomem * +tsunami_ioremap(unsigned long addr, unsigned long size) +{ + FIXUP_MEMADDR_VGA(addr); + return (void __iomem *)(addr + TSUNAMI_MEM_BIAS); +} + +#ifndef CONFIG_ALPHA_GENERIC +EXPORT_SYMBOL(tsunami_ioportmap); +EXPORT_SYMBOL(tsunami_ioremap); +#endif + +void __init +tsunami_init_arch(void) +{ +#ifdef NXM_MACHINE_CHECKS_ON_TSUNAMI + unsigned long tmp; + + /* Ho hum.. init_arch is called before init_IRQ, but we need to be + able to handle machine checks. So install the handler now. */ + wrent(entInt, 0); + + /* NXMs just don't matter to Tsunami--unless they make it + choke completely. */ + tmp = (unsigned long)(TSUNAMI_cchip - 1); + printk("%s: probing bogus address: 0x%016lx\n", __func__, bogus_addr); + printk("\tprobe %s\n", + tsunami_probe_write((unsigned long *)bogus_addr) + ? "succeeded" : "failed"); +#endif /* NXM_MACHINE_CHECKS_ON_TSUNAMI */ + +#if 0 + printk("%s: CChip registers:\n", __func__); + printk("%s: CSR_CSC 0x%lx\n", __func__, TSUNAMI_cchip->csc.csr); + printk("%s: CSR_MTR 0x%lx\n", __func__, TSUNAMI_cchip.mtr.csr); + printk("%s: CSR_MISC 0x%lx\n", __func__, TSUNAMI_cchip->misc.csr); + printk("%s: CSR_DIM0 0x%lx\n", __func__, TSUNAMI_cchip->dim0.csr); + printk("%s: CSR_DIM1 0x%lx\n", __func__, TSUNAMI_cchip->dim1.csr); + printk("%s: CSR_DIR0 0x%lx\n", __func__, TSUNAMI_cchip->dir0.csr); + printk("%s: CSR_DIR1 0x%lx\n", __func__, TSUNAMI_cchip->dir1.csr); + printk("%s: CSR_DRIR 0x%lx\n", __func__, TSUNAMI_cchip->drir.csr); + + printk("%s: DChip registers:\n"); + printk("%s: CSR_DSC 0x%lx\n", __func__, TSUNAMI_dchip->dsc.csr); + printk("%s: CSR_STR 0x%lx\n", __func__, TSUNAMI_dchip->str.csr); + printk("%s: CSR_DREV 0x%lx\n", __func__, TSUNAMI_dchip->drev.csr); +#endif + /* With multiple PCI busses, we play with I/O as physical addrs. */ + ioport_resource.end = ~0UL; + + /* Find how many hoses we have, and initialize them. TSUNAMI + and TYPHOON can have 2, but might only have 1 (DS10). */ + + tsunami_init_one_pchip(TSUNAMI_pchip0, 0); + if (TSUNAMI_cchip->csc.csr & 1L<<14) + tsunami_init_one_pchip(TSUNAMI_pchip1, 1); + + /* Check for graphic console location (if any). 
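+ *
+ * find_console_vga_hose() records which hose the console VGA device
+ * sits behind (pci_vga_hose), so the VGA fixups in
+ * tsunami_ioportmap()/tsunami_ioremap() can redirect legacy VGA
+ * addresses to it.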
*/ + find_console_vga_hose(); +} + +static void +tsunami_kill_one_pchip(tsunami_pchip *pchip, int index) +{ + pchip->wsba[0].csr = saved_config[index].wsba[0]; + pchip->wsm[0].csr = saved_config[index].wsm[0]; + pchip->tba[0].csr = saved_config[index].tba[0]; + + pchip->wsba[1].csr = saved_config[index].wsba[1]; + pchip->wsm[1].csr = saved_config[index].wsm[1]; + pchip->tba[1].csr = saved_config[index].tba[1]; + + pchip->wsba[2].csr = saved_config[index].wsba[2]; + pchip->wsm[2].csr = saved_config[index].wsm[2]; + pchip->tba[2].csr = saved_config[index].tba[2]; + + pchip->wsba[3].csr = saved_config[index].wsba[3]; + pchip->wsm[3].csr = saved_config[index].wsm[3]; + pchip->tba[3].csr = saved_config[index].tba[3]; +} + +void +tsunami_kill_arch(int mode) +{ + tsunami_kill_one_pchip(TSUNAMI_pchip0, 0); + if (TSUNAMI_cchip->csc.csr & 1L<<14) + tsunami_kill_one_pchip(TSUNAMI_pchip1, 1); +} + +static inline void +tsunami_pci_clr_err_1(tsunami_pchip *pchip) +{ + pchip->perror.csr; + pchip->perror.csr = 0x040; + mb(); + pchip->perror.csr; +} + +static inline void +tsunami_pci_clr_err(void) +{ + tsunami_pci_clr_err_1(TSUNAMI_pchip0); + + /* TSUNAMI and TYPHOON can have 2, but might only have 1 (DS10) */ + if (TSUNAMI_cchip->csc.csr & 1L<<14) + tsunami_pci_clr_err_1(TSUNAMI_pchip1); +} + +void +tsunami_machine_check(unsigned long vector, unsigned long la_ptr) +{ + /* Clear error before any reporting. */ + mb(); + mb(); /* magic */ + draina(); + tsunami_pci_clr_err(); + wrmces(0x7); + mb(); + + process_mcheck_info(vector, la_ptr, "TSUNAMI", + mcheck_expected(smp_processor_id())); +} diff --git a/arch/alpha/kernel/core_wildfire.c b/arch/alpha/kernel/core_wildfire.c new file mode 100644 index 0000000000..3a804b67f9 --- /dev/null +++ b/arch/alpha/kernel/core_wildfire.c @@ -0,0 +1,633 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/core_wildfire.c + * + * Wildfire support. + * + * Copyright (C) 2000 Andrea Arcangeli SuSE + */ + +#define __EXTERN_INLINE inline +#include +#include +#undef __EXTERN_INLINE + +#include +#include +#include +#include + +#include +#include + +#include "proto.h" +#include "pci_impl.h" + +#define DEBUG_CONFIG 0 +#define DEBUG_DUMP_REGS 0 +#define DEBUG_DUMP_CONFIG 1 + +#if DEBUG_CONFIG +# define DBG_CFG(args) printk args +#else +# define DBG_CFG(args) +#endif + +#if DEBUG_DUMP_REGS +static void wildfire_dump_pci_regs(int qbbno, int hoseno); +static void wildfire_dump_pca_regs(int qbbno, int pcano); +static void wildfire_dump_qsa_regs(int qbbno); +static void wildfire_dump_qsd_regs(int qbbno); +static void wildfire_dump_iop_regs(int qbbno); +static void wildfire_dump_gp_regs(int qbbno); +#endif +#if DEBUG_DUMP_CONFIG +static void wildfire_dump_hardware_config(void); +#endif + +unsigned char wildfire_hard_qbb_map[WILDFIRE_MAX_QBB]; +unsigned char wildfire_soft_qbb_map[WILDFIRE_MAX_QBB]; +#define QBB_MAP_EMPTY 0xff + +unsigned long wildfire_hard_qbb_mask; +unsigned long wildfire_soft_qbb_mask; +unsigned long wildfire_gp_mask; +unsigned long wildfire_hs_mask; +unsigned long wildfire_iop_mask; +unsigned long wildfire_ior_mask; +unsigned long wildfire_pca_mask; +unsigned long wildfire_cpu_mask; +unsigned long wildfire_mem_mask; + +void __init +wildfire_init_hose(int qbbno, int hoseno) +{ + struct pci_controller *hose; + wildfire_pci *pci; + + hose = alloc_pci_controller(); + hose->io_space = alloc_resource(); + hose->mem_space = alloc_resource(); + + /* This is for userland consumption. 
*/ + hose->sparse_mem_base = 0; + hose->sparse_io_base = 0; + hose->dense_mem_base = WILDFIRE_MEM(qbbno, hoseno); + hose->dense_io_base = WILDFIRE_IO(qbbno, hoseno); + + hose->config_space_base = WILDFIRE_CONF(qbbno, hoseno); + hose->index = (qbbno << 3) + hoseno; + + hose->io_space->start = WILDFIRE_IO(qbbno, hoseno) - WILDFIRE_IO_BIAS; + hose->io_space->end = hose->io_space->start + WILDFIRE_IO_SPACE - 1; + hose->io_space->name = pci_io_names[hoseno]; + hose->io_space->flags = IORESOURCE_IO; + + hose->mem_space->start = WILDFIRE_MEM(qbbno, hoseno)-WILDFIRE_MEM_BIAS; + hose->mem_space->end = hose->mem_space->start + 0xffffffff; + hose->mem_space->name = pci_mem_names[hoseno]; + hose->mem_space->flags = IORESOURCE_MEM; + + if (request_resource(&ioport_resource, hose->io_space) < 0) + printk(KERN_ERR "Failed to request IO on qbb %d hose %d\n", + qbbno, hoseno); + if (request_resource(&iomem_resource, hose->mem_space) < 0) + printk(KERN_ERR "Failed to request MEM on qbb %d hose %d\n", + qbbno, hoseno); + +#if DEBUG_DUMP_REGS + wildfire_dump_pci_regs(qbbno, hoseno); +#endif + + /* + * Set up the PCI to main memory translation windows. + * + * Note: Window 3 is scatter-gather only + * + * Window 0 is scatter-gather 8MB at 8MB (for isa) + * Window 1 is direct access 1GB at 1GB + * Window 2 is direct access 1GB at 2GB + * Window 3 is scatter-gather 128MB at 3GB + * ??? We ought to scale window 3 memory. + * + */ + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, + SMP_CACHE_BYTES); + hose->sg_pci = iommu_arena_new(hose, 0xc0000000, 0x08000000, + SMP_CACHE_BYTES); + + pci = WILDFIRE_pci(qbbno, hoseno); + + pci->pci_window[0].wbase.csr = hose->sg_isa->dma_base | 3; + pci->pci_window[0].wmask.csr = (hose->sg_isa->size - 1) & 0xfff00000; + pci->pci_window[0].tbase.csr = virt_to_phys(hose->sg_isa->ptes); + + pci->pci_window[1].wbase.csr = 0x40000000 | 1; + pci->pci_window[1].wmask.csr = (0x40000000 -1) & 0xfff00000; + pci->pci_window[1].tbase.csr = 0; + + pci->pci_window[2].wbase.csr = 0x80000000 | 1; + pci->pci_window[2].wmask.csr = (0x40000000 -1) & 0xfff00000; + pci->pci_window[2].tbase.csr = 0x40000000; + + pci->pci_window[3].wbase.csr = hose->sg_pci->dma_base | 3; + pci->pci_window[3].wmask.csr = (hose->sg_pci->size - 1) & 0xfff00000; + pci->pci_window[3].tbase.csr = virt_to_phys(hose->sg_pci->ptes); + + wildfire_pci_tbi(hose, 0, 0); /* Flush TLB at the end. */ +} + +void __init +wildfire_init_pca(int qbbno, int pcano) +{ + + /* Test for PCA existence first. */ + if (!WILDFIRE_PCA_EXISTS(qbbno, pcano)) + return; + +#if DEBUG_DUMP_REGS + wildfire_dump_pca_regs(qbbno, pcano); +#endif + + /* Do both hoses of the PCA. */ + wildfire_init_hose(qbbno, (pcano << 1) + 0); + wildfire_init_hose(qbbno, (pcano << 1) + 1); +} + +void __init +wildfire_init_qbb(int qbbno) +{ + int pcano; + + /* Test for QBB existence first. */ + if (!WILDFIRE_QBB_EXISTS(qbbno)) + return; + +#if DEBUG_DUMP_REGS + wildfire_dump_qsa_regs(qbbno); + wildfire_dump_qsd_regs(qbbno); + wildfire_dump_iop_regs(qbbno); + wildfire_dump_gp_regs(qbbno); +#endif + + /* Init all PCAs here. 
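+ *
+ * (Each PCA drives two PCI hoses; each wildfire_init_pca() call below
+ * brings up hoses 2*pcano and 2*pcano+1 on this QBB.)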
*/ + for (pcano = 0; pcano < WILDFIRE_PCA_PER_QBB; pcano++) { + wildfire_init_pca(qbbno, pcano); + } +} + +void __init +wildfire_hardware_probe(void) +{ + unsigned long temp; + unsigned int hard_qbb, soft_qbb; + wildfire_fast_qsd *fast = WILDFIRE_fast_qsd(); + wildfire_qsd *qsd; + wildfire_qsa *qsa; + wildfire_iop *iop; + wildfire_gp *gp; + wildfire_ne *ne; + wildfire_fe *fe; + int i; + + temp = fast->qsd_whami.csr; +#if 0 + printk(KERN_ERR "fast QSD_WHAMI at base %p is 0x%lx\n", fast, temp); +#endif + + hard_qbb = (temp >> 8) & 7; + soft_qbb = (temp >> 4) & 7; + + /* Init the HW configuration variables. */ + wildfire_hard_qbb_mask = (1 << hard_qbb); + wildfire_soft_qbb_mask = (1 << soft_qbb); + + wildfire_gp_mask = 0; + wildfire_hs_mask = 0; + wildfire_iop_mask = 0; + wildfire_ior_mask = 0; + wildfire_pca_mask = 0; + + wildfire_cpu_mask = 0; + wildfire_mem_mask = 0; + + memset(wildfire_hard_qbb_map, QBB_MAP_EMPTY, WILDFIRE_MAX_QBB); + memset(wildfire_soft_qbb_map, QBB_MAP_EMPTY, WILDFIRE_MAX_QBB); + + /* First, determine which QBBs are present. */ + qsa = WILDFIRE_qsa(soft_qbb); + + temp = qsa->qsa_qbb_id.csr; +#if 0 + printk(KERN_ERR "QSA_QBB_ID at base %p is 0x%lx\n", qsa, temp); +#endif + + if (temp & 0x40) /* Is there an HS? */ + wildfire_hs_mask = 1; + + if (temp & 0x20) { /* Is there a GP? */ + gp = WILDFIRE_gp(soft_qbb); + temp = 0; + for (i = 0; i < 4; i++) { + temp |= gp->gpa_qbb_map[i].csr << (i * 8); +#if 0 + printk(KERN_ERR "GPA_QBB_MAP[%d] at base %p is 0x%lx\n", + i, gp, temp); +#endif + } + + for (hard_qbb = 0; hard_qbb < WILDFIRE_MAX_QBB; hard_qbb++) { + if (temp & 8) { /* Is there a QBB? */ + soft_qbb = temp & 7; + wildfire_hard_qbb_mask |= (1 << hard_qbb); + wildfire_soft_qbb_mask |= (1 << soft_qbb); + } + temp >>= 4; + } + wildfire_gp_mask = wildfire_soft_qbb_mask; + } + + /* Next determine each QBBs resources. */ + for (soft_qbb = 0; soft_qbb < WILDFIRE_MAX_QBB; soft_qbb++) { + if (WILDFIRE_QBB_EXISTS(soft_qbb)) { + qsd = WILDFIRE_qsd(soft_qbb); + temp = qsd->qsd_whami.csr; +#if 0 + printk(KERN_ERR "QSD_WHAMI at base %p is 0x%lx\n", qsd, temp); +#endif + hard_qbb = (temp >> 8) & 7; + wildfire_hard_qbb_map[hard_qbb] = soft_qbb; + wildfire_soft_qbb_map[soft_qbb] = hard_qbb; + + qsa = WILDFIRE_qsa(soft_qbb); + temp = qsa->qsa_qbb_pop[0].csr; +#if 0 + printk(KERN_ERR "QSA_QBB_POP_0 at base %p is 0x%lx\n", qsa, temp); +#endif + wildfire_cpu_mask |= ((temp >> 0) & 0xf) << (soft_qbb << 2); + wildfire_mem_mask |= ((temp >> 4) & 0xf) << (soft_qbb << 2); + + temp = qsa->qsa_qbb_pop[1].csr; +#if 0 + printk(KERN_ERR "QSA_QBB_POP_1 at base %p is 0x%lx\n", qsa, temp); +#endif + wildfire_iop_mask |= (1 << soft_qbb); + wildfire_ior_mask |= ((temp >> 4) & 0xf) << (soft_qbb << 2); + + temp = qsa->qsa_qbb_id.csr; +#if 0 + printk(KERN_ERR "QSA_QBB_ID at %p is 0x%lx\n", qsa, temp); +#endif + if (temp & 0x20) + wildfire_gp_mask |= (1 << soft_qbb); + + /* Probe for PCA existence here. */ + for (i = 0; i < WILDFIRE_PCA_PER_QBB; i++) { + iop = WILDFIRE_iop(soft_qbb); + ne = WILDFIRE_ne(soft_qbb, i); + fe = WILDFIRE_fe(soft_qbb, i); + + if ((iop->iop_hose[i].init.csr & 1) == 1 && + ((ne->ne_what_am_i.csr & 0xf00000300UL) == 0x100000300UL) && + ((fe->fe_what_am_i.csr & 0xf00000300UL) == 0x100000200UL)) + { + wildfire_pca_mask |= 1 << ((soft_qbb << 2) + i); + } + } + + } + } +#if DEBUG_DUMP_CONFIG + wildfire_dump_hardware_config(); +#endif +} + +void __init +wildfire_init_arch(void) +{ + int qbbno; + + /* With multiple PCI buses, we play with I/O as physical addrs. 
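+ *
+ * Each hose's I/O space sits at its own physical address, so port
+ * numbers are effectively full physical addresses rather than a
+ * single 64KB legacy range; hence the resource limit is opened up
+ * to ~0UL below.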
*/ + ioport_resource.end = ~0UL; + + + /* Probe the hardware for info about configuration. */ + wildfire_hardware_probe(); + + /* Now init all the found QBBs. */ + for (qbbno = 0; qbbno < WILDFIRE_MAX_QBB; qbbno++) { + wildfire_init_qbb(qbbno); + } + + /* Normal direct PCI DMA mapping. */ + __direct_map_base = 0x40000000UL; + __direct_map_size = 0x80000000UL; +} + +void +wildfire_machine_check(unsigned long vector, unsigned long la_ptr) +{ + mb(); + mb(); /* magic */ + draina(); + /* FIXME: clear pci errors */ + wrmces(0x7); + mb(); + + process_mcheck_info(vector, la_ptr, "WILDFIRE", + mcheck_expected(smp_processor_id())); +} + +void +wildfire_kill_arch(int mode) +{ +} + +void +wildfire_pci_tbi(struct pci_controller *hose, dma_addr_t start, dma_addr_t end) +{ + int qbbno = hose->index >> 3; + int hoseno = hose->index & 7; + wildfire_pci *pci = WILDFIRE_pci(qbbno, hoseno); + + mb(); + pci->pci_flush_tlb.csr; /* reading does the trick */ +} + +static int +mk_conf_addr(struct pci_bus *pbus, unsigned int device_fn, int where, + unsigned long *pci_addr, unsigned char *type1) +{ + struct pci_controller *hose = pbus->sysdata; + unsigned long addr; + u8 bus = pbus->number; + + DBG_CFG(("mk_conf_addr(bus=%d ,device_fn=0x%x, where=0x%x, " + "pci_addr=0x%p, type1=0x%p)\n", + bus, device_fn, where, pci_addr, type1)); + + if (!pbus->parent) /* No parent means peer PCI bus. */ + bus = 0; + *type1 = (bus != 0); + + addr = (bus << 16) | (device_fn << 8) | where; + addr |= hose->config_space_base; + + *pci_addr = addr; + DBG_CFG(("mk_conf_addr: returning pci_addr 0x%lx\n", addr)); + return 0; +} + +static int +wildfire_read_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + unsigned long addr; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (size) { + case 1: + *value = __kernel_ldbu(*(vucp)addr); + break; + case 2: + *value = __kernel_ldwu(*(vusp)addr); + break; + case 4: + *value = *(vuip)addr; + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +static int +wildfire_write_config(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 value) +{ + unsigned long addr; + unsigned char type1; + + if (mk_conf_addr(bus, devfn, where, &addr, &type1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (size) { + case 1: + __kernel_stb(value, *(vucp)addr); + mb(); + __kernel_ldbu(*(vucp)addr); + break; + case 2: + __kernel_stw(value, *(vusp)addr); + mb(); + __kernel_ldwu(*(vusp)addr); + break; + case 4: + *(vuip)addr = value; + mb(); + *(vuip)addr; + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +struct pci_ops wildfire_pci_ops = +{ + .read = wildfire_read_config, + .write = wildfire_write_config, +}; + +#if DEBUG_DUMP_REGS + +static void __init +wildfire_dump_pci_regs(int qbbno, int hoseno) +{ + wildfire_pci *pci = WILDFIRE_pci(qbbno, hoseno); + int i; + + printk(KERN_ERR "PCI registers for QBB %d hose %d (%p)\n", + qbbno, hoseno, pci); + + printk(KERN_ERR " PCI_IO_ADDR_EXT: 0x%16lx\n", + pci->pci_io_addr_ext.csr); + printk(KERN_ERR " PCI_CTRL: 0x%16lx\n", pci->pci_ctrl.csr); + printk(KERN_ERR " PCI_ERR_SUM: 0x%16lx\n", pci->pci_err_sum.csr); + printk(KERN_ERR " PCI_ERR_ADDR: 0x%16lx\n", pci->pci_err_addr.csr); + printk(KERN_ERR " PCI_STALL_CNT: 0x%16lx\n", pci->pci_stall_cnt.csr); + printk(KERN_ERR " PCI_PEND_INT: 0x%16lx\n", pci->pci_pend_int.csr); + printk(KERN_ERR " PCI_SENT_INT: 0x%16lx\n", pci->pci_sent_int.csr); + + printk(KERN_ERR " DMA window registers for QBB %d hose %d (%p)\n", + 
qbbno, hoseno, pci); + for (i = 0; i < 4; i++) { + printk(KERN_ERR " window %d: 0x%16lx 0x%16lx 0x%16lx\n", i, + pci->pci_window[i].wbase.csr, + pci->pci_window[i].wmask.csr, + pci->pci_window[i].tbase.csr); + } + printk(KERN_ERR "\n"); +} + +static void __init +wildfire_dump_pca_regs(int qbbno, int pcano) +{ + wildfire_pca *pca = WILDFIRE_pca(qbbno, pcano); + int i; + + printk(KERN_ERR "PCA registers for QBB %d PCA %d (%p)\n", + qbbno, pcano, pca); + + printk(KERN_ERR " PCA_WHAT_AM_I: 0x%16lx\n", pca->pca_what_am_i.csr); + printk(KERN_ERR " PCA_ERR_SUM: 0x%16lx\n", pca->pca_err_sum.csr); + printk(KERN_ERR " PCA_PEND_INT: 0x%16lx\n", pca->pca_pend_int.csr); + printk(KERN_ERR " PCA_SENT_INT: 0x%16lx\n", pca->pca_sent_int.csr); + printk(KERN_ERR " PCA_STDIO_EL: 0x%16lx\n", + pca->pca_stdio_edge_level.csr); + + printk(KERN_ERR " PCA target registers for QBB %d PCA %d (%p)\n", + qbbno, pcano, pca); + for (i = 0; i < 4; i++) { + printk(KERN_ERR " target %d: 0x%16lx 0x%16lx\n", i, + pca->pca_int[i].target.csr, + pca->pca_int[i].enable.csr); + } + + printk(KERN_ERR "\n"); +} + +static void __init +wildfire_dump_qsa_regs(int qbbno) +{ + wildfire_qsa *qsa = WILDFIRE_qsa(qbbno); + int i; + + printk(KERN_ERR "QSA registers for QBB %d (%p)\n", qbbno, qsa); + + printk(KERN_ERR " QSA_QBB_ID: 0x%16lx\n", qsa->qsa_qbb_id.csr); + printk(KERN_ERR " QSA_PORT_ENA: 0x%16lx\n", qsa->qsa_port_ena.csr); + printk(KERN_ERR " QSA_REF_INT: 0x%16lx\n", qsa->qsa_ref_int.csr); + + for (i = 0; i < 5; i++) + printk(KERN_ERR " QSA_CONFIG_%d: 0x%16lx\n", + i, qsa->qsa_config[i].csr); + + for (i = 0; i < 2; i++) + printk(KERN_ERR " QSA_QBB_POP_%d: 0x%16lx\n", + i, qsa->qsa_qbb_pop[0].csr); + + printk(KERN_ERR "\n"); +} + +static void __init +wildfire_dump_qsd_regs(int qbbno) +{ + wildfire_qsd *qsd = WILDFIRE_qsd(qbbno); + + printk(KERN_ERR "QSD registers for QBB %d (%p)\n", qbbno, qsd); + + printk(KERN_ERR " QSD_WHAMI: 0x%16lx\n", qsd->qsd_whami.csr); + printk(KERN_ERR " QSD_REV: 0x%16lx\n", qsd->qsd_rev.csr); + printk(KERN_ERR " QSD_PORT_PRESENT: 0x%16lx\n", + qsd->qsd_port_present.csr); + printk(KERN_ERR " QSD_PORT_ACTIVE: 0x%16lx\n", + qsd->qsd_port_active.csr); + printk(KERN_ERR " QSD_FAULT_ENA: 0x%16lx\n", + qsd->qsd_fault_ena.csr); + printk(KERN_ERR " QSD_CPU_INT_ENA: 0x%16lx\n", + qsd->qsd_cpu_int_ena.csr); + printk(KERN_ERR " QSD_MEM_CONFIG: 0x%16lx\n", + qsd->qsd_mem_config.csr); + printk(KERN_ERR " QSD_ERR_SUM: 0x%16lx\n", + qsd->qsd_err_sum.csr); + + printk(KERN_ERR "\n"); +} + +static void __init +wildfire_dump_iop_regs(int qbbno) +{ + wildfire_iop *iop = WILDFIRE_iop(qbbno); + int i; + + printk(KERN_ERR "IOP registers for QBB %d (%p)\n", qbbno, iop); + + printk(KERN_ERR " IOA_CONFIG: 0x%16lx\n", iop->ioa_config.csr); + printk(KERN_ERR " IOD_CONFIG: 0x%16lx\n", iop->iod_config.csr); + printk(KERN_ERR " IOP_SWITCH_CREDITS: 0x%16lx\n", + iop->iop_switch_credits.csr); + printk(KERN_ERR " IOP_HOSE_CREDITS: 0x%16lx\n", + iop->iop_hose_credits.csr); + + for (i = 0; i < 4; i++) + printk(KERN_ERR " IOP_HOSE_%d_INIT: 0x%16lx\n", + i, iop->iop_hose[i].init.csr); + for (i = 0; i < 4; i++) + printk(KERN_ERR " IOP_DEV_INT_TARGET_%d: 0x%16lx\n", + i, iop->iop_dev_int[i].target.csr); + + printk(KERN_ERR "\n"); +} + +static void __init +wildfire_dump_gp_regs(int qbbno) +{ + wildfire_gp *gp = WILDFIRE_gp(qbbno); + int i; + + printk(KERN_ERR "GP registers for QBB %d (%p)\n", qbbno, gp); + for (i = 0; i < 4; i++) + printk(KERN_ERR " GPA_QBB_MAP_%d: 0x%16lx\n", + i, gp->gpa_qbb_map[i].csr); + + printk(KERN_ERR " GPA_MEM_POP_MAP: 
0x%16lx\n", + gp->gpa_mem_pop_map.csr); + printk(KERN_ERR " GPA_SCRATCH: 0x%16lx\n", gp->gpa_scratch.csr); + printk(KERN_ERR " GPA_DIAG: 0x%16lx\n", gp->gpa_diag.csr); + printk(KERN_ERR " GPA_CONFIG_0: 0x%16lx\n", gp->gpa_config_0.csr); + printk(KERN_ERR " GPA_INIT_ID: 0x%16lx\n", gp->gpa_init_id.csr); + printk(KERN_ERR " GPA_CONFIG_2: 0x%16lx\n", gp->gpa_config_2.csr); + + printk(KERN_ERR "\n"); +} +#endif /* DUMP_REGS */ + +#if DEBUG_DUMP_CONFIG +static void __init +wildfire_dump_hardware_config(void) +{ + int i; + + printk(KERN_ERR "Probed Hardware Configuration\n"); + + printk(KERN_ERR " hard_qbb_mask: 0x%16lx\n", wildfire_hard_qbb_mask); + printk(KERN_ERR " soft_qbb_mask: 0x%16lx\n", wildfire_soft_qbb_mask); + + printk(KERN_ERR " gp_mask: 0x%16lx\n", wildfire_gp_mask); + printk(KERN_ERR " hs_mask: 0x%16lx\n", wildfire_hs_mask); + printk(KERN_ERR " iop_mask: 0x%16lx\n", wildfire_iop_mask); + printk(KERN_ERR " ior_mask: 0x%16lx\n", wildfire_ior_mask); + printk(KERN_ERR " pca_mask: 0x%16lx\n", wildfire_pca_mask); + + printk(KERN_ERR " cpu_mask: 0x%16lx\n", wildfire_cpu_mask); + printk(KERN_ERR " mem_mask: 0x%16lx\n", wildfire_mem_mask); + + printk(" hard_qbb_map: "); + for (i = 0; i < WILDFIRE_MAX_QBB; i++) + if (wildfire_hard_qbb_map[i] == QBB_MAP_EMPTY) + printk("--- "); + else + printk("%3d ", wildfire_hard_qbb_map[i]); + printk("\n"); + + printk(" soft_qbb_map: "); + for (i = 0; i < WILDFIRE_MAX_QBB; i++) + if (wildfire_soft_qbb_map[i] == QBB_MAP_EMPTY) + printk("--- "); + else + printk("%3d ", wildfire_soft_qbb_map[i]); + printk("\n"); +} +#endif /* DUMP_CONFIG */ diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S new file mode 100644 index 0000000000..e227f3a29a --- /dev/null +++ b/arch/alpha/kernel/entry.S @@ -0,0 +1,852 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/kernel/entry.S + * + * Kernel entry-points. + */ + +#include +#include +#include +#include +#include + + .text + .set noat + .cfi_sections .debug_frame + +/* Stack offsets. */ +#define SP_OFF 184 +#define SWITCH_STACK_SIZE 320 + +.macro CFI_START_OSF_FRAME func + .align 4 + .globl \func + .type \func,@function +\func: + .cfi_startproc simple + .cfi_return_column 64 + .cfi_def_cfa $sp, 48 + .cfi_rel_offset 64, 8 + .cfi_rel_offset $gp, 16 + .cfi_rel_offset $16, 24 + .cfi_rel_offset $17, 32 + .cfi_rel_offset $18, 40 +.endm + +.macro CFI_END_OSF_FRAME func + .cfi_endproc + .size \func, . - \func +.endm + +/* + * This defines the normal kernel pt-regs layout. + * + * regs 9-15 preserved by C code + * regs 16-18 saved by PAL-code + * regs 29-30 saved and set up by PAL-code + * JRP - Save regs 16-18 in a special area of the stack, so that + * the palcode-provided values are available to the signal handler. 
+ */ + +.macro SAVE_ALL + subq $sp, SP_OFF, $sp + .cfi_adjust_cfa_offset SP_OFF + stq $0, 0($sp) + stq $1, 8($sp) + stq $2, 16($sp) + stq $3, 24($sp) + stq $4, 32($sp) + stq $28, 144($sp) + .cfi_rel_offset $0, 0 + .cfi_rel_offset $1, 8 + .cfi_rel_offset $2, 16 + .cfi_rel_offset $3, 24 + .cfi_rel_offset $4, 32 + .cfi_rel_offset $28, 144 + lda $2, alpha_mv + stq $5, 40($sp) + stq $6, 48($sp) + stq $7, 56($sp) + stq $8, 64($sp) + stq $19, 72($sp) + stq $20, 80($sp) + stq $21, 88($sp) + ldq $2, HAE_CACHE($2) + stq $22, 96($sp) + stq $23, 104($sp) + stq $24, 112($sp) + stq $25, 120($sp) + stq $26, 128($sp) + stq $27, 136($sp) + stq $2, 152($sp) + stq $16, 160($sp) + stq $17, 168($sp) + stq $18, 176($sp) + .cfi_rel_offset $5, 40 + .cfi_rel_offset $6, 48 + .cfi_rel_offset $7, 56 + .cfi_rel_offset $8, 64 + .cfi_rel_offset $19, 72 + .cfi_rel_offset $20, 80 + .cfi_rel_offset $21, 88 + .cfi_rel_offset $22, 96 + .cfi_rel_offset $23, 104 + .cfi_rel_offset $24, 112 + .cfi_rel_offset $25, 120 + .cfi_rel_offset $26, 128 + .cfi_rel_offset $27, 136 +.endm + +.macro RESTORE_ALL + lda $19, alpha_mv + ldq $0, 0($sp) + ldq $1, 8($sp) + ldq $2, 16($sp) + ldq $3, 24($sp) + ldq $21, 152($sp) + ldq $20, HAE_CACHE($19) + ldq $4, 32($sp) + ldq $5, 40($sp) + ldq $6, 48($sp) + ldq $7, 56($sp) + subq $20, $21, $20 + ldq $8, 64($sp) + beq $20, 99f + ldq $20, HAE_REG($19) + stq $21, HAE_CACHE($19) + stq $21, 0($20) +99: ldq $19, 72($sp) + ldq $20, 80($sp) + ldq $21, 88($sp) + ldq $22, 96($sp) + ldq $23, 104($sp) + ldq $24, 112($sp) + ldq $25, 120($sp) + ldq $26, 128($sp) + ldq $27, 136($sp) + ldq $28, 144($sp) + addq $sp, SP_OFF, $sp + .cfi_restore $0 + .cfi_restore $1 + .cfi_restore $2 + .cfi_restore $3 + .cfi_restore $4 + .cfi_restore $5 + .cfi_restore $6 + .cfi_restore $7 + .cfi_restore $8 + .cfi_restore $19 + .cfi_restore $20 + .cfi_restore $21 + .cfi_restore $22 + .cfi_restore $23 + .cfi_restore $24 + .cfi_restore $25 + .cfi_restore $26 + .cfi_restore $27 + .cfi_restore $28 + .cfi_adjust_cfa_offset -SP_OFF +.endm + +.macro DO_SWITCH_STACK + bsr $1, do_switch_stack + .cfi_adjust_cfa_offset SWITCH_STACK_SIZE + .cfi_rel_offset $9, 0 + .cfi_rel_offset $10, 8 + .cfi_rel_offset $11, 16 + .cfi_rel_offset $12, 24 + .cfi_rel_offset $13, 32 + .cfi_rel_offset $14, 40 + .cfi_rel_offset $15, 48 + /* We don't really care about the FP registers for debugging. */ +.endm + +.macro UNDO_SWITCH_STACK + bsr $1, undo_switch_stack + .cfi_restore $9 + .cfi_restore $10 + .cfi_restore $11 + .cfi_restore $12 + .cfi_restore $13 + .cfi_restore $14 + .cfi_restore $15 + .cfi_adjust_cfa_offset -SWITCH_STACK_SIZE +.endm + +/* + * Non-syscall kernel entry points. + */ + +CFI_START_OSF_FRAME entInt + SAVE_ALL + lda $8, 0x3fff + lda $26, ret_from_sys_call + bic $sp, $8, $8 + mov $sp, $19 + jsr $31, do_entInt +CFI_END_OSF_FRAME entInt + +CFI_START_OSF_FRAME entArith + SAVE_ALL + lda $8, 0x3fff + lda $26, ret_from_sys_call + bic $sp, $8, $8 + mov $sp, $18 + jsr $31, do_entArith +CFI_END_OSF_FRAME entArith + +CFI_START_OSF_FRAME entMM + SAVE_ALL +/* save $9 - $15 so the inline exception code can manipulate them. 
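They are spilled into a 56-byte area here and reloaded once do_page_fault returns.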
*/ + subq $sp, 56, $sp + .cfi_adjust_cfa_offset 56 + stq $9, 0($sp) + stq $10, 8($sp) + stq $11, 16($sp) + stq $12, 24($sp) + stq $13, 32($sp) + stq $14, 40($sp) + stq $15, 48($sp) + .cfi_rel_offset $9, 0 + .cfi_rel_offset $10, 8 + .cfi_rel_offset $11, 16 + .cfi_rel_offset $12, 24 + .cfi_rel_offset $13, 32 + .cfi_rel_offset $14, 40 + .cfi_rel_offset $15, 48 + addq $sp, 56, $19 +/* handle the fault */ + lda $8, 0x3fff + bic $sp, $8, $8 + jsr $26, do_page_fault +/* reload the registers after the exception code played. */ + ldq $9, 0($sp) + ldq $10, 8($sp) + ldq $11, 16($sp) + ldq $12, 24($sp) + ldq $13, 32($sp) + ldq $14, 40($sp) + ldq $15, 48($sp) + addq $sp, 56, $sp + .cfi_restore $9 + .cfi_restore $10 + .cfi_restore $11 + .cfi_restore $12 + .cfi_restore $13 + .cfi_restore $14 + .cfi_restore $15 + .cfi_adjust_cfa_offset -56 +/* finish up the syscall as normal. */ + br ret_from_sys_call +CFI_END_OSF_FRAME entMM + +CFI_START_OSF_FRAME entIF + SAVE_ALL + lda $8, 0x3fff + lda $26, ret_from_sys_call + bic $sp, $8, $8 + mov $sp, $17 + jsr $31, do_entIF +CFI_END_OSF_FRAME entIF + +CFI_START_OSF_FRAME entUna + lda $sp, -256($sp) + .cfi_adjust_cfa_offset 256 + stq $0, 0($sp) + .cfi_rel_offset $0, 0 + .cfi_remember_state + ldq $0, 256($sp) /* get PS */ + stq $1, 8($sp) + stq $2, 16($sp) + stq $3, 24($sp) + and $0, 8, $0 /* user mode? */ + stq $4, 32($sp) + bne $0, entUnaUser /* yup -> do user-level unaligned fault */ + stq $5, 40($sp) + stq $6, 48($sp) + stq $7, 56($sp) + stq $8, 64($sp) + stq $9, 72($sp) + stq $10, 80($sp) + stq $11, 88($sp) + stq $12, 96($sp) + stq $13, 104($sp) + stq $14, 112($sp) + stq $15, 120($sp) + /* 16-18 PAL-saved */ + stq $19, 152($sp) + stq $20, 160($sp) + stq $21, 168($sp) + stq $22, 176($sp) + stq $23, 184($sp) + stq $24, 192($sp) + stq $25, 200($sp) + stq $26, 208($sp) + stq $27, 216($sp) + stq $28, 224($sp) + mov $sp, $19 + stq $gp, 232($sp) + .cfi_rel_offset $1, 1*8 + .cfi_rel_offset $2, 2*8 + .cfi_rel_offset $3, 3*8 + .cfi_rel_offset $4, 4*8 + .cfi_rel_offset $5, 5*8 + .cfi_rel_offset $6, 6*8 + .cfi_rel_offset $7, 7*8 + .cfi_rel_offset $8, 8*8 + .cfi_rel_offset $9, 9*8 + .cfi_rel_offset $10, 10*8 + .cfi_rel_offset $11, 11*8 + .cfi_rel_offset $12, 12*8 + .cfi_rel_offset $13, 13*8 + .cfi_rel_offset $14, 14*8 + .cfi_rel_offset $15, 15*8 + .cfi_rel_offset $19, 19*8 + .cfi_rel_offset $20, 20*8 + .cfi_rel_offset $21, 21*8 + .cfi_rel_offset $22, 22*8 + .cfi_rel_offset $23, 23*8 + .cfi_rel_offset $24, 24*8 + .cfi_rel_offset $25, 25*8 + .cfi_rel_offset $26, 26*8 + .cfi_rel_offset $27, 27*8 + .cfi_rel_offset $28, 28*8 + .cfi_rel_offset $29, 29*8 + lda $8, 0x3fff + stq $31, 248($sp) + bic $sp, $8, $8 + jsr $26, do_entUna + ldq $0, 0($sp) + ldq $1, 8($sp) + ldq $2, 16($sp) + ldq $3, 24($sp) + ldq $4, 32($sp) + ldq $5, 40($sp) + ldq $6, 48($sp) + ldq $7, 56($sp) + ldq $8, 64($sp) + ldq $9, 72($sp) + ldq $10, 80($sp) + ldq $11, 88($sp) + ldq $12, 96($sp) + ldq $13, 104($sp) + ldq $14, 112($sp) + ldq $15, 120($sp) + /* 16-18 PAL-saved */ + ldq $19, 152($sp) + ldq $20, 160($sp) + ldq $21, 168($sp) + ldq $22, 176($sp) + ldq $23, 184($sp) + ldq $24, 192($sp) + ldq $25, 200($sp) + ldq $26, 208($sp) + ldq $27, 216($sp) + ldq $28, 224($sp) + ldq $gp, 232($sp) + lda $sp, 256($sp) + .cfi_restore $1 + .cfi_restore $2 + .cfi_restore $3 + .cfi_restore $4 + .cfi_restore $5 + .cfi_restore $6 + .cfi_restore $7 + .cfi_restore $8 + .cfi_restore $9 + .cfi_restore $10 + .cfi_restore $11 + .cfi_restore $12 + .cfi_restore $13 + .cfi_restore $14 + .cfi_restore $15 + .cfi_restore $19 + .cfi_restore 
$20 + .cfi_restore $21 + .cfi_restore $22 + .cfi_restore $23 + .cfi_restore $24 + .cfi_restore $25 + .cfi_restore $26 + .cfi_restore $27 + .cfi_restore $28 + .cfi_restore $29 + .cfi_adjust_cfa_offset -256 + call_pal PAL_rti + + .align 4 +entUnaUser: + .cfi_restore_state + ldq $0, 0($sp) /* restore original $0 */ + lda $sp, 256($sp) /* pop entUna's stack frame */ + .cfi_restore $0 + .cfi_adjust_cfa_offset -256 + SAVE_ALL /* setup normal kernel stack */ + lda $sp, -56($sp) + .cfi_adjust_cfa_offset 56 + stq $9, 0($sp) + stq $10, 8($sp) + stq $11, 16($sp) + stq $12, 24($sp) + stq $13, 32($sp) + stq $14, 40($sp) + stq $15, 48($sp) + .cfi_rel_offset $9, 0 + .cfi_rel_offset $10, 8 + .cfi_rel_offset $11, 16 + .cfi_rel_offset $12, 24 + .cfi_rel_offset $13, 32 + .cfi_rel_offset $14, 40 + .cfi_rel_offset $15, 48 + lda $8, 0x3fff + addq $sp, 56, $19 + bic $sp, $8, $8 + jsr $26, do_entUnaUser + ldq $9, 0($sp) + ldq $10, 8($sp) + ldq $11, 16($sp) + ldq $12, 24($sp) + ldq $13, 32($sp) + ldq $14, 40($sp) + ldq $15, 48($sp) + lda $sp, 56($sp) + .cfi_restore $9 + .cfi_restore $10 + .cfi_restore $11 + .cfi_restore $12 + .cfi_restore $13 + .cfi_restore $14 + .cfi_restore $15 + .cfi_adjust_cfa_offset -56 + br ret_from_sys_call +CFI_END_OSF_FRAME entUna + +CFI_START_OSF_FRAME entDbg + SAVE_ALL + lda $8, 0x3fff + lda $26, ret_from_sys_call + bic $sp, $8, $8 + mov $sp, $16 + jsr $31, do_entDbg +CFI_END_OSF_FRAME entDbg + +/* + * The system call entry point is special. Most importantly, it looks + * like a function call to userspace as far as clobbered registers. We + * do preserve the argument registers (for syscall restarts) and $26 + * (for leaf syscall functions). + * + * So much for theory. We don't take advantage of this yet. + * + * Note that a0-a2 are not saved by PALcode as with the other entry points. + */ + + .align 4 + .globl entSys + .type entSys, @function + .cfi_startproc simple + .cfi_return_column 64 + .cfi_def_cfa $sp, 48 + .cfi_rel_offset 64, 8 + .cfi_rel_offset $gp, 16 +entSys: + SAVE_ALL + lda $8, 0x3fff + bic $sp, $8, $8 + lda $4, NR_SYSCALLS($31) + stq $16, SP_OFF+24($sp) + lda $5, sys_call_table + lda $27, sys_ni_syscall + cmpult $0, $4, $4 + ldl $3, TI_FLAGS($8) + stq $17, SP_OFF+32($sp) + s8addq $0, $5, $5 + stq $18, SP_OFF+40($sp) + .cfi_rel_offset $16, SP_OFF+24 + .cfi_rel_offset $17, SP_OFF+32 + .cfi_rel_offset $18, SP_OFF+40 +#ifdef CONFIG_AUDITSYSCALL + lda $6, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT + and $3, $6, $3 +#endif + bne $3, strace + beq $4, 1f + ldq $27, 0($5) +1: jsr $26, ($27), sys_ni_syscall + ldgp $gp, 0($26) + blt $0, $syscall_error /* the call failed */ + stq $0, 0($sp) + stq $31, 72($sp) /* a3=0 => no error */ + + .align 4 + .globl ret_from_sys_call +ret_from_sys_call: + cmovne $26, 0, $18 /* $18 = 0 => non-restartable */ + ldq $0, SP_OFF($sp) + and $0, 8, $0 + beq $0, ret_to_kernel +ret_to_user: + /* Make sure need_resched and sigpending don't change between + sampling and the rti. */ + lda $16, 7 + call_pal PAL_swpipl + ldl $17, TI_FLAGS($8) + and $17, _TIF_WORK_MASK, $2 + bne $2, work_pending +restore_all: + .cfi_remember_state + RESTORE_ALL + call_pal PAL_rti + +ret_to_kernel: + .cfi_restore_state + lda $16, 7 + call_pal PAL_swpipl + br restore_all + + .align 3 +$syscall_error: + /* + * Some system calls (e.g., ptrace) can return arbitrary + * values which might normally be mistaken as error numbers. + * Those functions must zero $0 (v0) directly in the stack + * frame to indicate that a negative return value wasn't an + * error number.. 
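+ * The saved slot at 0($sp) doubles as that flag: it is reloaded into
+ * $18 below and, when zero, the $ret_success path is taken instead of
+ * building an errno return.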
+ */ + ldq $18, 0($sp) /* old syscall nr (zero if success) */ + beq $18, $ret_success + + ldq $19, 72($sp) /* .. and this a3 */ + subq $31, $0, $0 /* with error in v0 */ + addq $31, 1, $1 /* set a3 for errno return */ + stq $0, 0($sp) + mov $31, $26 /* tell "ret_from_sys_call" we can restart */ + stq $1, 72($sp) /* a3 for return */ + br ret_from_sys_call + +$ret_success: + stq $0, 0($sp) + stq $31, 72($sp) /* a3=0 => no error */ + br ret_from_sys_call + +/* + * Do all cleanup when returning from all interrupts and system calls. + * + * Arguments: + * $8: current. + * $17: TI_FLAGS. + * $18: The old syscall number, or zero if this is not a return + * from a syscall that errored and is possibly restartable. + * $19: The old a3 value + */ + + .align 4 + .type work_pending, @function +work_pending: + and $17, _TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL, $2 + bne $2, $work_notifysig + +$work_resched: + /* + * We can get here only if we returned from syscall without SIGPENDING + * or got through work_notifysig already. Either case means no syscall + * restarts for us, so let $18 and $19 burn. + */ + jsr $26, schedule + mov 0, $18 + br ret_to_user + +$work_notifysig: + mov $sp, $16 + DO_SWITCH_STACK + jsr $26, do_work_pending + UNDO_SWITCH_STACK + br restore_all + +/* + * PTRACE syscall handler + */ + + .align 4 + .type strace, @function +strace: + /* set up signal stack, call syscall_trace */ + DO_SWITCH_STACK + jsr $26, syscall_trace_enter /* returns the syscall number */ + UNDO_SWITCH_STACK + + /* get the arguments back.. */ + ldq $16, SP_OFF+24($sp) + ldq $17, SP_OFF+32($sp) + ldq $18, SP_OFF+40($sp) + ldq $19, 72($sp) + ldq $20, 80($sp) + ldq $21, 88($sp) + + /* get the system call pointer.. */ + lda $1, NR_SYSCALLS($31) + lda $2, sys_call_table + lda $27, sys_ni_syscall + cmpult $0, $1, $1 + s8addq $0, $2, $2 + beq $1, 1f + ldq $27, 0($2) +1: jsr $26, ($27), sys_gettimeofday +ret_from_straced: + ldgp $gp, 0($26) + + /* check return.. */ + blt $0, $strace_error /* the call failed */ + stq $31, 72($sp) /* a3=0 => no error */ +$strace_success: + stq $0, 0($sp) /* save return value */ + + DO_SWITCH_STACK + jsr $26, syscall_trace_leave + UNDO_SWITCH_STACK + br $31, ret_from_sys_call + + .align 3 +$strace_error: + ldq $18, 0($sp) /* old syscall nr (zero if success) */ + beq $18, $strace_success + ldq $19, 72($sp) /* .. and this a3 */ + + subq $31, $0, $0 /* with error in v0 */ + addq $31, 1, $1 /* set a3 for errno return */ + stq $0, 0($sp) + stq $1, 72($sp) /* a3 for return */ + + DO_SWITCH_STACK + mov $18, $9 /* save old syscall number */ + mov $19, $10 /* save old a3 */ + jsr $26, syscall_trace_leave + mov $9, $18 + mov $10, $19 + UNDO_SWITCH_STACK + + mov $31, $26 /* tell "ret_from_sys_call" we can restart */ + br ret_from_sys_call +CFI_END_OSF_FRAME entSys + +/* + * Save and restore the switch stack -- aka the balance of the user context. 
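+ * The SWITCH_STACK_SIZE (320 byte) frame holds the callee-saved $9-$15,
+ * the return address $26, $f0-$f30, and the FPCR in the slot of $f31.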
+ */ + + .align 4 + .type do_switch_stack, @function + .cfi_startproc simple + .cfi_return_column 64 + .cfi_def_cfa $sp, 0 + .cfi_register 64, $1 +do_switch_stack: + lda $sp, -SWITCH_STACK_SIZE($sp) + .cfi_adjust_cfa_offset SWITCH_STACK_SIZE + stq $9, 0($sp) + stq $10, 8($sp) + stq $11, 16($sp) + stq $12, 24($sp) + stq $13, 32($sp) + stq $14, 40($sp) + stq $15, 48($sp) + stq $26, 56($sp) + stt $f0, 64($sp) + stt $f1, 72($sp) + stt $f2, 80($sp) + stt $f3, 88($sp) + stt $f4, 96($sp) + stt $f5, 104($sp) + stt $f6, 112($sp) + stt $f7, 120($sp) + stt $f8, 128($sp) + stt $f9, 136($sp) + stt $f10, 144($sp) + stt $f11, 152($sp) + stt $f12, 160($sp) + stt $f13, 168($sp) + stt $f14, 176($sp) + stt $f15, 184($sp) + stt $f16, 192($sp) + stt $f17, 200($sp) + stt $f18, 208($sp) + stt $f19, 216($sp) + stt $f20, 224($sp) + stt $f21, 232($sp) + stt $f22, 240($sp) + stt $f23, 248($sp) + stt $f24, 256($sp) + stt $f25, 264($sp) + stt $f26, 272($sp) + stt $f27, 280($sp) + mf_fpcr $f0 # get fpcr + stt $f28, 288($sp) + stt $f29, 296($sp) + stt $f30, 304($sp) + stt $f0, 312($sp) # save fpcr in slot of $f31 + ldt $f0, 64($sp) # dont let "do_switch_stack" change fp state. + ret $31, ($1), 1 + .cfi_endproc + .size do_switch_stack, .-do_switch_stack + + .align 4 + .type undo_switch_stack, @function + .cfi_startproc simple + .cfi_def_cfa $sp, 0 + .cfi_register 64, $1 +undo_switch_stack: + ldq $9, 0($sp) + ldq $10, 8($sp) + ldq $11, 16($sp) + ldq $12, 24($sp) + ldq $13, 32($sp) + ldq $14, 40($sp) + ldq $15, 48($sp) + ldq $26, 56($sp) + ldt $f30, 312($sp) # get saved fpcr + ldt $f0, 64($sp) + ldt $f1, 72($sp) + ldt $f2, 80($sp) + ldt $f3, 88($sp) + mt_fpcr $f30 # install saved fpcr + ldt $f4, 96($sp) + ldt $f5, 104($sp) + ldt $f6, 112($sp) + ldt $f7, 120($sp) + ldt $f8, 128($sp) + ldt $f9, 136($sp) + ldt $f10, 144($sp) + ldt $f11, 152($sp) + ldt $f12, 160($sp) + ldt $f13, 168($sp) + ldt $f14, 176($sp) + ldt $f15, 184($sp) + ldt $f16, 192($sp) + ldt $f17, 200($sp) + ldt $f18, 208($sp) + ldt $f19, 216($sp) + ldt $f20, 224($sp) + ldt $f21, 232($sp) + ldt $f22, 240($sp) + ldt $f23, 248($sp) + ldt $f24, 256($sp) + ldt $f25, 264($sp) + ldt $f26, 272($sp) + ldt $f27, 280($sp) + ldt $f28, 288($sp) + ldt $f29, 296($sp) + ldt $f30, 304($sp) + lda $sp, SWITCH_STACK_SIZE($sp) + ret $31, ($1), 1 + .cfi_endproc + .size undo_switch_stack, .-undo_switch_stack + +/* + * The meat of the context switch code. + */ + + .align 4 + .globl alpha_switch_to + .type alpha_switch_to, @function + .cfi_startproc +alpha_switch_to: + DO_SWITCH_STACK + call_pal PAL_swpctx + lda $8, 0x3fff + UNDO_SWITCH_STACK + bic $sp, $8, $8 + mov $17, $0 + ret + .cfi_endproc + .size alpha_switch_to, .-alpha_switch_to + +/* + * New processes begin life here. + */ + + .globl ret_from_fork + .align 4 + .ent ret_from_fork +ret_from_fork: + lda $26, ret_from_sys_call + mov $17, $16 + jmp $31, schedule_tail +.end ret_from_fork + +/* + * ... and new kernel threads - here + */ + .align 4 + .globl ret_from_kernel_thread + .ent ret_from_kernel_thread +ret_from_kernel_thread: + mov $17, $16 + jsr $26, schedule_tail + mov $9, $27 + mov $10, $16 + jsr $26, ($9) + br $31, ret_to_user +.end ret_from_kernel_thread + + +/* + * Special system calls. Most of these are special in that they either + * have to play switch_stack games. 
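+ * fork/vfork/clone (fork_like) build a switch stack before calling the
+ * C handler; sigreturn/rt_sigreturn (sigreturn_like) allocate one for
+ * do_sigreturn/do_rt_sigreturn and pop it with undo_switch_stack.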
+ */ + +.macro fork_like name + .align 4 + .globl alpha_\name + .ent alpha_\name +alpha_\name: + .prologue 0 + bsr $1, do_switch_stack + jsr $26, sys_\name + ldq $26, 56($sp) + lda $sp, SWITCH_STACK_SIZE($sp) + ret +.end alpha_\name +.endm + +fork_like fork +fork_like vfork +fork_like clone + +.macro sigreturn_like name + .align 4 + .globl sys_\name + .ent sys_\name +sys_\name: + .prologue 0 + lda $9, ret_from_straced + cmpult $26, $9, $9 + lda $sp, -SWITCH_STACK_SIZE($sp) + jsr $26, do_\name + bne $9, 1f + jsr $26, syscall_trace_leave +1: br $1, undo_switch_stack + br ret_from_sys_call +.end sys_\name +.endm + +sigreturn_like sigreturn +sigreturn_like rt_sigreturn + + .align 4 + .globl alpha_syscall_zero + .ent alpha_syscall_zero +alpha_syscall_zero: + .prologue 0 + /* Special because it needs to do something opposite to + force_successful_syscall_return(). We use the saved + syscall number for that, zero meaning "not an error". + That works nicely, but for real syscall 0 we need to + make sure that this logics doesn't get confused. + Store a non-zero there - -ENOSYS we need in register + for our return value will do just fine. + */ + lda $0, -ENOSYS + unop + stq $0, 0($sp) + ret +.end alpha_syscall_zero diff --git a/arch/alpha/kernel/err_common.c b/arch/alpha/kernel/err_common.c new file mode 100644 index 0000000000..94e1b3dcf6 --- /dev/null +++ b/arch/alpha/kernel/err_common.c @@ -0,0 +1,321 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/err_common.c + * + * Copyright (C) 2000 Jeff Wiedemeier (Compaq Computer Corporation) + * + * Error handling code supporting Alpha systems + */ + +#include +#include + +#include +#include +#include +#include + +#include "err_impl.h" +#include "proto.h" + +/* + * err_print_prefix -- error handling print routines should prefix + * all prints with this + */ +char *err_print_prefix = KERN_NOTICE; + + +/* + * Generic + */ +void +mchk_dump_mem(void *data, size_t length, char **annotation) +{ + unsigned long *ldata = data; + size_t i; + + for (i = 0; (i * sizeof(*ldata)) < length; i++) { + if (annotation && !annotation[i]) + annotation = NULL; + printk("%s %08x: %016lx %s\n", + err_print_prefix, + (unsigned)(i * sizeof(*ldata)), ldata[i], + annotation ? annotation[i] : ""); + } +} + +void +mchk_dump_logout_frame(struct el_common *mchk_header) +{ + printk("%s -- Frame Header --\n" + " Frame Size: %d (0x%x) bytes\n" + " Flags: %s%s\n" + " MCHK Code: 0x%x\n" + " Frame Rev: %d\n" + " Proc Offset: 0x%08x\n" + " Sys Offset: 0x%08x\n" + " -- Processor Region --\n", + err_print_prefix, + mchk_header->size, mchk_header->size, + mchk_header->retry ? "RETRY " : "", + mchk_header->err2 ? 
"SECOND_ERR " : "", + mchk_header->code, + mchk_header->frame_rev, + mchk_header->proc_offset, + mchk_header->sys_offset); + + mchk_dump_mem((void *) + ((unsigned long)mchk_header + mchk_header->proc_offset), + mchk_header->sys_offset - mchk_header->proc_offset, + NULL); + + printk("%s -- System Region --\n", err_print_prefix); + mchk_dump_mem((void *) + ((unsigned long)mchk_header + mchk_header->sys_offset), + mchk_header->size - mchk_header->sys_offset, + NULL); + printk("%s -- End of Frame --\n", err_print_prefix); +} + + +/* + * Console Data Log + */ +/* Data */ +static struct el_subpacket_handler *subpacket_handler_list = NULL; +static struct el_subpacket_annotation *subpacket_annotation_list = NULL; + +static struct el_subpacket * +el_process_header_subpacket(struct el_subpacket *header) +{ + union el_timestamp timestamp; + char *name = "UNKNOWN EVENT"; + int packet_count = 0; + int length = 0; + + if (header->class != EL_CLASS__HEADER) { + printk("%s** Unexpected header CLASS %d TYPE %d, aborting\n", + err_print_prefix, + header->class, header->type); + return NULL; + } + + switch(header->type) { + case EL_TYPE__HEADER__SYSTEM_ERROR_FRAME: + name = "SYSTEM ERROR"; + length = header->by_type.sys_err.frame_length; + packet_count = + header->by_type.sys_err.frame_packet_count; + timestamp.as_int = 0; + break; + case EL_TYPE__HEADER__SYSTEM_EVENT_FRAME: + name = "SYSTEM EVENT"; + length = header->by_type.sys_event.frame_length; + packet_count = + header->by_type.sys_event.frame_packet_count; + timestamp = header->by_type.sys_event.timestamp; + break; + case EL_TYPE__HEADER__HALT_FRAME: + name = "ERROR HALT"; + length = header->by_type.err_halt.frame_length; + packet_count = + header->by_type.err_halt.frame_packet_count; + timestamp = header->by_type.err_halt.timestamp; + break; + case EL_TYPE__HEADER__LOGOUT_FRAME: + name = "LOGOUT FRAME"; + length = header->by_type.logout_header.frame_length; + packet_count = 1; + timestamp.as_int = 0; + break; + default: /* Unknown */ + printk("%s** Unknown header - CLASS %d TYPE %d, aborting\n", + err_print_prefix, + header->class, header->type); + return NULL; + } + + printk("%s*** %s:\n" + " CLASS %d, TYPE %d\n", + err_print_prefix, + name, + header->class, header->type); + el_print_timestamp(×tamp); + + /* + * Process the subpackets + */ + el_process_subpackets(header, packet_count); + + /* return the next header */ + header = (struct el_subpacket *) + ((unsigned long)header + header->length + length); + return header; +} + +static struct el_subpacket * +el_process_subpacket_reg(struct el_subpacket *header) +{ + struct el_subpacket *next = NULL; + struct el_subpacket_handler *h = subpacket_handler_list; + + for (; h && h->class != header->class; h = h->next); + if (h) next = h->handler(header); + + return next; +} + +void +el_print_timestamp(union el_timestamp *timestamp) +{ + if (timestamp->as_int) + printk("%s TIMESTAMP: %d/%d/%02d %d:%02d:%0d\n", + err_print_prefix, + timestamp->b.month, timestamp->b.day, + timestamp->b.year, timestamp->b.hour, + timestamp->b.minute, timestamp->b.second); +} + +void +el_process_subpackets(struct el_subpacket *header, int packet_count) +{ + struct el_subpacket *subpacket; + int i; + + subpacket = (struct el_subpacket *) + ((unsigned long)header + header->length); + + for (i = 0; subpacket && i < packet_count; i++) { + printk("%sPROCESSING SUBPACKET %d\n", err_print_prefix, i); + subpacket = el_process_subpacket(subpacket); + } +} + +struct el_subpacket * +el_process_subpacket(struct el_subpacket *header) +{ + 
struct el_subpacket *next = NULL; + + switch(header->class) { + case EL_CLASS__TERMINATION: + /* Termination packet, there are no more */ + break; + case EL_CLASS__HEADER: + next = el_process_header_subpacket(header); + break; + default: + if (NULL == (next = el_process_subpacket_reg(header))) { + printk("%s** Unexpected header CLASS %d TYPE %d" + " -- aborting.\n", + err_print_prefix, + header->class, header->type); + } + break; + } + + return next; +} + +void +el_annotate_subpacket(struct el_subpacket *header) +{ + struct el_subpacket_annotation *a; + char **annotation = NULL; + + for (a = subpacket_annotation_list; a; a = a->next) { + if (a->class == header->class && + a->type == header->type && + a->revision == header->revision) { + /* + * We found the annotation + */ + annotation = a->annotation; + printk("%s %s\n", err_print_prefix, a->description); + break; + } + } + + mchk_dump_mem(header, header->length, annotation); +} + +static void __init +cdl_process_console_data_log(int cpu, struct percpu_struct *pcpu) +{ + struct el_subpacket *header = (struct el_subpacket *) + (IDENT_ADDR | pcpu->console_data_log_pa); + int err; + + printk("%s******* CONSOLE DATA LOG FOR CPU %d. *******\n" + "*** Error(s) were logged on a previous boot\n", + err_print_prefix, cpu); + + for (err = 0; header && (header->class != EL_CLASS__TERMINATION); err++) + header = el_process_subpacket(header); + + /* let the console know it's ok to clear the error(s) at restart */ + pcpu->console_data_log_pa = 0; + + printk("%s*** %d total error(s) logged\n" + "**** END OF CONSOLE DATA LOG FOR CPU %d ****\n", + err_print_prefix, err, cpu); +} + +void __init +cdl_check_console_data_log(void) +{ + struct percpu_struct *pcpu; + unsigned long cpu; + + for (cpu = 0; cpu < hwrpb->nr_processors; cpu++) { + pcpu = (struct percpu_struct *) + ((unsigned long)hwrpb + hwrpb->processor_offset + + cpu * hwrpb->processor_size); + if (pcpu->console_data_log_pa) + cdl_process_console_data_log(cpu, pcpu); + } + +} + +int __init +cdl_register_subpacket_annotation(struct el_subpacket_annotation *new) +{ + struct el_subpacket_annotation *a = subpacket_annotation_list; + + if (a == NULL) subpacket_annotation_list = new; + else { + for (; a->next != NULL; a = a->next) { + if ((a->class == new->class && a->type == new->type) || + a == new) { + printk("Attempted to re-register " + "subpacket annotation\n"); + return -EINVAL; + } + } + a->next = new; + } + new->next = NULL; + + return 0; +} + +int __init +cdl_register_subpacket_handler(struct el_subpacket_handler *new) +{ + struct el_subpacket_handler *h = subpacket_handler_list; + + if (h == NULL) subpacket_handler_list = new; + else { + for (; h->next != NULL; h = h->next) { + if (h->class == new->class || h == new) { + printk("Attempted to re-register " + "subpacket handler\n"); + return -EINVAL; + } + } + h->next = new; + } + new->next = NULL; + + return 0; +} + diff --git a/arch/alpha/kernel/err_ev6.c b/arch/alpha/kernel/err_ev6.c new file mode 100644 index 0000000000..8144f2045b --- /dev/null +++ b/arch/alpha/kernel/err_ev6.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/err_ev6.c + * + * Copyright (C) 2000 Jeff Wiedemeier (Compaq Computer Corporation) + * + * Error handling code supporting Alpha systems + */ + +#include + +#include +#include +#include +#include +#include +#include + +#include "err_impl.h" +#include "proto.h" + +static int +ev6_parse_ibox(u64 i_stat, int print) +{ + int status = MCHK_DISPOSITION_REPORT; + +#define EV6__I_STAT__PAR 
(1UL << 29) +#define EV6__I_STAT__ERRMASK (EV6__I_STAT__PAR) + + if (!(i_stat & EV6__I_STAT__ERRMASK)) + return MCHK_DISPOSITION_UNKNOWN_ERROR; + + if (!print) + return status; + + if (i_stat & EV6__I_STAT__PAR) + printk("%s Icache parity error\n", err_print_prefix); + + return status; +} + +static int +ev6_parse_mbox(u64 mm_stat, u64 d_stat, u64 c_stat, int print) +{ + int status = MCHK_DISPOSITION_REPORT; + +#define EV6__MM_STAT__DC_TAG_PERR (1UL << 10) +#define EV6__MM_STAT__ERRMASK (EV6__MM_STAT__DC_TAG_PERR) +#define EV6__D_STAT__TPERR_P0 (1UL << 0) +#define EV6__D_STAT__TPERR_P1 (1UL << 1) +#define EV6__D_STAT__ECC_ERR_ST (1UL << 2) +#define EV6__D_STAT__ECC_ERR_LD (1UL << 3) +#define EV6__D_STAT__SEO (1UL << 4) +#define EV6__D_STAT__ERRMASK (EV6__D_STAT__TPERR_P0 | \ + EV6__D_STAT__TPERR_P1 | \ + EV6__D_STAT__ECC_ERR_ST | \ + EV6__D_STAT__ECC_ERR_LD | \ + EV6__D_STAT__SEO) + + if (!(d_stat & EV6__D_STAT__ERRMASK) && + !(mm_stat & EV6__MM_STAT__ERRMASK)) + return MCHK_DISPOSITION_UNKNOWN_ERROR; + + if (!print) + return status; + + if (mm_stat & EV6__MM_STAT__DC_TAG_PERR) + printk("%s Dcache tag parity error on probe\n", + err_print_prefix); + if (d_stat & EV6__D_STAT__TPERR_P0) + printk("%s Dcache tag parity error - pipe 0\n", + err_print_prefix); + if (d_stat & EV6__D_STAT__TPERR_P1) + printk("%s Dcache tag parity error - pipe 1\n", + err_print_prefix); + if (d_stat & EV6__D_STAT__ECC_ERR_ST) + printk("%s ECC error occurred on a store\n", + err_print_prefix); + if (d_stat & EV6__D_STAT__ECC_ERR_LD) + printk("%s ECC error occurred on a %s load\n", + err_print_prefix, + c_stat ? "" : "speculative "); + if (d_stat & EV6__D_STAT__SEO) + printk("%s Dcache second error\n", err_print_prefix); + + return status; +} + +static int +ev6_parse_cbox(u64 c_addr, u64 c1_syn, u64 c2_syn, + u64 c_stat, u64 c_sts, int print) +{ + static const char * const sourcename[] = { + "UNKNOWN", "UNKNOWN", "UNKNOWN", + "MEMORY", "BCACHE", "DCACHE", + "BCACHE PROBE", "BCACHE PROBE" + }; + static const char * const streamname[] = { "D", "I" }; + static const char * const bitsname[] = { "SINGLE", "DOUBLE" }; + int status = MCHK_DISPOSITION_REPORT; + int source = -1, stream = -1, bits = -1; + +#define EV6__C_STAT__BC_PERR (0x01) +#define EV6__C_STAT__DC_PERR (0x02) +#define EV6__C_STAT__DSTREAM_MEM_ERR (0x03) +#define EV6__C_STAT__DSTREAM_BC_ERR (0x04) +#define EV6__C_STAT__DSTREAM_DC_ERR (0x05) +#define EV6__C_STAT__PROBE_BC_ERR0 (0x06) /* both 6 and 7 indicate... */ +#define EV6__C_STAT__PROBE_BC_ERR1 (0x07) /* ...probe bc error. 
*/ +#define EV6__C_STAT__ISTREAM_MEM_ERR (0x0B) +#define EV6__C_STAT__ISTREAM_BC_ERR (0x0C) +#define EV6__C_STAT__DSTREAM_MEM_DBL (0x13) +#define EV6__C_STAT__DSTREAM_BC_DBL (0x14) +#define EV6__C_STAT__ISTREAM_MEM_DBL (0x1B) +#define EV6__C_STAT__ISTREAM_BC_DBL (0x1C) +#define EV6__C_STAT__SOURCE_MEMORY (0x03) +#define EV6__C_STAT__SOURCE_BCACHE (0x04) +#define EV6__C_STAT__SOURCE__S (0) +#define EV6__C_STAT__SOURCE__M (0x07) +#define EV6__C_STAT__ISTREAM__S (3) +#define EV6__C_STAT__ISTREAM__M (0x01) +#define EV6__C_STAT__DOUBLE__S (4) +#define EV6__C_STAT__DOUBLE__M (0x01) +#define EV6__C_STAT__ERRMASK (0x1F) +#define EV6__C_STS__SHARED (1 << 0) +#define EV6__C_STS__DIRTY (1 << 1) +#define EV6__C_STS__VALID (1 << 2) +#define EV6__C_STS__PARITY (1 << 3) + + if (!(c_stat & EV6__C_STAT__ERRMASK)) + return MCHK_DISPOSITION_UNKNOWN_ERROR; + + if (!print) + return status; + + source = EXTRACT(c_stat, EV6__C_STAT__SOURCE); + stream = EXTRACT(c_stat, EV6__C_STAT__ISTREAM); + bits = EXTRACT(c_stat, EV6__C_STAT__DOUBLE); + + if (c_stat & EV6__C_STAT__BC_PERR) { + printk("%s Bcache tag parity error\n", err_print_prefix); + source = -1; + } + + if (c_stat & EV6__C_STAT__DC_PERR) { + printk("%s Dcache tag parity error\n", err_print_prefix); + source = -1; + } + + if (c_stat == EV6__C_STAT__PROBE_BC_ERR0 || + c_stat == EV6__C_STAT__PROBE_BC_ERR1) { + printk("%s Bcache single-bit error on a probe hit\n", + err_print_prefix); + source = -1; + } + + if (source != -1) + printk("%s %s-STREAM %s-BIT ECC error from %s\n", + err_print_prefix, + streamname[stream], bitsname[bits], sourcename[source]); + + printk("%s Address: 0x%016llx\n" + " Syndrome[upper.lower]: %02llx.%02llx\n", + err_print_prefix, + c_addr, + c2_syn, c1_syn); + + if (source == EV6__C_STAT__SOURCE_MEMORY || + source == EV6__C_STAT__SOURCE_BCACHE) + printk("%s Block status: %s%s%s%s\n", + err_print_prefix, + (c_sts & EV6__C_STS__SHARED) ? "SHARED " : "", + (c_sts & EV6__C_STS__DIRTY) ? "DIRTY " : "", + (c_sts & EV6__C_STS__VALID) ? "VALID " : "", + (c_sts & EV6__C_STS__PARITY) ? "PARITY " : ""); + + return status; +} + +void +ev6_register_error_handlers(void) +{ + /* None right now. 
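Unlike EV7, no subpacket annotations or handlers are registered; EV6 machine checks are decoded directly by ev6_process_logout_frame().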
*/ +} + +int +ev6_process_logout_frame(struct el_common *mchk_header, int print) +{ + struct el_common_EV6_mcheck *ev6mchk = + (struct el_common_EV6_mcheck *)mchk_header; + int status = MCHK_DISPOSITION_UNKNOWN_ERROR; + + status |= ev6_parse_ibox(ev6mchk->I_STAT, print); + status |= ev6_parse_mbox(ev6mchk->MM_STAT, ev6mchk->DC_STAT, + ev6mchk->C_STAT, print); + status |= ev6_parse_cbox(ev6mchk->C_ADDR, ev6mchk->DC1_SYNDROME, + ev6mchk->DC0_SYNDROME, ev6mchk->C_STAT, + ev6mchk->C_STS, print); + + if (!print) + return status; + + if (status != MCHK_DISPOSITION_DISMISS) { + char *saved_err_prefix = err_print_prefix; + + /* + * Dump some additional information from the frame + */ + printk("%s EXC_ADDR: 0x%016lx IER_CM: 0x%016lx" + " ISUM: 0x%016lx\n" + " PAL_BASE: 0x%016lx I_CTL: 0x%016lx" + " PCTX: 0x%016lx\n", + err_print_prefix, + ev6mchk->EXC_ADDR, ev6mchk->IER_CM, ev6mchk->ISUM, + ev6mchk->PAL_BASE, ev6mchk->I_CTL, ev6mchk->PCTX); + + if (status == MCHK_DISPOSITION_UNKNOWN_ERROR) { + printk("%s UNKNOWN error, frame follows:\n", + err_print_prefix); + } else { + /* had decode -- downgrade print level for frame */ + err_print_prefix = KERN_NOTICE; + } + + mchk_dump_logout_frame(mchk_header); + + err_print_prefix = saved_err_prefix; + } + + return status; +} + +void +ev6_machine_check(unsigned long vector, unsigned long la_ptr) +{ + struct el_common *mchk_header = (struct el_common *)la_ptr; + + /* + * Sync the processor + */ + mb(); + draina(); + + /* + * Parse the logout frame without printing first. If the only error(s) + * found are have a disposition of "dismiss", then just dismiss them + * and don't print any message + */ + if (ev6_process_logout_frame(mchk_header, 0) != + MCHK_DISPOSITION_DISMISS) { + char *saved_err_prefix = err_print_prefix; + err_print_prefix = KERN_CRIT; + + /* + * Either a nondismissable error was detected or no + * recognized error was detected in the logout frame + * -- report the error in either case + */ + printk("%s*CPU %s Error (Vector 0x%x) reported on CPU %d:\n", + err_print_prefix, + (vector == SCB_Q_PROCERR)?"Correctable":"Uncorrectable", + (unsigned int)vector, (int)smp_processor_id()); + + ev6_process_logout_frame(mchk_header, 1); + dik_show_regs(get_irq_regs(), NULL); + + err_print_prefix = saved_err_prefix; + } + + /* + * Release the logout frame + */ + wrmces(0x7); + mb(); +} + diff --git a/arch/alpha/kernel/err_ev7.c b/arch/alpha/kernel/err_ev7.c new file mode 100644 index 0000000000..565de1acf1 --- /dev/null +++ b/arch/alpha/kernel/err_ev7.c @@ -0,0 +1,287 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/err_ev7.c + * + * Copyright (C) 2000 Jeff Wiedemeier (Compaq Computer Corporation) + * + * Error handling code supporting Alpha systems + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include "err_impl.h" +#include "proto.h" + +struct ev7_lf_subpackets * +ev7_collect_logout_frame_subpackets(struct el_subpacket *el_ptr, + struct ev7_lf_subpackets *lf_subpackets) +{ + struct el_subpacket *subpacket; + int i; + + /* + * A Marvel machine check frame is always packaged in an + * el_subpacket of class HEADER, type LOGOUT_FRAME. + */ + if (el_ptr->class != EL_CLASS__HEADER || + el_ptr->type != EL_TYPE__HEADER__LOGOUT_FRAME) + return NULL; + + /* + * It is a logout frame header. Look at the one subpacket. + */ + el_ptr = (struct el_subpacket *) + ((unsigned long)el_ptr + el_ptr->length); + + /* + * It has to be class PAL, type LOGOUT_FRAME. 
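+ * Its raw data is the ev7_pal_logout_subpacket, whose subpacket_count
+ * tells how many PAL subpackets follow to be sorted into lf_subpackets.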
+ */ + if (el_ptr->class != EL_CLASS__PAL || + el_ptr->type != EL_TYPE__PAL__LOGOUT_FRAME) + return NULL; + + lf_subpackets->logout = (struct ev7_pal_logout_subpacket *) + el_ptr->by_type.raw.data_start; + + /* + * Process the subpackets. + */ + subpacket = (struct el_subpacket *) + ((unsigned long)el_ptr + el_ptr->length); + for (i = 0; + subpacket && i < lf_subpackets->logout->subpacket_count; + subpacket = (struct el_subpacket *) + ((unsigned long)subpacket + subpacket->length), i++) { + /* + * All subpackets should be class PAL. + */ + if (subpacket->class != EL_CLASS__PAL) { + printk("%s**UNEXPECTED SUBPACKET CLASS %d " + "IN LOGOUT FRAME (packet %d\n", + err_print_prefix, subpacket->class, i); + return NULL; + } + + /* + * Remember the subpacket. + */ + switch(subpacket->type) { + case EL_TYPE__PAL__EV7_PROCESSOR: + lf_subpackets->ev7 = + (struct ev7_pal_processor_subpacket *) + subpacket->by_type.raw.data_start; + break; + + case EL_TYPE__PAL__EV7_RBOX: + lf_subpackets->rbox = (struct ev7_pal_rbox_subpacket *) + subpacket->by_type.raw.data_start; + break; + + case EL_TYPE__PAL__EV7_ZBOX: + lf_subpackets->zbox = (struct ev7_pal_zbox_subpacket *) + subpacket->by_type.raw.data_start; + break; + + case EL_TYPE__PAL__EV7_IO: + lf_subpackets->io = (struct ev7_pal_io_subpacket *) + subpacket->by_type.raw.data_start; + break; + + case EL_TYPE__PAL__ENV__AMBIENT_TEMPERATURE: + case EL_TYPE__PAL__ENV__AIRMOVER_FAN: + case EL_TYPE__PAL__ENV__VOLTAGE: + case EL_TYPE__PAL__ENV__INTRUSION: + case EL_TYPE__PAL__ENV__POWER_SUPPLY: + case EL_TYPE__PAL__ENV__LAN: + case EL_TYPE__PAL__ENV__HOT_PLUG: + lf_subpackets->env[ev7_lf_env_index(subpacket->type)] = + (struct ev7_pal_environmental_subpacket *) + subpacket->by_type.raw.data_start; + break; + + default: + /* + * Don't know what kind of frame this is. + */ + return NULL; + } + } + + return lf_subpackets; +} + +void +ev7_machine_check(unsigned long vector, unsigned long la_ptr) +{ + struct el_subpacket *el_ptr = (struct el_subpacket *)la_ptr; + char *saved_err_prefix = err_print_prefix; + + /* + * Sync the processor + */ + mb(); + draina(); + + err_print_prefix = KERN_CRIT; + printk("%s*CPU %s Error (Vector 0x%x) reported on CPU %d\n", + err_print_prefix, + (vector == SCB_Q_PROCERR) ? 
"Correctable" : "Uncorrectable", + (unsigned int)vector, (int)smp_processor_id()); + el_process_subpacket(el_ptr); + err_print_prefix = saved_err_prefix; + + /* + * Release the logout frame + */ + wrmces(0x7); + mb(); +} + +static char *el_ev7_processor_subpacket_annotation[] = { + "Subpacket Header", "I_STAT", "DC_STAT", + "C_ADDR", "C_SYNDROME_1", "C_SYNDROME_0", + "C_STAT", "C_STS", "MM_STAT", + "EXC_ADDR", "IER_CM", "ISUM", + "PAL_BASE", "I_CTL", "PROCESS_CONTEXT", + "CBOX_CTL", "CBOX_STP_CTL", "CBOX_ACC_CTL", + "CBOX_LCL_SET", "CBOX_GLB_SET", "BBOX_CTL", + "BBOX_ERR_STS", "BBOX_ERR_IDX", "CBOX_DDP_ERR_STS", + "BBOX_DAT_RMP", NULL +}; + +static char *el_ev7_zbox_subpacket_annotation[] = { + "Subpacket Header", + "ZBOX(0): DRAM_ERR_STATUS_2 / DRAM_ERR_STATUS_1", + "ZBOX(0): DRAM_ERROR_CTL / DRAM_ERR_STATUS_3", + "ZBOX(0): DIFT_TIMEOUT / DRAM_ERR_ADR", + "ZBOX(0): FRC_ERR_ADR / DRAM_MAPPER_CTL", + "ZBOX(0): reserved / DIFT_ERR_STATUS", + "ZBOX(1): DRAM_ERR_STATUS_2 / DRAM_ERR_STATUS_1", + "ZBOX(1): DRAM_ERROR_CTL / DRAM_ERR_STATUS_3", + "ZBOX(1): DIFT_TIMEOUT / DRAM_ERR_ADR", + "ZBOX(1): FRC_ERR_ADR / DRAM_MAPPER_CTL", + "ZBOX(1): reserved / DIFT_ERR_STATUS", + "CBOX_CTL", "CBOX_STP_CTL", + "ZBOX(0)_ERROR_PA", "ZBOX(1)_ERROR_PA", + "ZBOX(0)_ORED_SYNDROME","ZBOX(1)_ORED_SYNDROME", + NULL +}; + +static char *el_ev7_rbox_subpacket_annotation[] = { + "Subpacket Header", "RBOX_CFG", "RBOX_N_CFG", + "RBOX_S_CFG", "RBOX_E_CFG", "RBOX_W_CFG", + "RBOX_N_ERR", "RBOX_S_ERR", "RBOX_E_ERR", + "RBOX_W_ERR", "RBOX_IO_CFG", "RBOX_IO_ERR", + "RBOX_L_ERR", "RBOX_WHOAMI", "RBOX_IMASL", + "RBOX_INTQ", "RBOX_INT", NULL +}; + +static char *el_ev7_io_subpacket_annotation[] = { + "Subpacket Header", "IO_ASIC_REV", "IO_SYS_REV", + "IO7_UPH", "HPI_CTL", "CRD_CTL", + "HEI_CTL", "PO7_ERROR_SUM","PO7_UNCRR_SYM", + "PO7_CRRCT_SYM", "PO7_UGBGE_SYM","PO7_ERR_PKT0", + "PO7_ERR_PKT1", "reserved", "reserved", + "PO0_ERR_SUM", "PO0_TLB_ERR", "PO0_SPL_COMPLT", + "PO0_TRANS_SUM", "PO0_FIRST_ERR","PO0_MULT_ERR", + "DM CSR PH", "DM CSR PH", "DM CSR PH", + "DM CSR PH", "reserved", + "PO1_ERR_SUM", "PO1_TLB_ERR", "PO1_SPL_COMPLT", + "PO1_TRANS_SUM", "PO1_FIRST_ERR","PO1_MULT_ERR", + "DM CSR PH", "DM CSR PH", "DM CSR PH", + "DM CSR PH", "reserved", + "PO2_ERR_SUM", "PO2_TLB_ERR", "PO2_SPL_COMPLT", + "PO2_TRANS_SUM", "PO2_FIRST_ERR","PO2_MULT_ERR", + "DM CSR PH", "DM CSR PH", "DM CSR PH", + "DM CSR PH", "reserved", + "PO3_ERR_SUM", "PO3_TLB_ERR", "PO3_SPL_COMPLT", + "PO3_TRANS_SUM", "PO3_FIRST_ERR","PO3_MULT_ERR", + "DM CSR PH", "DM CSR PH", "DM CSR PH", + "DM CSR PH", "reserved", + NULL +}; + +static struct el_subpacket_annotation el_ev7_pal_annotations[] = { + SUBPACKET_ANNOTATION(EL_CLASS__PAL, + EL_TYPE__PAL__EV7_PROCESSOR, + 1, + "EV7 Processor Subpacket", + el_ev7_processor_subpacket_annotation), + SUBPACKET_ANNOTATION(EL_CLASS__PAL, + EL_TYPE__PAL__EV7_ZBOX, + 1, + "EV7 ZBOX Subpacket", + el_ev7_zbox_subpacket_annotation), + SUBPACKET_ANNOTATION(EL_CLASS__PAL, + EL_TYPE__PAL__EV7_RBOX, + 1, + "EV7 RBOX Subpacket", + el_ev7_rbox_subpacket_annotation), + SUBPACKET_ANNOTATION(EL_CLASS__PAL, + EL_TYPE__PAL__EV7_IO, + 1, + "EV7 IO Subpacket", + el_ev7_io_subpacket_annotation) +}; + +static struct el_subpacket * +ev7_process_pal_subpacket(struct el_subpacket *header) +{ + struct ev7_pal_subpacket *packet; + + if (header->class != EL_CLASS__PAL) { + printk("%s ** Unexpected header CLASS %d TYPE %d, aborting\n", + err_print_prefix, + header->class, header->type); + return NULL; + } + + packet = (struct ev7_pal_subpacket 
*)header->by_type.raw.data_start; + + switch(header->type) { + case EL_TYPE__PAL__LOGOUT_FRAME: + printk("%s*** MCHK occurred on LPID %lld (RBOX %llx)\n", + err_print_prefix, + packet->by_type.logout.whami, + packet->by_type.logout.rbox_whami); + el_print_timestamp(&packet->by_type.logout.timestamp); + printk("%s EXC_ADDR: %016llx\n" + " HALT_CODE: %llx\n", + err_print_prefix, + packet->by_type.logout.exc_addr, + packet->by_type.logout.halt_code); + el_process_subpackets(header, + packet->by_type.logout.subpacket_count); + break; + default: + printk("%s ** PAL TYPE %d SUBPACKET\n", + err_print_prefix, + header->type); + el_annotate_subpacket(header); + break; + } + + return (struct el_subpacket *)((unsigned long)header + header->length); +} + +struct el_subpacket_handler ev7_pal_subpacket_handler = + SUBPACKET_HANDLER_INIT(EL_CLASS__PAL, ev7_process_pal_subpacket); + +void __init +ev7_register_error_handlers(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(el_ev7_pal_annotations); i++) + cdl_register_subpacket_annotation(&el_ev7_pal_annotations[i]); + + cdl_register_subpacket_handler(&ev7_pal_subpacket_handler); +} + diff --git a/arch/alpha/kernel/err_impl.h b/arch/alpha/kernel/err_impl.h new file mode 100644 index 0000000000..737b958a58 --- /dev/null +++ b/arch/alpha/kernel/err_impl.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * linux/arch/alpha/kernel/err_impl.h + * + * Copyright (C) 2000 Jeff Wiedemeier (Compaq Computer Corporation) + * + * Contains declarations and macros to support Alpha error handling + * implementations. + */ + +#include + +union el_timestamp; +struct el_subpacket; +struct ev7_lf_subpackets; + +struct el_subpacket_annotation { + struct el_subpacket_annotation *next; + u16 class; + u16 type; + u16 revision; + char *description; + char **annotation; +}; +#define SUBPACKET_ANNOTATION(c, t, r, d, a) {NULL, (c), (t), (r), (d), (a)} + +struct el_subpacket_handler { + struct el_subpacket_handler *next; + u16 class; + struct el_subpacket *(*handler)(struct el_subpacket *); +}; +#define SUBPACKET_HANDLER_INIT(c, h) {NULL, (c), (h)} + +/* + * Manipulate a field from a register given it's name. 
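+ * (err_ev6.c, for example, uses EXTRACT(c_stat, EV6__C_STAT__SOURCE) to
+ * pull the SOURCE field out of C_STAT.)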
defines + * for the LSB (__S - shift count) and bitmask (__M) are required + * + * EXTRACT(u, f) - extracts the field and places it at bit position 0 + * GEN_MASK(f) - creates an in-position mask for the field + */ +#define EXTRACT(u, f) (((u) >> f##__S) & f##__M) +#define GEN_MASK(f) ((u64)f##__M << f##__S) + +/* + * err_common.c + */ +extern char *err_print_prefix; + +extern void mchk_dump_mem(void *, size_t, char **); +extern void mchk_dump_logout_frame(struct el_common *); +extern void el_print_timestamp(union el_timestamp *); +extern void el_process_subpackets(struct el_subpacket *, int); +extern struct el_subpacket *el_process_subpacket(struct el_subpacket *); +extern void el_annotate_subpacket(struct el_subpacket *); +extern void cdl_check_console_data_log(void); +extern int cdl_register_subpacket_annotation(struct el_subpacket_annotation *); +extern int cdl_register_subpacket_handler(struct el_subpacket_handler *); + +/* + * err_ev7.c + */ +extern struct ev7_lf_subpackets * +ev7_collect_logout_frame_subpackets(struct el_subpacket *, + struct ev7_lf_subpackets *); +extern void ev7_register_error_handlers(void); +extern void ev7_machine_check(unsigned long, unsigned long); + +/* + * err_ev6.c + */ +extern void ev6_register_error_handlers(void); +extern int ev6_process_logout_frame(struct el_common *, int); +extern void ev6_machine_check(unsigned long, unsigned long); + +/* + * err_marvel.c + */ +extern void marvel_machine_check(unsigned long, unsigned long); +extern void marvel_register_error_handlers(void); + +/* + * err_titan.c + */ +extern int titan_process_logout_frame(struct el_common *, int); +extern void titan_machine_check(unsigned long, unsigned long); +extern void titan_register_error_handlers(void); +extern int privateer_process_logout_frame(struct el_common *, int); +extern void privateer_machine_check(unsigned long, unsigned long); diff --git a/arch/alpha/kernel/err_marvel.c b/arch/alpha/kernel/err_marvel.c new file mode 100644 index 0000000000..c0c0ccefc4 --- /dev/null +++ b/arch/alpha/kernel/err_marvel.c @@ -0,0 +1,1161 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/err_marvel.c + * + * Copyright (C) 2001 Jeff Wiedemeier (Compaq Computer Corporation) + * + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "err_impl.h" +#include "proto.h" + +static void +marvel_print_680_frame(struct ev7_lf_subpackets *lf_subpackets) +{ +#ifdef CONFIG_VERBOSE_MCHECK + struct ev7_pal_environmental_subpacket *env; + struct { int type; char *name; } ev_packets[] = { + { EL_TYPE__PAL__ENV__AMBIENT_TEMPERATURE, + "Ambient Temperature" }, + { EL_TYPE__PAL__ENV__AIRMOVER_FAN, + "AirMover / Fan" }, + { EL_TYPE__PAL__ENV__VOLTAGE, + "Voltage" }, + { EL_TYPE__PAL__ENV__INTRUSION, + "Intrusion" }, + { EL_TYPE__PAL__ENV__POWER_SUPPLY, + "Power Supply" }, + { EL_TYPE__PAL__ENV__LAN, + "LAN" }, + { EL_TYPE__PAL__ENV__HOT_PLUG, + "Hot Plug" }, + { 0, NULL } + }; + int i; + + for (i = 0; ev_packets[i].type != 0; i++) { + env = lf_subpackets->env[ev7_lf_env_index(ev_packets[i].type)]; + if (!env) + continue; + + printk("%s**%s event (cabinet %d, drawer %d)\n", + err_print_prefix, + ev_packets[i].name, + env->cabinet, + env->drawer); + printk("%s Module Type: 0x%x - Unit ID 0x%x - " + "Condition 0x%x\n", + err_print_prefix, + env->module_type, + env->unit_id, + env->condition); + } +#endif /* CONFIG_VERBOSE_MCHECK */ +} + +static int +marvel_process_680_frame(struct ev7_lf_subpackets *lf_subpackets, int print) 
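+/* 680 frames carry environmental event subpackets; report if any is present. */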
+{ + int status = MCHK_DISPOSITION_UNKNOWN_ERROR; + int i; + + for (i = ev7_lf_env_index(EL_TYPE__PAL__ENV__AMBIENT_TEMPERATURE); + i <= ev7_lf_env_index(EL_TYPE__PAL__ENV__HOT_PLUG); + i++) { + if (lf_subpackets->env[i]) + status = MCHK_DISPOSITION_REPORT; + } + + if (print) + marvel_print_680_frame(lf_subpackets); + + return status; +} + +#ifdef CONFIG_VERBOSE_MCHECK + +static void +marvel_print_err_cyc(u64 err_cyc) +{ + static char *packet_desc[] = { + "No Error", + "UNKNOWN", + "1 cycle (1 or 2 flit packet)", + "2 cycles (3 flit packet)", + "9 cycles (18 flit packet)", + "10 cycles (19 flit packet)", + "UNKNOWN", + "UNKNOWN", + "UNKNOWN" + }; + +#define IO7__ERR_CYC__ODD_FLT (1UL << 0) +#define IO7__ERR_CYC__EVN_FLT (1UL << 1) +#define IO7__ERR_CYC__PACKET__S (6) +#define IO7__ERR_CYC__PACKET__M (0x7) +#define IO7__ERR_CYC__LOC (1UL << 5) +#define IO7__ERR_CYC__CYCLE__S (2) +#define IO7__ERR_CYC__CYCLE__M (0x7) + + printk("%s Packet In Error: %s\n" + "%s Error in %s, cycle %lld%s%s\n", + err_print_prefix, + packet_desc[EXTRACT(err_cyc, IO7__ERR_CYC__PACKET)], + err_print_prefix, + (err_cyc & IO7__ERR_CYC__LOC) ? "DATA" : "HEADER", + EXTRACT(err_cyc, IO7__ERR_CYC__CYCLE), + (err_cyc & IO7__ERR_CYC__ODD_FLT) ? " [ODD Flit]": "", + (err_cyc & IO7__ERR_CYC__EVN_FLT) ? " [Even Flit]": ""); +} + +static void +marvel_print_po7_crrct_sym(u64 crrct_sym) +{ +#define IO7__PO7_CRRCT_SYM__SYN__S (0) +#define IO7__PO7_CRRCT_SYM__SYN__M (0x7f) +#define IO7__PO7_CRRCT_SYM__ERR_CYC__S (7) /* ERR_CYC + ODD_FLT + EVN_FLT */ +#define IO7__PO7_CRRCT_SYM__ERR_CYC__M (0x1ff) + + + printk("%s Correctable Error Symptoms:\n" + "%s Syndrome: 0x%llx\n", + err_print_prefix, + err_print_prefix, EXTRACT(crrct_sym, IO7__PO7_CRRCT_SYM__SYN)); + marvel_print_err_cyc(EXTRACT(crrct_sym, IO7__PO7_CRRCT_SYM__ERR_CYC)); +} + +static void +marvel_print_po7_uncrr_sym(u64 uncrr_sym, u64 valid_mask) +{ + static char *clk_names[] = { "_h[0]", "_h[1]", "_n[0]", "_n[1]" }; + static char *clk_decode[] = { + "No Error", + "One extra rising edge", + "Two extra rising edges", + "Lost one clock" + }; + static char *port_names[] = { "Port 0", "Port 1", + "Port 2", "Port 3", + "Unknown Port", "Unknown Port", + "Unknown Port", "Port 7" }; + int scratch, i; + +#define IO7__PO7_UNCRR_SYM__SYN__S (0) +#define IO7__PO7_UNCRR_SYM__SYN__M (0x7f) +#define IO7__PO7_UNCRR_SYM__ERR_CYC__S (7) /* ERR_CYC + ODD_FLT... */ +#define IO7__PO7_UNCRR_SYM__ERR_CYC__M (0x1ff) /* ... 
+ EVN_FLT */ +#define IO7__PO7_UNCRR_SYM__CLK__S (16) +#define IO7__PO7_UNCRR_SYM__CLK__M (0xff) +#define IO7__PO7_UNCRR_SYM__CDT_OVF_TO__REQ (1UL << 24) +#define IO7__PO7_UNCRR_SYM__CDT_OVF_TO__RIO (1UL << 25) +#define IO7__PO7_UNCRR_SYM__CDT_OVF_TO__WIO (1UL << 26) +#define IO7__PO7_UNCRR_SYM__CDT_OVF_TO__BLK (1UL << 27) +#define IO7__PO7_UNCRR_SYM__CDT_OVF_TO__NBK (1UL << 28) +#define IO7__PO7_UNCRR_SYM__OVF__READIO (1UL << 29) +#define IO7__PO7_UNCRR_SYM__OVF__WRITEIO (1UL << 30) +#define IO7__PO7_UNCRR_SYM__OVF__FWD (1UL << 31) +#define IO7__PO7_UNCRR_SYM__VICTIM_SP__S (32) +#define IO7__PO7_UNCRR_SYM__VICTIM_SP__M (0xff) +#define IO7__PO7_UNCRR_SYM__DETECT_SP__S (40) +#define IO7__PO7_UNCRR_SYM__DETECT_SP__M (0xff) +#define IO7__PO7_UNCRR_SYM__STRV_VTR__S (48) +#define IO7__PO7_UNCRR_SYM__STRV_VTR__M (0x3ff) + +#define IO7__STRV_VTR__LSI__INTX__S (0) +#define IO7__STRV_VTR__LSI__INTX__M (0x3) +#define IO7__STRV_VTR__LSI__SLOT__S (2) +#define IO7__STRV_VTR__LSI__SLOT__M (0x7) +#define IO7__STRV_VTR__LSI__BUS__S (5) +#define IO7__STRV_VTR__LSI__BUS__M (0x3) +#define IO7__STRV_VTR__MSI__INTNUM__S (0) +#define IO7__STRV_VTR__MSI__INTNUM__M (0x1ff) +#define IO7__STRV_VTR__IS_MSI (1UL << 9) + + printk("%s Uncorrectable Error Symptoms:\n", err_print_prefix); + uncrr_sym &= valid_mask; + + if (EXTRACT(valid_mask, IO7__PO7_UNCRR_SYM__SYN)) + printk("%s Syndrome: 0x%llx\n", + err_print_prefix, + EXTRACT(uncrr_sym, IO7__PO7_UNCRR_SYM__SYN)); + + if (EXTRACT(valid_mask, IO7__PO7_UNCRR_SYM__ERR_CYC)) + marvel_print_err_cyc(EXTRACT(uncrr_sym, + IO7__PO7_UNCRR_SYM__ERR_CYC)); + + scratch = EXTRACT(uncrr_sym, IO7__PO7_UNCRR_SYM__CLK); + for (i = 0; i < 4; i++, scratch >>= 2) { + if (scratch & 0x3) + printk("%s Clock %s: %s\n", + err_print_prefix, + clk_names[i], clk_decode[scratch & 0x3]); + } + + if (uncrr_sym & IO7__PO7_UNCRR_SYM__CDT_OVF_TO__REQ) + printk("%s REQ Credit Timeout or Overflow\n", + err_print_prefix); + if (uncrr_sym & IO7__PO7_UNCRR_SYM__CDT_OVF_TO__RIO) + printk("%s RIO Credit Timeout or Overflow\n", + err_print_prefix); + if (uncrr_sym & IO7__PO7_UNCRR_SYM__CDT_OVF_TO__WIO) + printk("%s WIO Credit Timeout or Overflow\n", + err_print_prefix); + if (uncrr_sym & IO7__PO7_UNCRR_SYM__CDT_OVF_TO__BLK) + printk("%s BLK Credit Timeout or Overflow\n", + err_print_prefix); + if (uncrr_sym & IO7__PO7_UNCRR_SYM__CDT_OVF_TO__NBK) + printk("%s NBK Credit Timeout or Overflow\n", + err_print_prefix); + + if (uncrr_sym & IO7__PO7_UNCRR_SYM__OVF__READIO) + printk("%s Read I/O Buffer Overflow\n", + err_print_prefix); + if (uncrr_sym & IO7__PO7_UNCRR_SYM__OVF__WRITEIO) + printk("%s Write I/O Buffer Overflow\n", + err_print_prefix); + if (uncrr_sym & IO7__PO7_UNCRR_SYM__OVF__FWD) + printk("%s FWD Buffer Overflow\n", + err_print_prefix); + + if ((scratch = EXTRACT(uncrr_sym, IO7__PO7_UNCRR_SYM__VICTIM_SP))) { + int lost = scratch & (1UL << 4); + scratch &= ~lost; + for (i = 0; i < 8; i++, scratch >>= 1) { + if (!(scratch & 1)) + continue; + printk("%s Error Response sent to %s", + err_print_prefix, port_names[i]); + } + if (lost) + printk("%s Lost Error sent somewhere else\n", + err_print_prefix); + } + + if ((scratch = EXTRACT(uncrr_sym, IO7__PO7_UNCRR_SYM__DETECT_SP))) { + for (i = 0; i < 8; i++, scratch >>= 1) { + if (!(scratch & 1)) + continue; + printk("%s Error Reported by %s", + err_print_prefix, port_names[i]); + } + } + + if (EXTRACT(valid_mask, IO7__PO7_UNCRR_SYM__STRV_VTR)) { + char starvation_message[80]; + + scratch = EXTRACT(uncrr_sym, IO7__PO7_UNCRR_SYM__STRV_VTR); + if (scratch & 
IO7__STRV_VTR__IS_MSI) + sprintf(starvation_message, + "MSI Interrupt 0x%x", + EXTRACT(scratch, IO7__STRV_VTR__MSI__INTNUM)); + else + sprintf(starvation_message, + "LSI INT%c for Bus:Slot (%d:%d)\n", + 'A' + EXTRACT(scratch, + IO7__STRV_VTR__LSI__INTX), + EXTRACT(scratch, IO7__STRV_VTR__LSI__BUS), + EXTRACT(scratch, IO7__STRV_VTR__LSI__SLOT)); + + printk("%s Starvation Int Trigger By: %s\n", + err_print_prefix, starvation_message); + } +} + +static void +marvel_print_po7_ugbge_sym(u64 ugbge_sym) +{ + char opcode_str[10]; + +#define IO7__PO7_UGBGE_SYM__UPH_PKT_OFF__S (6) +#define IO7__PO7_UGBGE_SYM__UPH_PKT_OFF__M (0xfffffffful) +#define IO7__PO7_UGBGE_SYM__UPH_OPCODE__S (40) +#define IO7__PO7_UGBGE_SYM__UPH_OPCODE__M (0xff) +#define IO7__PO7_UGBGE_SYM__UPH_SRC_PORT__S (48) +#define IO7__PO7_UGBGE_SYM__UPH_SRC_PORT__M (0xf) +#define IO7__PO7_UGBGE_SYM__UPH_DEST_PID__S (52) +#define IO7__PO7_UGBGE_SYM__UPH_DEST_PID__M (0x7ff) +#define IO7__PO7_UGBGE_SYM__VALID (1UL << 63) + + if (!(ugbge_sym & IO7__PO7_UGBGE_SYM__VALID)) + return; + + switch(EXTRACT(ugbge_sym, IO7__PO7_UGBGE_SYM__UPH_OPCODE)) { + case 0x51: + sprintf(opcode_str, "Wr32"); + break; + case 0x50: + sprintf(opcode_str, "WrQW"); + break; + case 0x54: + sprintf(opcode_str, "WrIPR"); + break; + case 0xD8: + sprintf(opcode_str, "Victim"); + break; + case 0xC5: + sprintf(opcode_str, "BlkIO"); + break; + default: + sprintf(opcode_str, "0x%llx\n", + EXTRACT(ugbge_sym, IO7__PO7_UGBGE_SYM__UPH_OPCODE)); + break; + } + + printk("%s Up Hose Garbage Symptom:\n" + "%s Source Port: %lld - Dest PID: %lld - OpCode: %s\n", + err_print_prefix, + err_print_prefix, + EXTRACT(ugbge_sym, IO7__PO7_UGBGE_SYM__UPH_SRC_PORT), + EXTRACT(ugbge_sym, IO7__PO7_UGBGE_SYM__UPH_DEST_PID), + opcode_str); + + if (0xC5 != EXTRACT(ugbge_sym, IO7__PO7_UGBGE_SYM__UPH_OPCODE)) + printk("%s Packet Offset 0x%08llx\n", + err_print_prefix, + EXTRACT(ugbge_sym, IO7__PO7_UGBGE_SYM__UPH_PKT_OFF)); +} + +static void +marvel_print_po7_err_sum(struct ev7_pal_io_subpacket *io) +{ + u64 uncrr_sym_valid = 0; + +#define IO7__PO7_ERRSUM__CR_SBE (1UL << 32) +#define IO7__PO7_ERRSUM__CR_SBE2 (1UL << 33) +#define IO7__PO7_ERRSUM__CR_PIO_WBYTE (1UL << 34) +#define IO7__PO7_ERRSUM__CR_CSR_NXM (1UL << 35) +#define IO7__PO7_ERRSUM__CR_RPID_ACV (1UL << 36) +#define IO7__PO7_ERRSUM__CR_RSP_NXM (1UL << 37) +#define IO7__PO7_ERRSUM__CR_ERR_RESP (1UL << 38) +#define IO7__PO7_ERRSUM__CR_CLK_DERR (1UL << 39) +#define IO7__PO7_ERRSUM__CR_DAT_DBE (1UL << 40) +#define IO7__PO7_ERRSUM__CR_DAT_GRBG (1UL << 41) +#define IO7__PO7_ERRSUM__MAF_TO (1UL << 42) +#define IO7__PO7_ERRSUM__UGBGE (1UL << 43) +#define IO7__PO7_ERRSUM__UN_MAF_LOST (1UL << 44) +#define IO7__PO7_ERRSUM__UN_PKT_OVF (1UL << 45) +#define IO7__PO7_ERRSUM__UN_CDT_OVF (1UL << 46) +#define IO7__PO7_ERRSUM__UN_DEALLOC (1UL << 47) +#define IO7__PO7_ERRSUM__BH_CDT_TO (1UL << 51) +#define IO7__PO7_ERRSUM__BH_CLK_HDR (1UL << 52) +#define IO7__PO7_ERRSUM__BH_DBE_HDR (1UL << 53) +#define IO7__PO7_ERRSUM__BH_GBG_HDR (1UL << 54) +#define IO7__PO7_ERRSUM__BH_BAD_CMD (1UL << 55) +#define IO7__PO7_ERRSUM__HLT_INT (1UL << 56) +#define IO7__PO7_ERRSUM__HP_INT (1UL << 57) +#define IO7__PO7_ERRSUM__CRD_INT (1UL << 58) +#define IO7__PO7_ERRSUM__STV_INT (1UL << 59) +#define IO7__PO7_ERRSUM__HRD_INT (1UL << 60) +#define IO7__PO7_ERRSUM__BH_SUM (1UL << 61) +#define IO7__PO7_ERRSUM__ERR_LST (1UL << 62) +#define IO7__PO7_ERRSUM__ERR_VALID (1UL << 63) + +#define IO7__PO7_ERRSUM__ERR_MASK (IO7__PO7_ERRSUM__ERR_VALID | \ + IO7__PO7_ERRSUM__CR_SBE) + + /* + * Single 
bit errors aren't covered by ERR_VALID. + */ + if (io->po7_error_sum & IO7__PO7_ERRSUM__CR_SBE) { + printk("%s %sSingle Bit Error(s) detected/corrected\n", + err_print_prefix, + (io->po7_error_sum & IO7__PO7_ERRSUM__CR_SBE2) + ? "Multiple " : ""); + marvel_print_po7_crrct_sym(io->po7_crrct_sym); + } + + /* + * Neither are the interrupt status bits + */ + if (io->po7_error_sum & IO7__PO7_ERRSUM__HLT_INT) + printk("%s Halt Interrupt posted", err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__HP_INT) { + printk("%s Hot Plug Event Interrupt posted", + err_print_prefix); + uncrr_sym_valid |= GEN_MASK(IO7__PO7_UNCRR_SYM__DETECT_SP); + } + if (io->po7_error_sum & IO7__PO7_ERRSUM__CRD_INT) + printk("%s Correctable Error Interrupt posted", + err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__STV_INT) { + printk("%s Starvation Interrupt posted", err_print_prefix); + uncrr_sym_valid |= GEN_MASK(IO7__PO7_UNCRR_SYM__STRV_VTR); + } + if (io->po7_error_sum & IO7__PO7_ERRSUM__HRD_INT) { + printk("%s Hard Error Interrupt posted", err_print_prefix); + uncrr_sym_valid |= GEN_MASK(IO7__PO7_UNCRR_SYM__DETECT_SP); + } + + /* + * Everything else is valid only with ERR_VALID, so skip to the end + * (uncrr_sym check) unless ERR_VALID is set. + */ + if (!(io->po7_error_sum & IO7__PO7_ERRSUM__ERR_VALID)) + goto check_uncrr_sym; + + /* + * Since ERR_VALID is set, VICTIM_SP in uncrr_sym is valid. + * For bits [29:0] to also be valid, the following bits must + * not be set: + * CR_PIO_WBYTE CR_CSR_NXM CR_RSP_NXM + * CR_ERR_RESP MAF_TO + */ + uncrr_sym_valid |= GEN_MASK(IO7__PO7_UNCRR_SYM__VICTIM_SP); + if (!(io->po7_error_sum & (IO7__PO7_ERRSUM__CR_PIO_WBYTE | + IO7__PO7_ERRSUM__CR_CSR_NXM | + IO7__PO7_ERRSUM__CR_RSP_NXM | + IO7__PO7_ERRSUM__CR_ERR_RESP | + IO7__PO7_ERRSUM__MAF_TO))) + uncrr_sym_valid |= 0x3ffffffful; + + if (io->po7_error_sum & IO7__PO7_ERRSUM__CR_PIO_WBYTE) + printk("%s Write byte into IO7 CSR\n", err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__CR_CSR_NXM) + printk("%s PIO to non-existent CSR\n", err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__CR_RPID_ACV) + printk("%s Bus Requester PID (Access Violation)\n", + err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__CR_RSP_NXM) + printk("%s Received NXM response from EV7\n", + err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__CR_ERR_RESP) + printk("%s Received ERROR RESPONSE\n", err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__CR_CLK_DERR) + printk("%s Clock error on data flit\n", err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__CR_DAT_DBE) + printk("%s Double Bit Error Data Error Detected\n", + err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__CR_DAT_GRBG) + printk("%s Garbage Encoding Detected on the data\n", + err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__UGBGE) { + printk("%s Garbage Encoding sent up hose\n", + err_print_prefix); + marvel_print_po7_ugbge_sym(io->po7_ugbge_sym); + } + if (io->po7_error_sum & IO7__PO7_ERRSUM__UN_MAF_LOST) + printk("%s Orphan response (unexpected response)\n", + err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__UN_PKT_OVF) + printk("%s Down hose packet overflow\n", err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__UN_CDT_OVF) + printk("%s Down hose credit overflow\n", err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__UN_DEALLOC) + printk("%s Unexpected or bad dealloc field\n", + err_print_prefix); + + /* + * The black hole events. 
+ */ + if (io->po7_error_sum & IO7__PO7_ERRSUM__MAF_TO) + printk("%s BLACK HOLE: Timeout for all responses\n", + err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__BH_CDT_TO) + printk("%s BLACK HOLE: Credit Timeout\n", err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__BH_CLK_HDR) + printk("%s BLACK HOLE: Clock check on header\n", + err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__BH_DBE_HDR) + printk("%s BLACK HOLE: Uncorrectable Error on header\n", + err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__BH_GBG_HDR) + printk("%s BLACK HOLE: Garbage on header\n", + err_print_prefix); + if (io->po7_error_sum & IO7__PO7_ERRSUM__BH_BAD_CMD) + printk("%s BLACK HOLE: Bad EV7 command\n", + err_print_prefix); + + if (io->po7_error_sum & IO7__PO7_ERRSUM__ERR_LST) + printk("%s Lost Error\n", err_print_prefix); + + printk("%s Failing Packet:\n" + "%s Cycle 1: %016llx\n" + "%s Cycle 2: %016llx\n", + err_print_prefix, + err_print_prefix, io->po7_err_pkt0, + err_print_prefix, io->po7_err_pkt1); + /* + * If there are any valid bits in UNCRR sym for this err, + * print UNCRR_SYM as well. + */ +check_uncrr_sym: + if (uncrr_sym_valid) + marvel_print_po7_uncrr_sym(io->po7_uncrr_sym, uncrr_sym_valid); +} + +static void +marvel_print_pox_tlb_err(u64 tlb_err) +{ + static char *tlb_errors[] = { + "No Error", + "North Port Signaled Error fetching TLB entry", + "PTE invalid or UCC or GBG error on this entry", + "Address did not hit any DMA window" + }; + +#define IO7__POX_TLBERR__ERR_VALID (1UL << 63) +#define IO7__POX_TLBERR__ERRCODE__S (0) +#define IO7__POX_TLBERR__ERRCODE__M (0x3) +#define IO7__POX_TLBERR__ERR_TLB_PTR__S (3) +#define IO7__POX_TLBERR__ERR_TLB_PTR__M (0x7) +#define IO7__POX_TLBERR__FADDR__S (6) +#define IO7__POX_TLBERR__FADDR__M (0x3fffffffffful) + + if (!(tlb_err & IO7__POX_TLBERR__ERR_VALID)) + return; + + printk("%s TLB Error on index 0x%llx:\n" + "%s - %s\n" + "%s - Addr: 0x%016llx\n", + err_print_prefix, + EXTRACT(tlb_err, IO7__POX_TLBERR__ERR_TLB_PTR), + err_print_prefix, + tlb_errors[EXTRACT(tlb_err, IO7__POX_TLBERR__ERRCODE)], + err_print_prefix, + EXTRACT(tlb_err, IO7__POX_TLBERR__FADDR) << 6); +} + +static void +marvel_print_pox_spl_cmplt(u64 spl_cmplt) +{ + char message[80]; + +#define IO7__POX_SPLCMPLT__MESSAGE__S (0) +#define IO7__POX_SPLCMPLT__MESSAGE__M (0x0fffffffful) +#define IO7__POX_SPLCMPLT__SOURCE_BUS__S (40) +#define IO7__POX_SPLCMPLT__SOURCE_BUS__M (0xfful) +#define IO7__POX_SPLCMPLT__SOURCE_DEV__S (35) +#define IO7__POX_SPLCMPLT__SOURCE_DEV__M (0x1ful) +#define IO7__POX_SPLCMPLT__SOURCE_FUNC__S (32) +#define IO7__POX_SPLCMPLT__SOURCE_FUNC__M (0x07ul) + +#define IO7__POX_SPLCMPLT__MSG_CLASS__S (28) +#define IO7__POX_SPLCMPLT__MSG_CLASS__M (0xf) +#define IO7__POX_SPLCMPLT__MSG_INDEX__S (20) +#define IO7__POX_SPLCMPLT__MSG_INDEX__M (0xff) +#define IO7__POX_SPLCMPLT__MSG_CLASSINDEX__S (20) +#define IO7__POX_SPLCMPLT__MSG_CLASSINDEX__M (0xfff) +#define IO7__POX_SPLCMPLT__REM_LOWER_ADDR__S (12) +#define IO7__POX_SPLCMPLT__REM_LOWER_ADDR__M (0x7f) +#define IO7__POX_SPLCMPLT__REM_BYTE_COUNT__S (0) +#define IO7__POX_SPLCMPLT__REM_BYTE_COUNT__M (0xfff) + + printk("%s Split Completion Error:\n" + "%s Source (Bus:Dev:Func): %lld:%lld:%lld\n", + err_print_prefix, + err_print_prefix, + EXTRACT(spl_cmplt, IO7__POX_SPLCMPLT__SOURCE_BUS), + EXTRACT(spl_cmplt, IO7__POX_SPLCMPLT__SOURCE_DEV), + EXTRACT(spl_cmplt, IO7__POX_SPLCMPLT__SOURCE_FUNC)); + + switch(EXTRACT(spl_cmplt, IO7__POX_SPLCMPLT__MSG_CLASSINDEX)) { + case 0x000: + sprintf(message, 
"Normal completion"); + break; + case 0x100: + sprintf(message, "Bridge - Master Abort"); + break; + case 0x101: + sprintf(message, "Bridge - Target Abort"); + break; + case 0x102: + sprintf(message, "Bridge - Uncorrectable Write Data Error"); + break; + case 0x200: + sprintf(message, "Byte Count Out of Range"); + break; + case 0x201: + sprintf(message, "Uncorrectable Split Write Data Error"); + break; + default: + sprintf(message, "%08llx\n", + EXTRACT(spl_cmplt, IO7__POX_SPLCMPLT__MESSAGE)); + break; + } + printk("%s Message: %s\n", err_print_prefix, message); +} + +static void +marvel_print_pox_trans_sum(u64 trans_sum) +{ + static const char * const pcix_cmd[] = { + "Interrupt Acknowledge", + "Special Cycle", + "I/O Read", + "I/O Write", + "Reserved", + "Reserved / Device ID Message", + "Memory Read", + "Memory Write", + "Reserved / Alias to Memory Read Block", + "Reserved / Alias to Memory Write Block", + "Configuration Read", + "Configuration Write", + "Memory Read Multiple / Split Completion", + "Dual Address Cycle", + "Memory Read Line / Memory Read Block", + "Memory Write and Invalidate / Memory Write Block" + }; + +#define IO7__POX_TRANSUM__PCI_ADDR__S (0) +#define IO7__POX_TRANSUM__PCI_ADDR__M (0x3fffffffffffful) +#define IO7__POX_TRANSUM__DAC (1UL << 50) +#define IO7__POX_TRANSUM__PCIX_MASTER_SLOT__S (52) +#define IO7__POX_TRANSUM__PCIX_MASTER_SLOT__M (0xf) +#define IO7__POX_TRANSUM__PCIX_CMD__S (56) +#define IO7__POX_TRANSUM__PCIX_CMD__M (0xf) +#define IO7__POX_TRANSUM__ERR_VALID (1UL << 63) + + if (!(trans_sum & IO7__POX_TRANSUM__ERR_VALID)) + return; + + printk("%s Transaction Summary:\n" + "%s Command: 0x%llx - %s\n" + "%s Address: 0x%016llx%s\n" + "%s PCI-X Master Slot: 0x%llx\n", + err_print_prefix, + err_print_prefix, + EXTRACT(trans_sum, IO7__POX_TRANSUM__PCIX_CMD), + pcix_cmd[EXTRACT(trans_sum, IO7__POX_TRANSUM__PCIX_CMD)], + err_print_prefix, + EXTRACT(trans_sum, IO7__POX_TRANSUM__PCI_ADDR), + (trans_sum & IO7__POX_TRANSUM__DAC) ? 
" (DAC)" : "", + err_print_prefix, + EXTRACT(trans_sum, IO7__POX_TRANSUM__PCIX_MASTER_SLOT)); +} + +static void +marvel_print_pox_err(u64 err_sum, struct ev7_pal_io_one_port *port) +{ +#define IO7__POX_ERRSUM__AGP_REQQ_OVFL (1UL << 4) +#define IO7__POX_ERRSUM__AGP_SYNC_ERR (1UL << 5) +#define IO7__POX_ERRSUM__MRETRY_TO (1UL << 6) +#define IO7__POX_ERRSUM__PCIX_UX_SPL (1UL << 7) +#define IO7__POX_ERRSUM__PCIX_SPLIT_TO (1UL << 8) +#define IO7__POX_ERRSUM__PCIX_DISCARD_SPL (1UL << 9) +#define IO7__POX_ERRSUM__DMA_RD_TO (1UL << 10) +#define IO7__POX_ERRSUM__CSR_NXM_RD (1UL << 11) +#define IO7__POX_ERRSUM__CSR_NXM_WR (1UL << 12) +#define IO7__POX_ERRSUM__DMA_TO (1UL << 13) +#define IO7__POX_ERRSUM__ALL_MABORTS (1UL << 14) +#define IO7__POX_ERRSUM__MABORT (1UL << 15) +#define IO7__POX_ERRSUM__MABORT_MASK (IO7__POX_ERRSUM__ALL_MABORTS|\ + IO7__POX_ERRSUM__MABORT) +#define IO7__POX_ERRSUM__PT_TABORT (1UL << 16) +#define IO7__POX_ERRSUM__PM_TABORT (1UL << 17) +#define IO7__POX_ERRSUM__TABORT_MASK (IO7__POX_ERRSUM__PT_TABORT | \ + IO7__POX_ERRSUM__PM_TABORT) +#define IO7__POX_ERRSUM__SERR (1UL << 18) +#define IO7__POX_ERRSUM__ADDRERR_STB (1UL << 19) +#define IO7__POX_ERRSUM__DETECTED_SERR (1UL << 20) +#define IO7__POX_ERRSUM__PERR (1UL << 21) +#define IO7__POX_ERRSUM__DATAERR_STB_NIOW (1UL << 22) +#define IO7__POX_ERRSUM__DETECTED_PERR (1UL << 23) +#define IO7__POX_ERRSUM__PM_PERR (1UL << 24) +#define IO7__POX_ERRSUM__PT_SCERROR (1UL << 26) +#define IO7__POX_ERRSUM__HUNG_BUS (1UL << 28) +#define IO7__POX_ERRSUM__UPE_ERROR__S (51) +#define IO7__POX_ERRSUM__UPE_ERROR__M (0xffUL) +#define IO7__POX_ERRSUM__UPE_ERROR GEN_MASK(IO7__POX_ERRSUM__UPE_ERROR) +#define IO7__POX_ERRSUM__TLB_ERR (1UL << 59) +#define IO7__POX_ERRSUM__ERR_VALID (1UL << 63) + +#define IO7__POX_ERRSUM__TRANS_SUM__MASK (IO7__POX_ERRSUM__MRETRY_TO | \ + IO7__POX_ERRSUM__PCIX_UX_SPL | \ + IO7__POX_ERRSUM__PCIX_SPLIT_TO | \ + IO7__POX_ERRSUM__DMA_TO | \ + IO7__POX_ERRSUM__MABORT_MASK | \ + IO7__POX_ERRSUM__TABORT_MASK | \ + IO7__POX_ERRSUM__SERR | \ + IO7__POX_ERRSUM__ADDRERR_STB | \ + IO7__POX_ERRSUM__PERR | \ + IO7__POX_ERRSUM__DATAERR_STB_NIOW |\ + IO7__POX_ERRSUM__DETECTED_PERR | \ + IO7__POX_ERRSUM__PM_PERR | \ + IO7__POX_ERRSUM__PT_SCERROR | \ + IO7__POX_ERRSUM__UPE_ERROR) + + if (!(err_sum & IO7__POX_ERRSUM__ERR_VALID)) + return; + + /* + * First the transaction summary errors + */ + if (err_sum & IO7__POX_ERRSUM__MRETRY_TO) + printk("%s IO7 Master Retry Timeout expired\n", + err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__PCIX_UX_SPL) + printk("%s Unexpected Split Completion\n", + err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__PCIX_SPLIT_TO) + printk("%s IO7 Split Completion Timeout expired\n", + err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__DMA_TO) + printk("%s Hung bus during DMA transaction\n", + err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__MABORT_MASK) + printk("%s Master Abort\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__PT_TABORT) + printk("%s IO7 Asserted Target Abort\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__PM_TABORT) + printk("%s IO7 Received Target Abort\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__ADDRERR_STB) { + printk("%s Address or PCI-X Attribute Parity Error\n", + err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__SERR) + printk("%s IO7 Asserted SERR\n", err_print_prefix); + } + if (err_sum & IO7__POX_ERRSUM__PERR) { + if (err_sum & IO7__POX_ERRSUM__DATAERR_STB_NIOW) + printk("%s IO7 Detected Data Parity Error\n", + err_print_prefix); + else + printk("%s 
Split Completion Response with " + "Parity Error\n", err_print_prefix); + } + if (err_sum & IO7__POX_ERRSUM__DETECTED_PERR) + printk("%s PERR detected\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__PM_PERR) + printk("%s PERR while IO7 is master\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__PT_SCERROR) { + printk("%s IO7 Received Split Completion Error message\n", + err_print_prefix); + marvel_print_pox_spl_cmplt(port->pox_spl_cmplt); + } + if (err_sum & IO7__POX_ERRSUM__UPE_ERROR) { + unsigned int upe_error = EXTRACT(err_sum, + IO7__POX_ERRSUM__UPE_ERROR); + int i; + static char *upe_errors[] = { + "Parity Error on MSI write data", + "MSI read (MSI window is write only", + "TLB - Invalid WR transaction", + "TLB - Invalid RD transaction", + "DMA - WR error (see north port)", + "DMA - RD error (see north port)", + "PPR - WR error (see north port)", + "PPR - RD error (see north port)" + }; + + printk("%s UPE Error:\n", err_print_prefix); + for (i = 0; i < 8; i++) { + if (upe_error & (1 << i)) + printk("%s %s\n", err_print_prefix, + upe_errors[i]); + } + } + + /* + * POx_TRANS_SUM, if appropriate. + */ + if (err_sum & IO7__POX_ERRSUM__TRANS_SUM__MASK) + marvel_print_pox_trans_sum(port->pox_trans_sum); + + /* + * Then TLB_ERR. + */ + if (err_sum & IO7__POX_ERRSUM__TLB_ERR) { + printk("%s TLB ERROR\n", err_print_prefix); + marvel_print_pox_tlb_err(port->pox_tlb_err); + } + + /* + * And the single bit status errors. + */ + if (err_sum & IO7__POX_ERRSUM__AGP_REQQ_OVFL) + printk("%s AGP Request Queue Overflow\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__AGP_SYNC_ERR) + printk("%s AGP Sync Error\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__PCIX_DISCARD_SPL) + printk("%s Discarded split completion\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__DMA_RD_TO) + printk("%s DMA Read Timeout\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__CSR_NXM_RD) + printk("%s CSR NXM READ\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__CSR_NXM_WR) + printk("%s CSR NXM WRITE\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__DETECTED_SERR) + printk("%s SERR detected\n", err_print_prefix); + if (err_sum & IO7__POX_ERRSUM__HUNG_BUS) + printk("%s HUNG BUS detected\n", err_print_prefix); +} + +#endif /* CONFIG_VERBOSE_MCHECK */ + +static struct ev7_pal_io_subpacket * +marvel_find_io7_with_error(struct ev7_lf_subpackets *lf_subpackets) +{ + struct ev7_pal_io_subpacket *io = lf_subpackets->io; + struct io7 *io7; + int i; + + /* + * Caller must provide the packet to fill + */ + if (!io) + return NULL; + + /* + * Fill the subpacket with the console's standard fill pattern + */ + memset(io, 0x55, sizeof(*io)); + + for (io7 = NULL; NULL != (io7 = marvel_next_io7(io7)); ) { + unsigned long err_sum = 0; + + err_sum |= io7->csrs->PO7_ERROR_SUM.csr; + for (i = 0; i < IO7_NUM_PORTS; i++) { + if (!io7->ports[i].enabled) + continue; + err_sum |= io7->ports[i].csrs->POx_ERR_SUM.csr; + } + + /* + * Is there at least one error? + */ + if (err_sum & (1UL << 63)) + break; + } + + /* + * Did we find an IO7 with an error? + */ + if (!io7) + return NULL; + + /* + * We have an IO7 with an error. + * + * Fill in the IO subpacket. 
+ */ + io->io_asic_rev = io7->csrs->IO_ASIC_REV.csr; + io->io_sys_rev = io7->csrs->IO_SYS_REV.csr; + io->io7_uph = io7->csrs->IO7_UPH.csr; + io->hpi_ctl = io7->csrs->HPI_CTL.csr; + io->crd_ctl = io7->csrs->CRD_CTL.csr; + io->hei_ctl = io7->csrs->HEI_CTL.csr; + io->po7_error_sum = io7->csrs->PO7_ERROR_SUM.csr; + io->po7_uncrr_sym = io7->csrs->PO7_UNCRR_SYM.csr; + io->po7_crrct_sym = io7->csrs->PO7_CRRCT_SYM.csr; + io->po7_ugbge_sym = io7->csrs->PO7_UGBGE_SYM.csr; + io->po7_err_pkt0 = io7->csrs->PO7_ERR_PKT[0].csr; + io->po7_err_pkt1 = io7->csrs->PO7_ERR_PKT[1].csr; + + for (i = 0; i < IO7_NUM_PORTS; i++) { + io7_ioport_csrs *csrs = io7->ports[i].csrs; + + if (!io7->ports[i].enabled) + continue; + + io->ports[i].pox_err_sum = csrs->POx_ERR_SUM.csr; + io->ports[i].pox_tlb_err = csrs->POx_TLB_ERR.csr; + io->ports[i].pox_spl_cmplt = csrs->POx_SPL_COMPLT.csr; + io->ports[i].pox_trans_sum = csrs->POx_TRANS_SUM.csr; + io->ports[i].pox_first_err = csrs->POx_FIRST_ERR.csr; + io->ports[i].pox_mult_err = csrs->POx_MULT_ERR.csr; + io->ports[i].pox_dm_source = csrs->POx_DM_SOURCE.csr; + io->ports[i].pox_dm_dest = csrs->POx_DM_DEST.csr; + io->ports[i].pox_dm_size = csrs->POx_DM_SIZE.csr; + io->ports[i].pox_dm_ctrl = csrs->POx_DM_CTRL.csr; + + /* + * Ack this port's errors, if any. POx_ERR_SUM must be last. + * + * Most of the error registers get cleared and unlocked when + * the associated bits in POx_ERR_SUM are cleared (by writing + * 1). POx_TLB_ERR is an exception and must be explicitly + * cleared. + */ + csrs->POx_TLB_ERR.csr = io->ports[i].pox_tlb_err; + csrs->POx_ERR_SUM.csr = io->ports[i].pox_err_sum; + mb(); + csrs->POx_ERR_SUM.csr; + } + + /* + * Ack any port 7 error(s). + */ + io7->csrs->PO7_ERROR_SUM.csr = io->po7_error_sum; + mb(); + io7->csrs->PO7_ERROR_SUM.csr; + + /* + * Correct the io7_pid. + */ + lf_subpackets->io_pid = io7->pe; + + return io; +} + +static int +marvel_process_io_error(struct ev7_lf_subpackets *lf_subpackets, int print) +{ + int status = MCHK_DISPOSITION_UNKNOWN_ERROR; + +#ifdef CONFIG_VERBOSE_MCHECK + struct ev7_pal_io_subpacket *io = lf_subpackets->io; + int i; +#endif /* CONFIG_VERBOSE_MCHECK */ + +#define MARVEL_IO_ERR_VALID(x) ((x) & (1UL << 63)) + + if (!lf_subpackets->logout || !lf_subpackets->io) + return status; + + /* + * The PALcode only builds an IO subpacket if there is a + * locally connected IO7. In the cases of + * 1) a uniprocessor kernel + * 2) an mp kernel before the local secondary has called in + * error interrupts are all directed to the primary processor. + * In that case, we may not have an IO subpacket at all and, event + * if we do, it may not be the right now. + * + * If the RBOX indicates an I/O error interrupt, make sure we have + * the correct IO7 information. If we don't have an IO subpacket + * or it's the wrong one, try to find the right one. + * + * RBOX I/O error interrupts are indicated by RBOX_INT<29> and + * RBOX_INT<10>. + */ + if ((lf_subpackets->io->po7_error_sum & (1UL << 32)) || + ((lf_subpackets->io->po7_error_sum | + lf_subpackets->io->ports[0].pox_err_sum | + lf_subpackets->io->ports[1].pox_err_sum | + lf_subpackets->io->ports[2].pox_err_sum | + lf_subpackets->io->ports[3].pox_err_sum) & (1UL << 63))) { + /* + * Either we have no IO subpacket or no error is + * indicated in the one we do have. Try find the + * one with the error. 
+ */ + if (!marvel_find_io7_with_error(lf_subpackets)) + return status; + } + + /* + * We have an IO7 indicating an error - we're going to report it + */ + status = MCHK_DISPOSITION_REPORT; + +#ifdef CONFIG_VERBOSE_MCHECK + + if (!print) + return status; + + printk("%s*Error occurred on IO7 at PID %u\n", + err_print_prefix, lf_subpackets->io_pid); + + /* + * Check port 7 first + */ + if (lf_subpackets->io->po7_error_sum & IO7__PO7_ERRSUM__ERR_MASK) { + marvel_print_po7_err_sum(io); + +#if 0 + printk("%s PORT 7 ERROR:\n" + "%s PO7_ERROR_SUM: %016llx\n" + "%s PO7_UNCRR_SYM: %016llx\n" + "%s PO7_CRRCT_SYM: %016llx\n" + "%s PO7_UGBGE_SYM: %016llx\n" + "%s PO7_ERR_PKT0: %016llx\n" + "%s PO7_ERR_PKT1: %016llx\n", + err_print_prefix, + err_print_prefix, io->po7_error_sum, + err_print_prefix, io->po7_uncrr_sym, + err_print_prefix, io->po7_crrct_sym, + err_print_prefix, io->po7_ugbge_sym, + err_print_prefix, io->po7_err_pkt0, + err_print_prefix, io->po7_err_pkt1); +#endif + } + + /* + * Then loop through the ports + */ + for (i = 0; i < IO7_NUM_PORTS; i++) { + if (!MARVEL_IO_ERR_VALID(io->ports[i].pox_err_sum)) + continue; + + printk("%s PID %u PORT %d POx_ERR_SUM: %016llx\n", + err_print_prefix, + lf_subpackets->io_pid, i, io->ports[i].pox_err_sum); + marvel_print_pox_err(io->ports[i].pox_err_sum, &io->ports[i]); + + printk("%s [ POx_FIRST_ERR: %016llx ]\n", + err_print_prefix, io->ports[i].pox_first_err); + marvel_print_pox_err(io->ports[i].pox_first_err, + &io->ports[i]); + + } + + +#endif /* CONFIG_VERBOSE_MCHECK */ + + return status; +} + +static int +marvel_process_logout_frame(struct ev7_lf_subpackets *lf_subpackets, int print) +{ + int status = MCHK_DISPOSITION_UNKNOWN_ERROR; + + /* + * I/O error? + */ +#define EV7__RBOX_INT__IO_ERROR__MASK 0x20000400ul + if (lf_subpackets->logout && + (lf_subpackets->logout->rbox_int & 0x20000400ul)) + status = marvel_process_io_error(lf_subpackets, print); + + /* + * Probing behind PCI-X bridges can cause machine checks on + * Marvel when the probe is handled by the bridge as a split + * completion transaction. The symptom is an ERROR_RESPONSE + * to a CONFIG address. Since these errors will happen in + * normal operation, dismiss them. 
+ * + * Dismiss if: + * C_STAT = 0x14 (Error Response) + * C_STS<3> = 0 (C_ADDR valid) + * C_ADDR<42> = 1 (I/O) + * C_ADDR<31:22> = 111110xxb (PCI Config space) + */ + if (lf_subpackets->ev7 && + (lf_subpackets->ev7->c_stat == 0x14) && + !(lf_subpackets->ev7->c_sts & 0x8) && + ((lf_subpackets->ev7->c_addr & 0x400ff000000ul) + == 0x400fe000000ul)) + status = MCHK_DISPOSITION_DISMISS; + + return status; +} + +void +marvel_machine_check(unsigned long vector, unsigned long la_ptr) +{ + struct el_subpacket *el_ptr = (struct el_subpacket *)la_ptr; + int (*process_frame)(struct ev7_lf_subpackets *, int) = NULL; + struct ev7_lf_subpackets subpacket_collection = { NULL, }; + struct ev7_pal_io_subpacket scratch_io_packet = { 0, }; + struct ev7_lf_subpackets *lf_subpackets = NULL; + int disposition = MCHK_DISPOSITION_UNKNOWN_ERROR; + char *saved_err_prefix = err_print_prefix; + char *error_type = NULL; + + /* + * Sync the processor + */ + mb(); + draina(); + + switch(vector) { + case SCB_Q_SYSEVENT: + process_frame = marvel_process_680_frame; + error_type = "System Event"; + break; + + case SCB_Q_SYSMCHK: + process_frame = marvel_process_logout_frame; + error_type = "System Uncorrectable Error"; + break; + + case SCB_Q_SYSERR: + process_frame = marvel_process_logout_frame; + error_type = "System Correctable Error"; + break; + + default: + /* Don't know it - pass it up. */ + ev7_machine_check(vector, la_ptr); + return; + } + + /* + * A system event or error has occurred, handle it here. + * + * Any errors in the logout frame have already been cleared by the + * PALcode, so just parse it. + */ + err_print_prefix = KERN_CRIT; + + /* + * Parse the logout frame without printing first. If the only error(s) + * found are classified as "dismissable", then just dismiss them and + * don't print any message + */ + lf_subpackets = + ev7_collect_logout_frame_subpackets(el_ptr, + &subpacket_collection); + if (process_frame && lf_subpackets && lf_subpackets->logout) { + /* + * We might not have the correct (or any) I/O subpacket. + * [ See marvel_process_io_error() for explanation. ] + * If we don't have one, point the io subpacket in + * lf_subpackets at scratch_io_packet so that + * marvel_find_io7_with_error() will have someplace to + * store the info. + */ + if (!lf_subpackets->io) + lf_subpackets->io = &scratch_io_packet; + + /* + * Default io_pid to the processor reporting the error + * [this will get changed in marvel_find_io7_with_error() + * if a different one is needed] + */ + lf_subpackets->io_pid = lf_subpackets->logout->whami; + + /* + * Evaluate the frames. + */ + disposition = process_frame(lf_subpackets, 0); + } + switch(disposition) { + case MCHK_DISPOSITION_DISMISS: + /* Nothing to do. */ + break; + + case MCHK_DISPOSITION_REPORT: + /* Recognized error, report it. */ + printk("%s*%s (Vector 0x%x) reported on CPU %d\n", + err_print_prefix, error_type, + (unsigned int)vector, (int)smp_processor_id()); + el_print_timestamp(&lf_subpackets->logout->timestamp); + process_frame(lf_subpackets, 1); + break; + + default: + /* Unknown - dump the annotated subpackets. */ + printk("%s*%s (Vector 0x%x) reported on CPU %d\n", + err_print_prefix, error_type, + (unsigned int)vector, (int)smp_processor_id()); + el_process_subpacket(el_ptr); + break; + + } + + err_print_prefix = saved_err_prefix; + + /* Release the logout frame. 
*/ + wrmces(0x7); + mb(); +} + +void __init +marvel_register_error_handlers(void) +{ + ev7_register_error_handlers(); +} diff --git a/arch/alpha/kernel/err_titan.c b/arch/alpha/kernel/err_titan.c new file mode 100644 index 0000000000..0ffb2feea4 --- /dev/null +++ b/arch/alpha/kernel/err_titan.c @@ -0,0 +1,761 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/err_titan.c + * + * Copyright (C) 2000 Jeff Wiedemeier (Compaq Computer Corporation) + * + * Error handling code supporting TITAN systems + */ + +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/sched.h> + +#include <asm/io.h> +#include <asm/core_titan.h> +#include <asm/hwrpb.h> +#include <asm/smp.h> +#include <asm/err_common.h> +#include <asm/err_ev6.h> +#include <asm/irq_regs.h> + +#include "err_impl.h" +#include "proto.h" + + +static int +titan_parse_c_misc(u64 c_misc, int print) +{ +#ifdef CONFIG_VERBOSE_MCHECK + char *src; + int nxs = 0; +#endif + int status = MCHK_DISPOSITION_REPORT; + +#define TITAN__CCHIP_MISC__NXM (1UL << 28) +#define TITAN__CCHIP_MISC__NXS__S (29) +#define TITAN__CCHIP_MISC__NXS__M (0x7) + + if (!(c_misc & TITAN__CCHIP_MISC__NXM)) + return MCHK_DISPOSITION_UNKNOWN_ERROR; + +#ifdef CONFIG_VERBOSE_MCHECK + if (!print) + return status; + + nxs = EXTRACT(c_misc, TITAN__CCHIP_MISC__NXS); + switch(nxs) { + case 0: /* CPU 0 */ + case 1: /* CPU 1 */ + case 2: /* CPU 2 */ + case 3: /* CPU 3 */ + src = "CPU"; + /* num is already the CPU number */ + break; + case 4: /* Pchip 0 */ + case 5: /* Pchip 1 */ + src = "Pchip"; + nxs -= 4; + break; + default:/* reserved */ + src = "Unknown, NXS ="; + /* leave num untouched */ + break; + } + + printk("%s Non-existent memory access from: %s %d\n", + err_print_prefix, src, nxs); +#endif /* CONFIG_VERBOSE_MCHECK */ + + return status; +} + +static int +titan_parse_p_serror(int which, u64 serror, int print) +{ + int status = MCHK_DISPOSITION_REPORT; + +#ifdef CONFIG_VERBOSE_MCHECK + static const char * const serror_src[] = { + "GPCI", "APCI", "AGP HP", "AGP LP" + }; + static const char * const serror_cmd[] = { + "DMA Read", "DMA RMW", "SGTE Read", "Reserved" + }; +#endif /* CONFIG_VERBOSE_MCHECK */ + +#define TITAN__PCHIP_SERROR__LOST_UECC (1UL << 0) +#define TITAN__PCHIP_SERROR__UECC (1UL << 1) +#define TITAN__PCHIP_SERROR__CRE (1UL << 2) +#define TITAN__PCHIP_SERROR__NXIO (1UL << 3) +#define TITAN__PCHIP_SERROR__LOST_CRE (1UL << 4) +#define TITAN__PCHIP_SERROR__ECCMASK (TITAN__PCHIP_SERROR__UECC | \ + TITAN__PCHIP_SERROR__CRE) +#define TITAN__PCHIP_SERROR__ERRMASK (TITAN__PCHIP_SERROR__LOST_UECC | \ + TITAN__PCHIP_SERROR__UECC | \ + TITAN__PCHIP_SERROR__CRE | \ + TITAN__PCHIP_SERROR__NXIO | \ + TITAN__PCHIP_SERROR__LOST_CRE) +#define TITAN__PCHIP_SERROR__SRC__S (52) +#define TITAN__PCHIP_SERROR__SRC__M (0x3) +#define TITAN__PCHIP_SERROR__CMD__S (54) +#define TITAN__PCHIP_SERROR__CMD__M (0x3) +#define TITAN__PCHIP_SERROR__SYN__S (56) +#define TITAN__PCHIP_SERROR__SYN__M (0xff) +#define TITAN__PCHIP_SERROR__ADDR__S (15) +#define TITAN__PCHIP_SERROR__ADDR__M (0xffffffffUL) + + if (!(serror & TITAN__PCHIP_SERROR__ERRMASK)) + return MCHK_DISPOSITION_UNKNOWN_ERROR; + +#ifdef CONFIG_VERBOSE_MCHECK + if (!print) + return status; + + printk("%s PChip %d SERROR: %016llx\n", + err_print_prefix, which, serror); + if (serror & TITAN__PCHIP_SERROR__ECCMASK) { + printk("%s %sorrectable ECC Error:\n" + " Source: %-6s Command: %-8s Syndrome: 0x%08x\n" + " Address: 0x%llx\n", + err_print_prefix, + (serror & TITAN__PCHIP_SERROR__UECC) ?
"Unc" : "C", + serror_src[EXTRACT(serror, TITAN__PCHIP_SERROR__SRC)], + serror_cmd[EXTRACT(serror, TITAN__PCHIP_SERROR__CMD)], + (unsigned)EXTRACT(serror, TITAN__PCHIP_SERROR__SYN), + EXTRACT(serror, TITAN__PCHIP_SERROR__ADDR)); + } + if (serror & TITAN__PCHIP_SERROR__NXIO) + printk("%s Non Existent I/O Error\n", err_print_prefix); + if (serror & TITAN__PCHIP_SERROR__LOST_UECC) + printk("%s Lost Uncorrectable ECC Error\n", + err_print_prefix); + if (serror & TITAN__PCHIP_SERROR__LOST_CRE) + printk("%s Lost Correctable ECC Error\n", err_print_prefix); +#endif /* CONFIG_VERBOSE_MCHECK */ + + return status; +} + +static int +titan_parse_p_perror(int which, int port, u64 perror, int print) +{ + int cmd; + unsigned long addr; + int status = MCHK_DISPOSITION_REPORT; + +#ifdef CONFIG_VERBOSE_MCHECK + static const char * const perror_cmd[] = { + "Interrupt Acknowledge", "Special Cycle", + "I/O Read", "I/O Write", + "Reserved", "Reserved", + "Memory Read", "Memory Write", + "Reserved", "Reserved", + "Configuration Read", "Configuration Write", + "Memory Read Multiple", "Dual Address Cycle", + "Memory Read Line", "Memory Write and Invalidate" + }; +#endif /* CONFIG_VERBOSE_MCHECK */ + +#define TITAN__PCHIP_PERROR__LOST (1UL << 0) +#define TITAN__PCHIP_PERROR__SERR (1UL << 1) +#define TITAN__PCHIP_PERROR__PERR (1UL << 2) +#define TITAN__PCHIP_PERROR__DCRTO (1UL << 3) +#define TITAN__PCHIP_PERROR__SGE (1UL << 4) +#define TITAN__PCHIP_PERROR__APE (1UL << 5) +#define TITAN__PCHIP_PERROR__TA (1UL << 6) +#define TITAN__PCHIP_PERROR__DPE (1UL << 7) +#define TITAN__PCHIP_PERROR__NDS (1UL << 8) +#define TITAN__PCHIP_PERROR__IPTPR (1UL << 9) +#define TITAN__PCHIP_PERROR__IPTPW (1UL << 10) +#define TITAN__PCHIP_PERROR__ERRMASK (TITAN__PCHIP_PERROR__LOST | \ + TITAN__PCHIP_PERROR__SERR | \ + TITAN__PCHIP_PERROR__PERR | \ + TITAN__PCHIP_PERROR__DCRTO | \ + TITAN__PCHIP_PERROR__SGE | \ + TITAN__PCHIP_PERROR__APE | \ + TITAN__PCHIP_PERROR__TA | \ + TITAN__PCHIP_PERROR__DPE | \ + TITAN__PCHIP_PERROR__NDS | \ + TITAN__PCHIP_PERROR__IPTPR | \ + TITAN__PCHIP_PERROR__IPTPW) +#define TITAN__PCHIP_PERROR__DAC (1UL << 47) +#define TITAN__PCHIP_PERROR__MWIN (1UL << 48) +#define TITAN__PCHIP_PERROR__CMD__S (52) +#define TITAN__PCHIP_PERROR__CMD__M (0x0f) +#define TITAN__PCHIP_PERROR__ADDR__S (14) +#define TITAN__PCHIP_PERROR__ADDR__M (0x1fffffffful) + + if (!(perror & TITAN__PCHIP_PERROR__ERRMASK)) + return MCHK_DISPOSITION_UNKNOWN_ERROR; + + cmd = EXTRACT(perror, TITAN__PCHIP_PERROR__CMD); + addr = EXTRACT(perror, TITAN__PCHIP_PERROR__ADDR) << 2; + + /* + * Initializing the BIOS on a video card on a bus without + * a south bridge (subtractive decode agent) can result in + * master aborts as the BIOS probes the capabilities of the + * card. XFree86 does such initialization. If the error + * is a master abort (No DevSel as PCI Master) and the command + * is an I/O read or write below the address where we start + * assigning PCI I/O spaces (SRM uses 0x1000), then mark the + * error as dismissable so starting XFree86 doesn't result + * in a series of uncorrectable errors being reported. Also + * dismiss master aborts to VGA frame buffer space + * (0xA0000 - 0xC0000) and legacy BIOS space (0xC0000 - 0x100000) + * for the same reason. + * + * Also mark the error dismissible if it looks like the right + * error but only the Lost bit is set. 
Since the BIOS initialization + * can cause multiple master aborts and the error interrupt can + * be handled on a different CPU than the BIOS code is run on, + * it is possible for a second master abort to occur between the + * time the PALcode reads PERROR and the time it writes PERROR + * to acknowledge the error. If this timing happens, a second + * error will be signalled after the first, and if no additional + * errors occur, will look like a Lost error with no additional + * errors on the same transaction as the previous error. + */ + if (((perror & TITAN__PCHIP_PERROR__NDS) || + ((perror & TITAN__PCHIP_PERROR__ERRMASK) == + TITAN__PCHIP_PERROR__LOST)) && + ((((cmd & 0xE) == 2) && (addr < 0x1000)) || + (((cmd & 0xE) == 6) && (addr >= 0xA0000) && (addr < 0x100000)))) { + status = MCHK_DISPOSITION_DISMISS; + } + +#ifdef CONFIG_VERBOSE_MCHECK + if (!print) + return status; + + printk("%s PChip %d %cPERROR: %016llx\n", + err_print_prefix, which, + port ? 'A' : 'G', perror); + if (perror & TITAN__PCHIP_PERROR__IPTPW) + printk("%s Invalid Peer-to-Peer Write\n", err_print_prefix); + if (perror & TITAN__PCHIP_PERROR__IPTPR) + printk("%s Invalid Peer-to-Peer Read\n", err_print_prefix); + if (perror & TITAN__PCHIP_PERROR__NDS) + printk("%s No DEVSEL as PCI Master [Master Abort]\n", + err_print_prefix); + if (perror & TITAN__PCHIP_PERROR__DPE) + printk("%s Data Parity Error\n", err_print_prefix); + if (perror & TITAN__PCHIP_PERROR__TA) + printk("%s Target Abort\n", err_print_prefix); + if (perror & TITAN__PCHIP_PERROR__APE) + printk("%s Address Parity Error\n", err_print_prefix); + if (perror & TITAN__PCHIP_PERROR__SGE) + printk("%s Scatter-Gather Error, Invalid PTE\n", + err_print_prefix); + if (perror & TITAN__PCHIP_PERROR__DCRTO) + printk("%s Delayed-Completion Retry Timeout\n", + err_print_prefix); + if (perror & TITAN__PCHIP_PERROR__PERR) + printk("%s PERR Asserted\n", err_print_prefix); + if (perror & TITAN__PCHIP_PERROR__SERR) + printk("%s SERR Asserted\n", err_print_prefix); + if (perror & TITAN__PCHIP_PERROR__LOST) + printk("%s Lost Error\n", err_print_prefix); + printk("%s Command: 0x%x - %s\n" + " Address: 0x%lx\n", + err_print_prefix, + cmd, perror_cmd[cmd], + addr); + if (perror & TITAN__PCHIP_PERROR__DAC) + printk("%s Dual Address Cycle\n", err_print_prefix); + if (perror & TITAN__PCHIP_PERROR__MWIN) + printk("%s Hit in Monster Window\n", err_print_prefix); +#endif /* CONFIG_VERBOSE_MCHECK */ + + return status; +} + +static int +titan_parse_p_agperror(int which, u64 agperror, int print) +{ + int status = MCHK_DISPOSITION_REPORT; +#ifdef CONFIG_VERBOSE_MCHECK + int cmd, len; + unsigned long addr; + + static const char * const agperror_cmd[] = { + "Read (low-priority)", "Read (high-priority)", + "Write (low-priority)", "Write (high-priority)", + "Reserved", "Reserved", + "Flush", "Fence" + }; +#endif /* CONFIG_VERBOSE_MCHECK */ + +#define TITAN__PCHIP_AGPERROR__LOST (1UL << 0) +#define TITAN__PCHIP_AGPERROR__LPQFULL (1UL << 1) +#define TITAN__PCHIP_AGPERROR__HPQFULL (1UL << 2) +#define TITAN__PCHIP_AGPERROR__RESCMD (1UL << 3) +#define TITAN__PCHIP_AGPERROR__IPTE (1UL << 4) +#define TITAN__PCHIP_AGPERROR__PTP (1UL << 5) +#define TITAN__PCHIP_AGPERROR__NOWINDOW (1UL << 6) +#define TITAN__PCHIP_AGPERROR__ERRMASK (TITAN__PCHIP_AGPERROR__LOST | \ + TITAN__PCHIP_AGPERROR__LPQFULL | \ + TITAN__PCHIP_AGPERROR__HPQFULL | \ + TITAN__PCHIP_AGPERROR__RESCMD | \ + TITAN__PCHIP_AGPERROR__IPTE | \ + TITAN__PCHIP_AGPERROR__PTP | \ + TITAN__PCHIP_AGPERROR__NOWINDOW) +#define 
TITAN__PCHIP_AGPERROR__DAC (1UL << 48) +#define TITAN__PCHIP_AGPERROR__MWIN (1UL << 49) +#define TITAN__PCHIP_AGPERROR__FENCE (1UL << 59) +#define TITAN__PCHIP_AGPERROR__CMD__S (50) +#define TITAN__PCHIP_AGPERROR__CMD__M (0x07) +#define TITAN__PCHIP_AGPERROR__ADDR__S (15) +#define TITAN__PCHIP_AGPERROR__ADDR__M (0xffffffffUL) +#define TITAN__PCHIP_AGPERROR__LEN__S (53) +#define TITAN__PCHIP_AGPERROR__LEN__M (0x3f) + + if (!(agperror & TITAN__PCHIP_AGPERROR__ERRMASK)) + return MCHK_DISPOSITION_UNKNOWN_ERROR; + +#ifdef CONFIG_VERBOSE_MCHECK + if (!print) + return status; + + cmd = EXTRACT(agperror, TITAN__PCHIP_AGPERROR__CMD); + addr = EXTRACT(agperror, TITAN__PCHIP_AGPERROR__ADDR) << 3; + len = EXTRACT(agperror, TITAN__PCHIP_AGPERROR__LEN); + + printk("%s PChip %d AGPERROR: %016llx\n", err_print_prefix, + which, agperror); + if (agperror & TITAN__PCHIP_AGPERROR__NOWINDOW) + printk("%s No Window\n", err_print_prefix); + if (agperror & TITAN__PCHIP_AGPERROR__PTP) + printk("%s Peer-to-Peer set\n", err_print_prefix); + if (agperror & TITAN__PCHIP_AGPERROR__IPTE) + printk("%s Invalid PTE\n", err_print_prefix); + if (agperror & TITAN__PCHIP_AGPERROR__RESCMD) + printk("%s Reserved Command\n", err_print_prefix); + if (agperror & TITAN__PCHIP_AGPERROR__HPQFULL) + printk("%s HP Transaction Received while Queue Full\n", + err_print_prefix); + if (agperror & TITAN__PCHIP_AGPERROR__LPQFULL) + printk("%s LP Transaction Received while Queue Full\n", + err_print_prefix); + if (agperror & TITAN__PCHIP_AGPERROR__LOST) + printk("%s Lost Error\n", err_print_prefix); + printk("%s Command: 0x%x - %s, %d Quadwords%s\n" + " Address: 0x%lx\n", + err_print_prefix, cmd, agperror_cmd[cmd], len, + (agperror & TITAN__PCHIP_AGPERROR__FENCE) ? ", FENCE" : "", + addr); + if (agperror & TITAN__PCHIP_AGPERROR__DAC) + printk("%s Dual Address Cycle\n", err_print_prefix); + if (agperror & TITAN__PCHIP_AGPERROR__MWIN) + printk("%s Hit in Monster Window\n", err_print_prefix); +#endif /* CONFIG_VERBOSE_MCHECK */ + + return status; +} + +static int +titan_parse_p_chip(int which, u64 serror, u64 gperror, + u64 aperror, u64 agperror, int print) +{ + int status = MCHK_DISPOSITION_UNKNOWN_ERROR; + status |= titan_parse_p_serror(which, serror, print); + status |= titan_parse_p_perror(which, 0, gperror, print); + status |= titan_parse_p_perror(which, 1, aperror, print); + status |= titan_parse_p_agperror(which, agperror, print); + return status; +} + +int +titan_process_logout_frame(struct el_common *mchk_header, int print) +{ + struct el_TITAN_sysdata_mcheck *tmchk = + (struct el_TITAN_sysdata_mcheck *) + ((unsigned long)mchk_header + mchk_header->sys_offset); + int status = MCHK_DISPOSITION_UNKNOWN_ERROR; + + status |= titan_parse_c_misc(tmchk->c_misc, print); + status |= titan_parse_p_chip(0, tmchk->p0_serror, tmchk->p0_gperror, + tmchk->p0_aperror, tmchk->p0_agperror, + print); + status |= titan_parse_p_chip(1, tmchk->p1_serror, tmchk->p1_gperror, + tmchk->p1_aperror, tmchk->p1_agperror, + print); + + return status; +} + +void +titan_machine_check(unsigned long vector, unsigned long la_ptr) +{ + struct el_common *mchk_header = (struct el_common *)la_ptr; + struct el_TITAN_sysdata_mcheck *tmchk = + (struct el_TITAN_sysdata_mcheck *) + ((unsigned long)mchk_header + mchk_header->sys_offset); + u64 irqmask; + + /* + * Mask of Titan interrupt sources which are reported as machine checks + * + * 63 - CChip Error + * 62 - PChip 0 H_Error + * 61 - PChip 1 H_Error + * 60 - PChip 0 C_Error + * 59 - PChip 1 C_Error + */ +#define 
TITAN_MCHECK_INTERRUPT_MASK 0xF800000000000000UL + + /* + * Sync the processor + */ + mb(); + draina(); + + /* + * Only handle system errors here + */ + if ((vector != SCB_Q_SYSMCHK) && (vector != SCB_Q_SYSERR)) { + ev6_machine_check(vector, la_ptr); + return; + } + + /* + * It's a system error, handle it here + * + * The PALcode has already cleared the error, so just parse it + */ + + /* + * Parse the logout frame without printing first. If the only error(s) + * found are classified as "dismissable", then just dismiss them and + * don't print any message + */ + if (titan_process_logout_frame(mchk_header, 0) != + MCHK_DISPOSITION_DISMISS) { + char *saved_err_prefix = err_print_prefix; + err_print_prefix = KERN_CRIT; + + /* + * Either a nondismissable error was detected or no + * recognized error was detected in the logout frame + * -- report the error in either case + */ + printk("%s" + "*System %s Error (Vector 0x%x) reported on CPU %d:\n", + err_print_prefix, + (vector == SCB_Q_SYSERR)?"Correctable":"Uncorrectable", + (unsigned int)vector, (int)smp_processor_id()); + +#ifdef CONFIG_VERBOSE_MCHECK + titan_process_logout_frame(mchk_header, alpha_verbose_mcheck); + if (alpha_verbose_mcheck) + dik_show_regs(get_irq_regs(), NULL); +#endif /* CONFIG_VERBOSE_MCHECK */ + + err_print_prefix = saved_err_prefix; + + /* + * Convert any pending interrupts which report as system + * machine checks to interrupts + */ + irqmask = tmchk->c_dirx & TITAN_MCHECK_INTERRUPT_MASK; + titan_dispatch_irqs(irqmask); + } + + + /* + * Release the logout frame + */ + wrmces(0x7); + mb(); +} + +/* + * Subpacket Annotations + */ +static char *el_titan_pchip0_extended_annotation[] = { + "Subpacket Header", "P0_SCTL", "P0_SERREN", + "P0_APCTL", "P0_APERREN", "P0_AGPERREN", + "P0_ASPRST", "P0_AWSBA0", "P0_AWSBA1", + "P0_AWSBA2", "P0_AWSBA3", "P0_AWSM0", + "P0_AWSM1", "P0_AWSM2", "P0_AWSM3", + "P0_ATBA0", "P0_ATBA1", "P0_ATBA2", + "P0_ATBA3", "P0_GPCTL", "P0_GPERREN", + "P0_GSPRST", "P0_GWSBA0", "P0_GWSBA1", + "P0_GWSBA2", "P0_GWSBA3", "P0_GWSM0", + "P0_GWSM1", "P0_GWSM2", "P0_GWSM3", + "P0_GTBA0", "P0_GTBA1", "P0_GTBA2", + "P0_GTBA3", NULL +}; +static char *el_titan_pchip1_extended_annotation[] = { + "Subpacket Header", "P1_SCTL", "P1_SERREN", + "P1_APCTL", "P1_APERREN", "P1_AGPERREN", + "P1_ASPRST", "P1_AWSBA0", "P1_AWSBA1", + "P1_AWSBA2", "P1_AWSBA3", "P1_AWSM0", + "P1_AWSM1", "P1_AWSM2", "P1_AWSM3", + "P1_ATBA0", "P1_ATBA1", "P1_ATBA2", + "P1_ATBA3", "P1_GPCTL", "P1_GPERREN", + "P1_GSPRST", "P1_GWSBA0", "P1_GWSBA1", + "P1_GWSBA2", "P1_GWSBA3", "P1_GWSM0", + "P1_GWSM1", "P1_GWSM2", "P1_GWSM3", + "P1_GTBA0", "P1_GTBA1", "P1_GTBA2", + "P1_GTBA3", NULL +}; +static char *el_titan_memory_extended_annotation[] = { + "Subpacket Header", "AAR0", "AAR1", + "AAR2", "AAR3", "P0_SCTL", + "P0_GPCTL", "P0_APCTL", "P1_SCTL", + "P1_GPCTL", "P1_SCTL", NULL +}; + +static struct el_subpacket_annotation el_titan_annotations[] = { + SUBPACKET_ANNOTATION(EL_CLASS__REGATTA_FAMILY, + EL_TYPE__REGATTA__TITAN_PCHIP0_EXTENDED, + 1, + "Titan PChip 0 Extended Frame", + el_titan_pchip0_extended_annotation), + SUBPACKET_ANNOTATION(EL_CLASS__REGATTA_FAMILY, + EL_TYPE__REGATTA__TITAN_PCHIP1_EXTENDED, + 1, + "Titan PChip 1 Extended Frame", + el_titan_pchip1_extended_annotation), + SUBPACKET_ANNOTATION(EL_CLASS__REGATTA_FAMILY, + EL_TYPE__REGATTA__TITAN_MEMORY_EXTENDED, + 1, + "Titan Memory Extended Frame", + el_titan_memory_extended_annotation), + SUBPACKET_ANNOTATION(EL_CLASS__REGATTA_FAMILY, + EL_TYPE__TERMINATION__TERMINATION, + 1, + "Termination 
Subpacket", + NULL) +}; + +static struct el_subpacket * +el_process_regatta_subpacket(struct el_subpacket *header) +{ + if (header->class != EL_CLASS__REGATTA_FAMILY) { + printk("%s ** Unexpected header CLASS %d TYPE %d, aborting\n", + err_print_prefix, + header->class, header->type); + return NULL; + } + + switch(header->type) { + case EL_TYPE__REGATTA__PROCESSOR_ERROR_FRAME: + case EL_TYPE__REGATTA__SYSTEM_ERROR_FRAME: + case EL_TYPE__REGATTA__ENVIRONMENTAL_FRAME: + case EL_TYPE__REGATTA__PROCESSOR_DBL_ERROR_HALT: + case EL_TYPE__REGATTA__SYSTEM_DBL_ERROR_HALT: + printk("%s ** Occurred on CPU %d:\n", + err_print_prefix, + (int)header->by_type.regatta_frame.cpuid); + privateer_process_logout_frame((struct el_common *) + header->by_type.regatta_frame.data_start, 1); + break; + default: + printk("%s ** REGATTA TYPE %d SUBPACKET\n", + err_print_prefix, header->type); + el_annotate_subpacket(header); + break; + } + + + return (struct el_subpacket *)((unsigned long)header + header->length); +} + +static struct el_subpacket_handler titan_subpacket_handler = + SUBPACKET_HANDLER_INIT(EL_CLASS__REGATTA_FAMILY, + el_process_regatta_subpacket); + +void __init +titan_register_error_handlers(void) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE (el_titan_annotations); i++) + cdl_register_subpacket_annotation(&el_titan_annotations[i]); + + cdl_register_subpacket_handler(&titan_subpacket_handler); + + ev6_register_error_handlers(); +} + + +/* + * Privateer + */ + +static int +privateer_process_680_frame(struct el_common *mchk_header, int print) +{ + int status = MCHK_DISPOSITION_UNKNOWN_ERROR; +#ifdef CONFIG_VERBOSE_MCHECK + struct el_PRIVATEER_envdata_mcheck *emchk = + (struct el_PRIVATEER_envdata_mcheck *) + ((unsigned long)mchk_header + mchk_header->sys_offset); + + /* TODO - categorize errors, for now, no error */ + + if (!print) + return status; + + /* TODO - decode instead of just dumping... */ + printk("%s Summary Flags: %016llx\n" + " CChip DIRx: %016llx\n" + " System Management IR: %016llx\n" + " CPU IR: %016llx\n" + " Power Supply IR: %016llx\n" + " LM78 Fault Status: %016llx\n" + " System Doors: %016llx\n" + " Temperature Warning: %016llx\n" + " Fan Control: %016llx\n" + " Fatal Power Down Code: %016llx\n", + err_print_prefix, + emchk->summary, + emchk->c_dirx, + emchk->smir, + emchk->cpuir, + emchk->psir, + emchk->fault, + emchk->sys_doors, + emchk->temp_warn, + emchk->fan_ctrl, + emchk->code); +#endif /* CONFIG_VERBOSE_MCHECK */ + + return status; +} + +int +privateer_process_logout_frame(struct el_common *mchk_header, int print) +{ + struct el_common_EV6_mcheck *ev6mchk = + (struct el_common_EV6_mcheck *)mchk_header; + int status = MCHK_DISPOSITION_UNKNOWN_ERROR; + + /* + * Machine check codes + */ +#define PRIVATEER_MCHK__CORR_ECC 0x86 /* 630 */ +#define PRIVATEER_MCHK__DC_TAG_PERR 0x9E /* 630 */ +#define PRIVATEER_MCHK__PAL_BUGCHECK 0x8E /* 670 */ +#define PRIVATEER_MCHK__OS_BUGCHECK 0x90 /* 670 */ +#define PRIVATEER_MCHK__PROC_HRD_ERR 0x98 /* 670 */ +#define PRIVATEER_MCHK__ISTREAM_CMOV_PRX 0xA0 /* 670 */ +#define PRIVATEER_MCHK__ISTREAM_CMOV_FLT 0xA2 /* 670 */ +#define PRIVATEER_MCHK__SYS_HRD_ERR 0x202 /* 660 */ +#define PRIVATEER_MCHK__SYS_CORR_ERR 0x204 /* 620 */ +#define PRIVATEER_MCHK__SYS_ENVIRON 0x206 /* 680 */ + + switch(ev6mchk->MCHK_Code) { + /* + * Vector 630 - Processor, Correctable + */ + case PRIVATEER_MCHK__CORR_ECC: + case PRIVATEER_MCHK__DC_TAG_PERR: + /* + * Fall through to vector 670 for processing... 
+ */ + /* + * Vector 670 - Processor, Uncorrectable + */ + case PRIVATEER_MCHK__PAL_BUGCHECK: + case PRIVATEER_MCHK__OS_BUGCHECK: + case PRIVATEER_MCHK__PROC_HRD_ERR: + case PRIVATEER_MCHK__ISTREAM_CMOV_PRX: + case PRIVATEER_MCHK__ISTREAM_CMOV_FLT: + status |= ev6_process_logout_frame(mchk_header, print); + break; + + /* + * Vector 620 - System, Correctable + */ + case PRIVATEER_MCHK__SYS_CORR_ERR: + /* + * Fall through to vector 660 for processing... + */ + /* + * Vector 660 - System, Uncorrectable + */ + case PRIVATEER_MCHK__SYS_HRD_ERR: + status |= titan_process_logout_frame(mchk_header, print); + break; + + /* + * Vector 680 - System, Environmental + */ + case PRIVATEER_MCHK__SYS_ENVIRON: /* System, Environmental */ + status |= privateer_process_680_frame(mchk_header, print); + break; + + /* + * Unknown + */ + default: + status |= MCHK_DISPOSITION_REPORT; + if (print) { + printk("%s** Unknown Error, frame follows\n", + err_print_prefix); + mchk_dump_logout_frame(mchk_header); + } + + } + + return status; +} + +void +privateer_machine_check(unsigned long vector, unsigned long la_ptr) +{ + struct el_common *mchk_header = (struct el_common *)la_ptr; + struct el_TITAN_sysdata_mcheck *tmchk = + (struct el_TITAN_sysdata_mcheck *) + (la_ptr + mchk_header->sys_offset); + u64 irqmask; + char *saved_err_prefix = err_print_prefix; + +#define PRIVATEER_680_INTERRUPT_MASK (0xE00UL) +#define PRIVATEER_HOTPLUG_INTERRUPT_MASK (0xE00UL) + + /* + * Sync the processor. + */ + mb(); + draina(); + + /* + * Only handle system events here. + */ + if (vector != SCB_Q_SYSEVENT) + return titan_machine_check(vector, la_ptr); + + /* + * Report the event - System Events should be reported even if no + * error is indicated since the event could indicate the return + * to normal status. + */ + err_print_prefix = KERN_CRIT; + printk("%s*System Event (Vector 0x%x) reported on CPU %d:\n", + err_print_prefix, + (unsigned int)vector, (int)smp_processor_id()); + privateer_process_680_frame(mchk_header, 1); + err_print_prefix = saved_err_prefix; + + /* + * Convert any pending interrupts which report as 680 machine + * checks to interrupts. + */ + irqmask = tmchk->c_dirx & PRIVATEER_680_INTERRUPT_MASK; + + /* + * Dispatch the interrupt(s). + */ + titan_dispatch_irqs(irqmask); + + /* + * Release the logout frame. 
*/ + wrmces(0x7); + mb(); +} diff --git a/arch/alpha/kernel/es1888.c b/arch/alpha/kernel/es1888.c new file mode 100644 index 0000000000..297476bb08 --- /dev/null +++ b/arch/alpha/kernel/es1888.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/es1888.c + * + * Init the built-in ES1888 sound chip (SB16 compatible) + */ + +#include <linux/init.h> +#include <asm/io.h> +#include "proto.h" + +void __init +es1888_init(void) +{ + /* Sequence of IO reads to init the audio controller */ + inb(0x0229); + inb(0x0229); + inb(0x0229); + inb(0x022b); + inb(0x0229); + inb(0x022b); + inb(0x0229); + inb(0x0229); + inb(0x022b); + inb(0x0229); + inb(0x0220); /* This sets the base address to 0x220 */ + + /* Sequence to set DMA channels */ + outb(0x01, 0x0226); /* reset */ + inb(0x0226); /* pause */ + outb(0x00, 0x0226); /* release reset */ + while (!(inb(0x022e) & 0x80)) /* wait for bit 7 to assert */ + continue; + inb(0x022a); /* pause */ + outb(0xc6, 0x022c); /* enable extended mode */ + inb(0x022a); /* pause, also forces the write */ + while (inb(0x022c) & 0x80) /* wait for bit 7 to deassert */ + continue; + outb(0xb1, 0x022c); /* setup for write to Interrupt CR */ + while (inb(0x022c) & 0x80) /* wait for bit 7 to deassert */ + continue; + outb(0x14, 0x022c); /* set IRQ 5 */ + while (inb(0x022c) & 0x80) /* wait for bit 7 to deassert */ + continue; + outb(0xb2, 0x022c); /* setup for write to DMA CR */ + while (inb(0x022c) & 0x80) /* wait for bit 7 to deassert */ + continue; + outb(0x18, 0x022c); /* set DMA channel 1 */ + inb(0x022c); /* force the write */ +} diff --git a/arch/alpha/kernel/gct.c b/arch/alpha/kernel/gct.c new file mode 100644 index 0000000000..8ac0088dca --- /dev/null +++ b/arch/alpha/kernel/gct.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/gct.c + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/errno.h> + +#include <asm/hwrpb.h> +#include <asm/gct.h> + +int +gct6_find_nodes(gct6_node *node, gct6_search_struct *search) +{ + gct6_search_struct *wanted; + int status = 0; + + /* First check the magic number. */ + if (node->magic != GCT_NODE_MAGIC) { + printk(KERN_ERR "GCT Node MAGIC incorrect - GCT invalid\n"); + return -EINVAL; + } + + /* Check against the search struct. */ + for (wanted = search; + wanted && (wanted->type | wanted->subtype); + wanted++) { + if (node->type != wanted->type) + continue; + if (node->subtype != wanted->subtype) + continue; + + /* Found it -- call out. */ + if (wanted->callout) + wanted->callout(node); + } + + /* Now walk the tree, siblings first. */ + if (node->next) + status |= gct6_find_nodes(GCT_NODE_PTR(node->next), search); + + /* Then the children. */ + if (node->child) + status |= gct6_find_nodes(GCT_NODE_PTR(node->child), search); + + return status; +} diff --git a/arch/alpha/kernel/head.S b/arch/alpha/kernel/head.S new file mode 100644 index 0000000000..bb48a8ae4e --- /dev/null +++ b/arch/alpha/kernel/head.S @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/kernel/head.S + * + * initial boot stuff.. At this point, the bootloader has already + * switched into OSF/1 PAL-code, and loaded us at the correct address + * (START_ADDR). So there isn't much left for us to do: just set up + * the kernel global pointer and jump to the kernel entry-point. + */ + +#include <linux/init.h> +#include <asm/asm-offsets.h> +#include <asm/pal.h> +#include <asm/setup.h> + +__HEAD +.globl _stext + .set noreorder + .globl __start + .ent __start +_stext: +__start: + .prologue 0 + br $27,1f +1: ldgp $29,0($27) + /* We need to get current_task_info loaded up... */ + lda $8,init_thread_union + /* ...
and find our stack ... */ + lda $30,0x4000 - SIZEOF_PT_REGS($8) + /* ... and then we can start the kernel. */ + jsr $26,start_kernel + call_pal PAL_halt + .end __start + +#ifdef CONFIG_SMP + .align 3 + .globl __smp_callin + .ent __smp_callin + /* On entry here from SRM console, the HWPCB of the per-cpu + slot for this processor has been loaded. We've arranged + for the UNIQUE value for this process to contain the PCBB + of the target idle task. */ +__smp_callin: + .prologue 1 + ldgp $29,0($27) # First order of business, load the GP. + + call_pal PAL_rduniq # Grab the target PCBB. + mov $0,$16 # Install it. + call_pal PAL_swpctx + + lda $8,0x3fff # Find "current". + bic $30,$8,$8 + + jsr $26,smp_callin + call_pal PAL_halt + .end __smp_callin +#endif /* CONFIG_SMP */ + + # + # The following two functions are needed for supporting SRM PALcode + # on the PC164 (at least), since that PALcode manages the interrupt + # masking, and we cannot duplicate the effort without causing problems + # + + .align 3 + .globl cserve_ena + .ent cserve_ena +cserve_ena: + .prologue 0 + bis $16,$16,$17 + lda $16,52($31) + call_pal PAL_cserve + ret ($26) + .end cserve_ena + + .align 3 + .globl cserve_dis + .ent cserve_dis +cserve_dis: + .prologue 0 + bis $16,$16,$17 + lda $16,53($31) + call_pal PAL_cserve + ret ($26) + .end cserve_dis + + # + # It is handy, on occasion, to make halt actually just loop. + # Putting it here means we don't have to recompile the whole + # kernel. + # + + .align 3 + .globl halt + .ent halt +halt: + .prologue 0 + call_pal PAL_halt + .end halt diff --git a/arch/alpha/kernel/io.c b/arch/alpha/kernel/io.c new file mode 100644 index 0000000000..838586abb1 --- /dev/null +++ b/arch/alpha/kernel/io.c @@ -0,0 +1,677 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Alpha IO and memory functions. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/module.h> +#include <asm/io.h> + +/* Out-of-line versions of the i/o routines that redirect into the + platform-specific version. Note that "platform-specific" may mean + "generic", which bumps through the machine vector.
*/ + +unsigned int +ioread8(const void __iomem *addr) +{ + unsigned int ret; + mb(); + ret = IO_CONCAT(__IO_PREFIX,ioread8)(addr); + mb(); + return ret; +} + +unsigned int ioread16(const void __iomem *addr) +{ + unsigned int ret; + mb(); + ret = IO_CONCAT(__IO_PREFIX,ioread16)(addr); + mb(); + return ret; +} + +unsigned int ioread32(const void __iomem *addr) +{ + unsigned int ret; + mb(); + ret = IO_CONCAT(__IO_PREFIX,ioread32)(addr); + mb(); + return ret; +} + +void iowrite8(u8 b, void __iomem *addr) +{ + mb(); + IO_CONCAT(__IO_PREFIX,iowrite8)(b, addr); +} + +void iowrite16(u16 b, void __iomem *addr) +{ + mb(); + IO_CONCAT(__IO_PREFIX,iowrite16)(b, addr); +} + +void iowrite32(u32 b, void __iomem *addr) +{ + mb(); + IO_CONCAT(__IO_PREFIX,iowrite32)(b, addr); +} + +EXPORT_SYMBOL(ioread8); +EXPORT_SYMBOL(ioread16); +EXPORT_SYMBOL(ioread32); +EXPORT_SYMBOL(iowrite8); +EXPORT_SYMBOL(iowrite16); +EXPORT_SYMBOL(iowrite32); + +u8 inb(unsigned long port) +{ + return ioread8(ioport_map(port, 1)); +} + +u16 inw(unsigned long port) +{ + return ioread16(ioport_map(port, 2)); +} + +u32 inl(unsigned long port) +{ + return ioread32(ioport_map(port, 4)); +} + +void outb(u8 b, unsigned long port) +{ + iowrite8(b, ioport_map(port, 1)); +} + +void outw(u16 b, unsigned long port) +{ + iowrite16(b, ioport_map(port, 2)); +} + +void outl(u32 b, unsigned long port) +{ + iowrite32(b, ioport_map(port, 4)); +} + +EXPORT_SYMBOL(inb); +EXPORT_SYMBOL(inw); +EXPORT_SYMBOL(inl); +EXPORT_SYMBOL(outb); +EXPORT_SYMBOL(outw); +EXPORT_SYMBOL(outl); + +u8 __raw_readb(const volatile void __iomem *addr) +{ + return IO_CONCAT(__IO_PREFIX,readb)(addr); +} + +u16 __raw_readw(const volatile void __iomem *addr) +{ + return IO_CONCAT(__IO_PREFIX,readw)(addr); +} + +u32 __raw_readl(const volatile void __iomem *addr) +{ + return IO_CONCAT(__IO_PREFIX,readl)(addr); +} + +u64 __raw_readq(const volatile void __iomem *addr) +{ + return IO_CONCAT(__IO_PREFIX,readq)(addr); +} + +void __raw_writeb(u8 b, volatile void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,writeb)(b, addr); +} + +void __raw_writew(u16 b, volatile void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,writew)(b, addr); +} + +void __raw_writel(u32 b, volatile void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,writel)(b, addr); +} + +void __raw_writeq(u64 b, volatile void __iomem *addr) +{ + IO_CONCAT(__IO_PREFIX,writeq)(b, addr); +} + +EXPORT_SYMBOL(__raw_readb); +EXPORT_SYMBOL(__raw_readw); +EXPORT_SYMBOL(__raw_readl); +EXPORT_SYMBOL(__raw_readq); +EXPORT_SYMBOL(__raw_writeb); +EXPORT_SYMBOL(__raw_writew); +EXPORT_SYMBOL(__raw_writel); +EXPORT_SYMBOL(__raw_writeq); + +u8 readb(const volatile void __iomem *addr) +{ + u8 ret; + mb(); + ret = __raw_readb(addr); + mb(); + return ret; +} + +u16 readw(const volatile void __iomem *addr) +{ + u16 ret; + mb(); + ret = __raw_readw(addr); + mb(); + return ret; +} + +u32 readl(const volatile void __iomem *addr) +{ + u32 ret; + mb(); + ret = __raw_readl(addr); + mb(); + return ret; +} + +u64 readq(const volatile void __iomem *addr) +{ + u64 ret; + mb(); + ret = __raw_readq(addr); + mb(); + return ret; +} + +void writeb(u8 b, volatile void __iomem *addr) +{ + mb(); + __raw_writeb(b, addr); +} + +void writew(u16 b, volatile void __iomem *addr) +{ + mb(); + __raw_writew(b, addr); +} + +void writel(u32 b, volatile void __iomem *addr) +{ + mb(); + __raw_writel(b, addr); +} + +void writeq(u64 b, volatile void __iomem *addr) +{ + mb(); + __raw_writeq(b, addr); +} + +EXPORT_SYMBOL(readb); +EXPORT_SYMBOL(readw); +EXPORT_SYMBOL(readl); +EXPORT_SYMBOL(readq); 
+EXPORT_SYMBOL(writeb); +EXPORT_SYMBOL(writew); +EXPORT_SYMBOL(writel); +EXPORT_SYMBOL(writeq); + +/* + * The _relaxed functions must be ordered w.r.t. each other, but they don't + * have to be ordered w.r.t. other memory accesses. + */ +u8 readb_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readb(addr); +} + +u16 readw_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readw(addr); +} + +u32 readl_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readl(addr); +} + +u64 readq_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readq(addr); +} + +EXPORT_SYMBOL(readb_relaxed); +EXPORT_SYMBOL(readw_relaxed); +EXPORT_SYMBOL(readl_relaxed); +EXPORT_SYMBOL(readq_relaxed); + +/* + * Read COUNT 8-bit bytes from port PORT into memory starting at SRC. + */ +void ioread8_rep(const void __iomem *port, void *dst, unsigned long count) +{ + while ((unsigned long)dst & 0x3) { + if (!count) + return; + count--; + *(unsigned char *)dst = ioread8(port); + dst += 1; + } + + while (count >= 4) { + unsigned int w; + count -= 4; + w = ioread8(port); + w |= ioread8(port) << 8; + w |= ioread8(port) << 16; + w |= ioread8(port) << 24; + *(unsigned int *)dst = w; + dst += 4; + } + + while (count) { + --count; + *(unsigned char *)dst = ioread8(port); + dst += 1; + } +} + +void insb(unsigned long port, void *dst, unsigned long count) +{ + ioread8_rep(ioport_map(port, 1), dst, count); +} + +EXPORT_SYMBOL(ioread8_rep); +EXPORT_SYMBOL(insb); + +/* + * Read COUNT 16-bit words from port PORT into memory starting at + * SRC. SRC must be at least short aligned. This is used by the + * IDE driver to read disk sectors. Performance is important, but + * the interfaces seems to be slow: just using the inlined version + * of the inw() breaks things. + */ +void ioread16_rep(const void __iomem *port, void *dst, unsigned long count) +{ + if (unlikely((unsigned long)dst & 0x3)) { + if (!count) + return; + BUG_ON((unsigned long)dst & 0x1); + count--; + *(unsigned short *)dst = ioread16(port); + dst += 2; + } + + while (count >= 2) { + unsigned int w; + count -= 2; + w = ioread16(port); + w |= ioread16(port) << 16; + *(unsigned int *)dst = w; + dst += 4; + } + + if (count) { + *(unsigned short*)dst = ioread16(port); + } +} + +void insw(unsigned long port, void *dst, unsigned long count) +{ + ioread16_rep(ioport_map(port, 2), dst, count); +} + +EXPORT_SYMBOL(ioread16_rep); +EXPORT_SYMBOL(insw); + + +/* + * Read COUNT 32-bit words from port PORT into memory starting at + * SRC. Now works with any alignment in SRC. Performance is important, + * but the interfaces seems to be slow: just using the inlined version + * of the inl() breaks things. + */ +void ioread32_rep(const void __iomem *port, void *dst, unsigned long count) +{ + if (unlikely((unsigned long)dst & 0x3)) { + while (count--) { + struct S { int x __attribute__((packed)); }; + ((struct S *)dst)->x = ioread32(port); + dst += 4; + } + } else { + /* Buffer 32-bit aligned. */ + while (count--) { + *(unsigned int *)dst = ioread32(port); + dst += 4; + } + } +} + +void insl(unsigned long port, void *dst, unsigned long count) +{ + ioread32_rep(ioport_map(port, 4), dst, count); +} + +EXPORT_SYMBOL(ioread32_rep); +EXPORT_SYMBOL(insl); + + +/* + * Like insb but in the opposite direction. + * Don't worry as much about doing aligned memory transfers: + * doing byte reads the "slow" way isn't nearly as slow as + * doing byte writes the slow way (no r-m-w cycle). 
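One practical consequence of the mb() placed before each iowrite*/write* above: ordinary memory stores issued earlier, say a descriptor filled in RAM, are ordered before the MMIO store that tells the device to look at it. A hedged sketch of that pattern (the device, register offset and descriptor layout are invented purely for illustration):

struct fake_desc { u64 busaddr; u32 len; u32 ready; };

static void fake_ring_doorbell(void __iomem *regs, struct fake_desc *d,
			       dma_addr_t addr, u32 len)
{
	d->busaddr = addr;		/* plain memory stores ...            */
	d->len     = len;
	d->ready   = 1;
	writel(1, regs + 0x40);		/* the mb() inside writel() orders the
					   descriptor stores before the
					   doorbell write reaches the bus    */
}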
+ */ +void iowrite8_rep(void __iomem *port, const void *xsrc, unsigned long count) +{ + const unsigned char *src = xsrc; + while (count--) + iowrite8(*src++, port); +} + +void outsb(unsigned long port, const void *src, unsigned long count) +{ + iowrite8_rep(ioport_map(port, 1), src, count); +} + +EXPORT_SYMBOL(iowrite8_rep); +EXPORT_SYMBOL(outsb); + + +/* + * Like insw but in the opposite direction. This is used by the IDE + * driver to write disk sectors. Performance is important, but the + * interfaces seems to be slow: just using the inlined version of the + * outw() breaks things. + */ +void iowrite16_rep(void __iomem *port, const void *src, unsigned long count) +{ + if (unlikely((unsigned long)src & 0x3)) { + if (!count) + return; + BUG_ON((unsigned long)src & 0x1); + iowrite16(*(unsigned short *)src, port); + src += 2; + --count; + } + + while (count >= 2) { + unsigned int w; + count -= 2; + w = *(unsigned int *)src; + src += 4; + iowrite16(w >> 0, port); + iowrite16(w >> 16, port); + } + + if (count) { + iowrite16(*(unsigned short *)src, port); + } +} + +void outsw(unsigned long port, const void *src, unsigned long count) +{ + iowrite16_rep(ioport_map(port, 2), src, count); +} + +EXPORT_SYMBOL(iowrite16_rep); +EXPORT_SYMBOL(outsw); + + +/* + * Like insl but in the opposite direction. This is used by the IDE + * driver to write disk sectors. Works with any alignment in SRC. + * Performance is important, but the interfaces seems to be slow: + * just using the inlined version of the outl() breaks things. + */ +void iowrite32_rep(void __iomem *port, const void *src, unsigned long count) +{ + if (unlikely((unsigned long)src & 0x3)) { + while (count--) { + struct S { int x __attribute__((packed)); }; + iowrite32(((struct S *)src)->x, port); + src += 4; + } + } else { + /* Buffer 32-bit aligned. */ + while (count--) { + iowrite32(*(unsigned int *)src, port); + src += 4; + } + } +} + +void outsl(unsigned long port, const void *src, unsigned long count) +{ + iowrite32_rep(ioport_map(port, 4), src, count); +} + +EXPORT_SYMBOL(iowrite32_rep); +EXPORT_SYMBOL(outsl); + + +/* + * Copy data from IO memory space to "real" memory space. + * This needs to be optimized. + */ +void memcpy_fromio(void *to, const volatile void __iomem *from, long count) +{ + /* Optimize co-aligned transfers. Everything else gets handled + a byte at a time. */ + + if (count >= 8 && ((u64)to & 7) == ((u64)from & 7)) { + count -= 8; + do { + *(u64 *)to = __raw_readq(from); + count -= 8; + to += 8; + from += 8; + } while (count >= 0); + count += 8; + } + + if (count >= 4 && ((u64)to & 3) == ((u64)from & 3)) { + count -= 4; + do { + *(u32 *)to = __raw_readl(from); + count -= 4; + to += 4; + from += 4; + } while (count >= 0); + count += 4; + } + + if (count >= 2 && ((u64)to & 1) == ((u64)from & 1)) { + count -= 2; + do { + *(u16 *)to = __raw_readw(from); + count -= 2; + to += 2; + from += 2; + } while (count >= 0); + count += 2; + } + + while (count > 0) { + *(u8 *) to = __raw_readb(from); + count--; + to++; + from++; + } + mb(); +} + +EXPORT_SYMBOL(memcpy_fromio); + + +/* + * Copy data from "real" memory space to IO memory space. + * This needs to be optimized. + */ +void memcpy_toio(volatile void __iomem *to, const void *from, long count) +{ + /* Optimize co-aligned transfers. Everything else gets handled + a byte at a time. */ + /* FIXME -- align FROM. 
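The pre-subtract/post-add bookkeeping used in memcpy_fromio() above (and again in memcpy_toio() below) is easy to misread, so here is one pass worked through:

/*
 * count = 20, both pointers quadword aligned:
 *   count -= 8                      -> 12   (at least one quadword left)
 *   loop:  copy 8 bytes, count -= 8 -> 4    (4 >= 0, loop again)
 *          copy 8 bytes, count -= 8 -> -4   (negative: stop)
 *   count += 8                      -> 4    (exact remainder, picked up
 *                                            by the 32/16/8-bit passes)
 */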
*/ + + if (count >= 8 && ((u64)to & 7) == ((u64)from & 7)) { + count -= 8; + do { + __raw_writeq(*(const u64 *)from, to); + count -= 8; + to += 8; + from += 8; + } while (count >= 0); + count += 8; + } + + if (count >= 4 && ((u64)to & 3) == ((u64)from & 3)) { + count -= 4; + do { + __raw_writel(*(const u32 *)from, to); + count -= 4; + to += 4; + from += 4; + } while (count >= 0); + count += 4; + } + + if (count >= 2 && ((u64)to & 1) == ((u64)from & 1)) { + count -= 2; + do { + __raw_writew(*(const u16 *)from, to); + count -= 2; + to += 2; + from += 2; + } while (count >= 0); + count += 2; + } + + while (count > 0) { + __raw_writeb(*(const u8 *) from, to); + count--; + to++; + from++; + } + mb(); +} + +EXPORT_SYMBOL(memcpy_toio); + + +/* + * "memset" on IO memory space. + */ +void _memset_c_io(volatile void __iomem *to, unsigned long c, long count) +{ + /* Handle any initial odd byte */ + if (count > 0 && ((u64)to & 1)) { + __raw_writeb(c, to); + to++; + count--; + } + + /* Handle any initial odd halfword */ + if (count >= 2 && ((u64)to & 2)) { + __raw_writew(c, to); + to += 2; + count -= 2; + } + + /* Handle any initial odd word */ + if (count >= 4 && ((u64)to & 4)) { + __raw_writel(c, to); + to += 4; + count -= 4; + } + + /* Handle all full-sized quadwords: we're aligned + (or have a small count) */ + count -= 8; + if (count >= 0) { + do { + __raw_writeq(c, to); + to += 8; + count -= 8; + } while (count >= 0); + } + count += 8; + + /* The tail is word-aligned if we still have count >= 4 */ + if (count >= 4) { + __raw_writel(c, to); + to += 4; + count -= 4; + } + + /* The tail is half-word aligned if we have count >= 2 */ + if (count >= 2) { + __raw_writew(c, to); + to += 2; + count -= 2; + } + + /* And finally, one last byte.. */ + if (count) { + __raw_writeb(c, to); + } + mb(); +} + +EXPORT_SYMBOL(_memset_c_io); + +/* A version of memcpy used by the vga console routines to move data around + arbitrarily between screen and main memory. */ + +void +scr_memcpyw(u16 *d, const u16 *s, unsigned int count) +{ + const u16 __iomem *ios = (const u16 __iomem *) s; + u16 __iomem *iod = (u16 __iomem *) d; + int s_isio = __is_ioaddr(s); + int d_isio = __is_ioaddr(d); + + if (s_isio) { + if (d_isio) { + /* FIXME: Should handle unaligned ops and + operation widening. */ + + count /= 2; + while (count--) { + u16 tmp = __raw_readw(ios++); + __raw_writew(tmp, iod++); + } + } + else + memcpy_fromio(d, ios, count); + } else { + if (d_isio) + memcpy_toio(iod, s, count); + else + memcpy(d, s, count); + } +} + +EXPORT_SYMBOL(scr_memcpyw); + +void __iomem *ioport_map(unsigned long port, unsigned int size) +{ + return IO_CONCAT(__IO_PREFIX,ioportmap) (port); +} + +void ioport_unmap(void __iomem *addr) +{ +} + +EXPORT_SYMBOL(ioport_map); +EXPORT_SYMBOL(ioport_unmap); diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c new file mode 100644 index 0000000000..f6d2946edb --- /dev/null +++ b/arch/alpha/kernel/irq.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/irq.c + * + * Copyright (C) 1995 Linus Torvalds + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. Thus setups with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. 
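_memset_c_io() above stores the value it is handed at byte, word, longword and quadword width, so callers must pass the fill byte already replicated across 64 bits. The memset_io() wrapper in the Alpha asm/io.h (outside this hunk) does that replication; roughly:

/* sketch of the wrapper, shown here for context only */
#define memset_io(to, b, len) \
	_memset_c_io((to), 0x0101010101010101UL * (u8)(b), (len))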
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +volatile unsigned long irq_err_count; +DEFINE_PER_CPU(unsigned long, irq_pmi_count); + +void ack_bad_irq(unsigned int irq) +{ + irq_err_count++; + printk(KERN_CRIT "Unexpected IRQ trap at vector %u\n", irq); +} + +#ifdef CONFIG_SMP +static char irq_user_affinity[NR_IRQS]; + +int irq_select_affinity(unsigned int irq) +{ + struct irq_data *data = irq_get_irq_data(irq); + struct irq_chip *chip; + static int last_cpu; + int cpu = last_cpu + 1; + + if (!data) + return 1; + chip = irq_data_get_irq_chip(data); + + if (!chip->irq_set_affinity || irq_user_affinity[irq]) + return 1; + + while (!cpu_possible(cpu) || + !cpumask_test_cpu(cpu, irq_default_affinity)) + cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0); + last_cpu = cpu; + + cpumask_copy(irq_data_get_affinity_mask(data), cpumask_of(cpu)); + chip->irq_set_affinity(data, cpumask_of(cpu), false); + return 0; +} +#endif /* CONFIG_SMP */ + +int arch_show_interrupts(struct seq_file *p, int prec) +{ + int j; + +#ifdef CONFIG_SMP + seq_puts(p, "IPI: "); + for_each_online_cpu(j) + seq_printf(p, "%10lu ", cpu_data[j].ipi_count); + seq_putc(p, '\n'); +#endif + seq_puts(p, "PMI: "); + for_each_online_cpu(j) + seq_printf(p, "%10lu ", per_cpu(irq_pmi_count, j)); + seq_puts(p, " Performance Monitoring\n"); + seq_printf(p, "ERR: %10lu\n", irq_err_count); + return 0; +} + +/* + * handle_irq handles all normal device IRQ's (the special + * SMP cross-CPU interrupts have their own specific + * handlers). + */ + +#define MAX_ILLEGAL_IRQS 16 + +void +handle_irq(int irq) +{ + /* + * We ack quickly, we don't want the irq controller + * thinking we're snobs just because some other CPU has + * disabled global interrupts (we have already done the + * INT_ACK cycles, it's too late to try to pretend to the + * controller that we aren't taking the interrupt). + * + * 0 return value means that this irq is already being + * handled by some other CPU. (or is disabled) + */ + static unsigned int illegal_count=0; + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc || ((unsigned) irq > ACTUAL_NR_IRQS && + illegal_count < MAX_ILLEGAL_IRQS)) { + irq_err_count++; + illegal_count++; + printk(KERN_CRIT "device_interrupt: invalid interrupt %d\n", + irq); + return; + } + + irq_enter(); + generic_handle_irq_desc(desc); + irq_exit(); +} diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c new file mode 100644 index 0000000000..d17e44c99d --- /dev/null +++ b/arch/alpha/kernel/irq_alpha.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Alpha specific irq code. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "proto.h" +#include "irq_impl.h" + +/* Hack minimum IPL during interrupt processing for broken hardware. */ +#ifdef CONFIG_ALPHA_BROKEN_IRQ_MASK +int __min_ipl; +EXPORT_SYMBOL(__min_ipl); +#endif + +/* + * Performance counter hook. A module can override this to + * do something useful. + */ +static void +dummy_perf(unsigned long vector, struct pt_regs *regs) +{ + irq_err_count++; + printk(KERN_CRIT "Performance counter interrupt!\n"); +} + +void (*perf_irq)(unsigned long, struct pt_regs *) = dummy_perf; +EXPORT_SYMBOL(perf_irq); + +/* + * The main interrupt entry point. 
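As the comment above dummy_perf() says, a module can take over the performance-counter vector simply by storing its own handler in perf_irq (the pointer is exported). A minimal, hypothetical module fragment, for illustration only:

/* the extern normally comes from the arch headers; shown inline here */
extern void (*perf_irq)(unsigned long vector, struct pt_regs *regs);

static void my_pmc_handler(unsigned long vector, struct pt_regs *regs)
{
	/* read the counters, record a sample, etc. */
}

static int __init my_pmc_init(void)
{
	perf_irq = my_pmc_handler;	/* put dummy_perf back on unload */
	return 0;
}
module_init(my_pmc_init);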
+ */ + +asmlinkage void +do_entInt(unsigned long type, unsigned long vector, + unsigned long la_ptr, struct pt_regs *regs) +{ + struct pt_regs *old_regs; + + /* + * Disable interrupts during IRQ handling. + * Note that there is no matching local_irq_enable() due to + * severe problems with RTI at IPL0 and some MILO PALcode + * (namely LX164). + */ + local_irq_disable(); + switch (type) { + case 0: +#ifdef CONFIG_SMP + handle_ipi(regs); + return; +#else + irq_err_count++; + printk(KERN_CRIT "Interprocessor interrupt? " + "You must be kidding!\n"); +#endif + break; + case 1: + old_regs = set_irq_regs(regs); + handle_irq(RTC_IRQ); + set_irq_regs(old_regs); + return; + case 2: + old_regs = set_irq_regs(regs); + alpha_mv.machine_check(vector, la_ptr); + set_irq_regs(old_regs); + return; + case 3: + old_regs = set_irq_regs(regs); + alpha_mv.device_interrupt(vector); + set_irq_regs(old_regs); + return; + case 4: + perf_irq(la_ptr, regs); + return; + default: + printk(KERN_CRIT "Hardware intr %ld %lx? Huh?\n", + type, vector); + } + printk(KERN_CRIT "PC = %016lx PS=%04lx\n", regs->pc, regs->ps); +} + +void __init +common_init_isa_dma(void) +{ + outb(0, DMA1_RESET_REG); + outb(0, DMA2_RESET_REG); + outb(0, DMA1_CLR_MASK_REG); + outb(0, DMA2_CLR_MASK_REG); +} + +void __init +init_IRQ(void) +{ + /* Just in case the platform init_irq() causes interrupts/mchecks + (as is the case with RAWHIDE, at least). */ + wrent(entInt, 0); + + alpha_mv.init_irq(); +} + +/* + * machine error checks + */ +#define MCHK_K_TPERR 0x0080 +#define MCHK_K_TCPERR 0x0082 +#define MCHK_K_HERR 0x0084 +#define MCHK_K_ECC_C 0x0086 +#define MCHK_K_ECC_NC 0x0088 +#define MCHK_K_OS_BUGCHECK 0x008A +#define MCHK_K_PAL_BUGCHECK 0x0090 + +#ifndef CONFIG_SMP +struct mcheck_info __mcheck_info; +#endif + +void +process_mcheck_info(unsigned long vector, unsigned long la_ptr, + const char *machine, int expected) +{ + struct el_common *mchk_header; + const char *reason; + + /* + * See if the machine check is due to a badaddr() and if so, + * ignore it. + */ + +#ifdef CONFIG_VERBOSE_MCHECK + if (alpha_verbose_mcheck > 1) { + printk(KERN_CRIT "%s machine check %s\n", machine, + expected ? "expected." : "NOT expected!!!"); + } +#endif + + if (expected) { + int cpu = smp_processor_id(); + mcheck_expected(cpu) = 0; + mcheck_taken(cpu) = 1; + return; + } + + mchk_header = (struct el_common *)la_ptr; + + printk(KERN_CRIT "%s machine check: vector=0x%lx pc=0x%lx code=0x%x\n", + machine, vector, get_irq_regs()->pc, mchk_header->code); + + switch (mchk_header->code) { + /* Machine check reasons. Defined according to PALcode sources. 
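The expected/taken pair handled above is the other half of a probing idiom used by the chipset code later in this patch (the PCI configuration-space readers): announce that a machine check may happen, touch the address, then ask whether one arrived. A simplified sketch of that caller side, where addr stands for the probed bus address and vip is the Alpha volatile-int-pointer typedef:

unsigned int cpu = smp_processor_id();
long value;

mcheck_expected(cpu) = 1;
mcheck_taken(cpu) = 0;
mb();
value = *(vip)addr;		/* may machine-check if nothing decodes it */
mb();
if (mcheck_taken(cpu)) {	/* set by the "expected" branch above      */
	mcheck_taken(cpu) = 0;
	value = -1;		/* treat as a master abort / no device     */
}
mcheck_expected(cpu) = 0;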
*/ + case 0x80: reason = "tag parity error"; break; + case 0x82: reason = "tag control parity error"; break; + case 0x84: reason = "generic hard error"; break; + case 0x86: reason = "correctable ECC error"; break; + case 0x88: reason = "uncorrectable ECC error"; break; + case 0x8A: reason = "OS-specific PAL bugcheck"; break; + case 0x90: reason = "callsys in kernel mode"; break; + case 0x96: reason = "i-cache read retryable error"; break; + case 0x98: reason = "processor detected hard error"; break; + + /* System specific (these are for Alcor, at least): */ + case 0x202: reason = "system detected hard error"; break; + case 0x203: reason = "system detected uncorrectable ECC error"; break; + case 0x204: reason = "SIO SERR occurred on PCI bus"; break; + case 0x205: reason = "parity error detected by core logic"; break; + case 0x206: reason = "SIO IOCHK occurred on ISA bus"; break; + case 0x207: reason = "non-existent memory error"; break; + case 0x208: reason = "MCHK_K_DCSR"; break; + case 0x209: reason = "PCI SERR detected"; break; + case 0x20b: reason = "PCI data parity error detected"; break; + case 0x20d: reason = "PCI address parity error detected"; break; + case 0x20f: reason = "PCI master abort error"; break; + case 0x211: reason = "PCI target abort error"; break; + case 0x213: reason = "scatter/gather PTE invalid error"; break; + case 0x215: reason = "flash ROM write error"; break; + case 0x217: reason = "IOA timeout detected"; break; + case 0x219: reason = "IOCHK#, EISA add-in board parity or other catastrophic error"; break; + case 0x21b: reason = "EISA fail-safe timer timeout"; break; + case 0x21d: reason = "EISA bus time-out"; break; + case 0x21f: reason = "EISA software generated NMI"; break; + case 0x221: reason = "unexpected ev5 IRQ[3] interrupt"; break; + default: reason = "unknown"; break; + } + + printk(KERN_CRIT "machine check type: %s%s\n", + reason, mchk_header->retry ? " (retryable)" : ""); + + dik_show_regs(get_irq_regs(), NULL); + +#ifdef CONFIG_VERBOSE_MCHECK + if (alpha_verbose_mcheck > 1) { + /* Dump the logout area to give all info. */ + unsigned long *ptr = (unsigned long *)la_ptr; + long i; + for (i = 0; i < mchk_header->size / sizeof(long); i += 2) { + printk(KERN_CRIT " +%8lx %016lx %016lx\n", + i*sizeof(long), ptr[i], ptr[i+1]); + } + } +#endif /* CONFIG_VERBOSE_MCHECK */ +} + +/* + * The special RTC interrupt type. The interrupt itself was + * processed by PALcode, and comes in via entInt vector 1. + */ +void __init +init_rtc_irq(irq_handler_t handler) +{ + irq_set_chip_and_handler_name(RTC_IRQ, &dummy_irq_chip, + handle_percpu_irq, "RTC"); + if (!handler) + handler = rtc_timer_interrupt; + if (request_irq(RTC_IRQ, handler, 0, "timer", NULL)) + pr_err("Failed to register timer interrupt\n"); +} diff --git a/arch/alpha/kernel/irq_i8259.c b/arch/alpha/kernel/irq_i8259.c new file mode 100644 index 0000000000..1dcf0d9038 --- /dev/null +++ b/arch/alpha/kernel/irq_i8259.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/irq_i8259.c + * + * This is the 'legacy' 8259A Programmable Interrupt Controller, + * present in the majority of PC/AT boxes. + * + * Started hacking from linux-2.3.30pre6/arch/i386/kernel/i8259.c. + */ + +#include +#include +#include +#include +#include + +#include + +#include "proto.h" +#include "irq_impl.h" + + +/* Note mask bit is true for DISABLED irqs. 
*/ +static unsigned int cached_irq_mask = 0xffff; +static DEFINE_SPINLOCK(i8259_irq_lock); + +static inline void +i8259_update_irq_hw(unsigned int irq, unsigned long mask) +{ + int port = 0x21; + if (irq & 8) mask >>= 8; + if (irq & 8) port = 0xA1; + outb(mask, port); +} + +inline void +i8259a_enable_irq(struct irq_data *d) +{ + spin_lock(&i8259_irq_lock); + i8259_update_irq_hw(d->irq, cached_irq_mask &= ~(1 << d->irq)); + spin_unlock(&i8259_irq_lock); +} + +static inline void +__i8259a_disable_irq(unsigned int irq) +{ + i8259_update_irq_hw(irq, cached_irq_mask |= 1 << irq); +} + +void +i8259a_disable_irq(struct irq_data *d) +{ + spin_lock(&i8259_irq_lock); + __i8259a_disable_irq(d->irq); + spin_unlock(&i8259_irq_lock); +} + +void +i8259a_mask_and_ack_irq(struct irq_data *d) +{ + unsigned int irq = d->irq; + + spin_lock(&i8259_irq_lock); + __i8259a_disable_irq(irq); + + /* Ack the interrupt making it the lowest priority. */ + if (irq >= 8) { + outb(0xE0 | (irq - 8), 0xa0); /* ack the slave */ + irq = 2; + } + outb(0xE0 | irq, 0x20); /* ack the master */ + spin_unlock(&i8259_irq_lock); +} + +struct irq_chip i8259a_irq_type = { + .name = "XT-PIC", + .irq_unmask = i8259a_enable_irq, + .irq_mask = i8259a_disable_irq, + .irq_mask_ack = i8259a_mask_and_ack_irq, +}; + +void __init +init_i8259a_irqs(void) +{ + long i; + + outb(0xff, 0x21); /* mask all of 8259A-1 */ + outb(0xff, 0xA1); /* mask all of 8259A-2 */ + + for (i = 0; i < 16; i++) { + irq_set_chip_and_handler(i, &i8259a_irq_type, handle_level_irq); + } + + if (request_irq(2, no_action, 0, "cascade", NULL)) + pr_err("Failed to request irq 2 (cascade)\n"); +} + + +#if defined(CONFIG_ALPHA_GENERIC) +# define IACK_SC alpha_mv.iack_sc +#elif defined(CONFIG_ALPHA_APECS) +# define IACK_SC APECS_IACK_SC +#elif defined(CONFIG_ALPHA_LCA) +# define IACK_SC LCA_IACK_SC +#elif defined(CONFIG_ALPHA_CIA) +# define IACK_SC CIA_IACK_SC +#elif defined(CONFIG_ALPHA_PYXIS) +# define IACK_SC PYXIS_IACK_SC +#elif defined(CONFIG_ALPHA_TITAN) +# define IACK_SC TITAN_IACK_SC +#elif defined(CONFIG_ALPHA_TSUNAMI) +# define IACK_SC TSUNAMI_IACK_SC +#elif defined(CONFIG_ALPHA_IRONGATE) +# define IACK_SC IRONGATE_IACK_SC +#endif +/* Note that CONFIG_ALPHA_POLARIS is intentionally left out here, since + sys_rx164 wants to use isa_no_iack_sc_device_interrupt for some reason. */ + +#if defined(IACK_SC) +void +isa_device_interrupt(unsigned long vector) +{ + /* + * Generate a PCI interrupt acknowledge cycle. The PIC will + * respond with the interrupt vector of the highest priority + * interrupt that is pending. The PALcode sets up the + * interrupts vectors such that irq level L generates vector L. + */ + int j = *(vuip) IACK_SC; + j &= 0xff; + handle_irq(j); +} +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || !defined(IACK_SC) +void +isa_no_iack_sc_device_interrupt(unsigned long vector) +{ + unsigned long pic; + + /* + * It seems to me that the probability of two or more *device* + * interrupts occurring at almost exactly the same time is + * pretty low. So why pay the price of checking for + * additional interrupts here if the common case can be + * handled so much easier? + */ + /* + * The first read of gives you *all* interrupting lines. + * Therefore, read the mask register and and out those lines + * not enabled. Note that some documentation has 21 and a1 + * write only. This is not true. 
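A quick worked example of the mask handling in i8259_update_irq_hw() and friends above:

/*
 * irq 12 (lives on the slave PIC):
 *   i8259a_enable_irq:  cached_irq_mask &= ~(1 << 12)  -> 0xefff
 *   update_irq_hw:      irq & 8 is set, so mask >>= 8  -> 0xef,
 *                       and the byte goes to port 0xA1 (slave);
 *   irqs 0-7 would instead be written, unshifted, to port 0x21 (master).
 */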
+ */ + pic = inb(0x20) | (inb(0xA0) << 8); /* read isr */ + pic &= 0xFFFB; /* mask out cascade & hibits */ + + while (pic) { + int j = ffz(~pic); + pic &= pic - 1; + handle_irq(j); + } +} +#endif diff --git a/arch/alpha/kernel/irq_impl.h b/arch/alpha/kernel/irq_impl.h new file mode 100644 index 0000000000..fbf21892e6 --- /dev/null +++ b/arch/alpha/kernel/irq_impl.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * linux/arch/alpha/kernel/irq_impl.h + * + * Copyright (C) 1995 Linus Torvalds + * Copyright (C) 1998, 2000 Richard Henderson + * + * This file contains declarations and inline functions for interfacing + * with the IRQ handling routines in irq.c. + */ + +#include +#include +#include + + +#define RTC_IRQ 8 + +extern void isa_device_interrupt(unsigned long); +extern void isa_no_iack_sc_device_interrupt(unsigned long); +extern void srm_device_interrupt(unsigned long); +extern void pyxis_device_interrupt(unsigned long); + +extern void init_srm_irqs(long, unsigned long); +extern void init_pyxis_irqs(unsigned long); +extern void init_rtc_irq(irq_handler_t handler); + +extern void common_init_isa_dma(void); + +extern void i8259a_enable_irq(struct irq_data *d); +extern void i8259a_disable_irq(struct irq_data *d); +extern void i8259a_mask_and_ack_irq(struct irq_data *d); +extern struct irq_chip i8259a_irq_type; +extern void init_i8259a_irqs(void); + +extern void handle_irq(int irq); diff --git a/arch/alpha/kernel/irq_pyxis.c b/arch/alpha/kernel/irq_pyxis.c new file mode 100644 index 0000000000..27070b5bd3 --- /dev/null +++ b/arch/alpha/kernel/irq_pyxis.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/irq_pyxis.c + * + * Based on code written by David A Rusling (david.rusling@reo.mts.dec.com). + * + * IRQ Code common to all PYXIS core logic chips. + */ + +#include +#include +#include + +#include +#include + +#include "proto.h" +#include "irq_impl.h" + + +/* Note mask bit is true for ENABLED irqs. */ +static unsigned long cached_irq_mask; + +static inline void +pyxis_update_irq_hw(unsigned long mask) +{ + *(vulp)PYXIS_INT_MASK = mask; + mb(); + *(vulp)PYXIS_INT_MASK; +} + +static inline void +pyxis_enable_irq(struct irq_data *d) +{ + pyxis_update_irq_hw(cached_irq_mask |= 1UL << (d->irq - 16)); +} + +static void +pyxis_disable_irq(struct irq_data *d) +{ + pyxis_update_irq_hw(cached_irq_mask &= ~(1UL << (d->irq - 16))); +} + +static void +pyxis_mask_and_ack_irq(struct irq_data *d) +{ + unsigned long bit = 1UL << (d->irq - 16); + unsigned long mask = cached_irq_mask &= ~bit; + + /* Disable the interrupt. */ + *(vulp)PYXIS_INT_MASK = mask; + wmb(); + /* Ack PYXIS PCI interrupt. */ + *(vulp)PYXIS_INT_REQ = bit; + mb(); + /* Re-read to force both writes. */ + *(vulp)PYXIS_INT_MASK; +} + +static struct irq_chip pyxis_irq_type = { + .name = "PYXIS", + .irq_mask_ack = pyxis_mask_and_ack_irq, + .irq_mask = pyxis_disable_irq, + .irq_unmask = pyxis_enable_irq, +}; + +void +pyxis_device_interrupt(unsigned long vector) +{ + unsigned long pld; + unsigned int i; + + /* Read the interrupt summary register of PYXIS */ + pld = *(vulp)PYXIS_INT_REQ; + pld &= cached_irq_mask; + + /* + * Now for every possible bit set, work through them and call + * the appropriate interrupt handler. 
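The dispatch loop that follows uses the usual peel-off-the-lowest-set-bit idiom (the same one used for the ISA summary above); worked through for one summary value:

/*
 * pld = 0b10110 (bits 1, 2 and 4 pending after masking):
 *   ffz(~pld) = 1,  pld &= pld - 1 -> 0b10100,  handle_irq(16 + 1)
 *   ffz(~pld) = 2,  pld &= pld - 1 -> 0b10000,  handle_irq(16 + 2)
 *   ffz(~pld) = 4,  pld &= pld - 1 -> 0,        handle_irq(16 + 4)
 * bit 7, had it been set, is the ISA cascade and is routed to
 * isa_device_interrupt() instead.
 */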
+ */ + while (pld) { + i = ffz(~pld); + pld &= pld - 1; /* clear least bit set */ + if (i == 7) + isa_device_interrupt(vector); + else + handle_irq(16+i); + } +} + +void __init +init_pyxis_irqs(unsigned long ignore_mask) +{ + long i; + + *(vulp)PYXIS_INT_MASK = 0; /* disable all */ + *(vulp)PYXIS_INT_REQ = -1; /* flush all */ + mb(); + + /* Send -INTA pulses to clear any pending interrupts ...*/ + *(vuip) CIA_IACK_SC; + + for (i = 16; i < 48; ++i) { + if ((ignore_mask >> i) & 1) + continue; + irq_set_chip_and_handler(i, &pyxis_irq_type, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + + if (request_irq(16 + 7, no_action, 0, "isa-cascade", NULL)) + pr_err("Failed to register isa-cascade interrupt\n"); +} diff --git a/arch/alpha/kernel/irq_srm.c b/arch/alpha/kernel/irq_srm.c new file mode 100644 index 0000000000..bfacd6a061 --- /dev/null +++ b/arch/alpha/kernel/irq_srm.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Handle interrupts from the SRM, assuming no additional weirdness. + */ + +#include +#include +#include + +#include "proto.h" +#include "irq_impl.h" + + +/* + * Is the palcode SMP safe? In other words: can we call cserve_ena/dis + * at the same time in multiple CPUs? To be safe I added a spinlock + * but it can be removed trivially if the palcode is robust against smp. + */ +DEFINE_SPINLOCK(srm_irq_lock); + +static inline void +srm_enable_irq(struct irq_data *d) +{ + spin_lock(&srm_irq_lock); + cserve_ena(d->irq - 16); + spin_unlock(&srm_irq_lock); +} + +static void +srm_disable_irq(struct irq_data *d) +{ + spin_lock(&srm_irq_lock); + cserve_dis(d->irq - 16); + spin_unlock(&srm_irq_lock); +} + +/* Handle interrupts from the SRM, assuming no additional weirdness. */ +static struct irq_chip srm_irq_type = { + .name = "SRM", + .irq_unmask = srm_enable_irq, + .irq_mask = srm_disable_irq, + .irq_mask_ack = srm_disable_irq, +}; + +void __init +init_srm_irqs(long max, unsigned long ignore_mask) +{ + long i; + + if (NR_IRQS <= 16) + return; + for (i = 16; i < max; ++i) { + if (i < 64 && ((ignore_mask >> i) & 1)) + continue; + irq_set_chip_and_handler(i, &srm_irq_type, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } +} + +void +srm_device_interrupt(unsigned long vector) +{ + int irq = (vector - 0x800) >> 4; + handle_irq(irq); +} diff --git a/arch/alpha/kernel/machvec_impl.h b/arch/alpha/kernel/machvec_impl.h new file mode 100644 index 0000000000..393d5d6ca5 --- /dev/null +++ b/arch/alpha/kernel/machvec_impl.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * linux/arch/alpha/kernel/machvec_impl.h + * + * Copyright (C) 1997, 1998 Richard Henderson + * + * This file has goodies to help simplify instantiation of machine vectors. + */ + +/* Whee. These systems don't have an HAE: + IRONGATE, MARVEL, POLARIS, TSUNAMI, TITAN, WILDFIRE + Fix things up for the GENERIC kernel by defining the HAE address + to be that of the cache. Now we can read and write it as we like. 
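For srm_device_interrupt() just above: SRM hands the kernel a vector rather than an IRQ number, with device vectors starting at 0x800 and spaced 0x10 apart, hence the (vector - 0x800) >> 4 conversion. For example:

/*
 *   vector 0x800 -> irq 0       (first SRM vector)
 *   vector 0x900 -> irq 16      (first non-ISA interrupt)
 *   vector 0x910 -> irq 17
 */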
;-) */ +#define IRONGATE_HAE_ADDRESS (&alpha_mv.hae_cache) +#define MARVEL_HAE_ADDRESS (&alpha_mv.hae_cache) +#define POLARIS_HAE_ADDRESS (&alpha_mv.hae_cache) +#define TSUNAMI_HAE_ADDRESS (&alpha_mv.hae_cache) +#define TITAN_HAE_ADDRESS (&alpha_mv.hae_cache) +#define WILDFIRE_HAE_ADDRESS (&alpha_mv.hae_cache) + +#ifdef CIA_ONE_HAE_WINDOW +#define CIA_HAE_ADDRESS (&alpha_mv.hae_cache) +#endif +#ifdef MCPCIA_ONE_HAE_WINDOW +#define MCPCIA_HAE_ADDRESS (&alpha_mv.hae_cache) +#endif +#ifdef T2_ONE_HAE_WINDOW +#define T2_HAE_ADDRESS (&alpha_mv.hae_cache) +#endif + +/* Only a few systems don't define IACK_SC, handling all interrupts through + the SRM console. But splitting out that one case from IO() below + seems like such a pain. Define this to get things to compile. */ +#define JENSEN_IACK_SC 1 +#define T2_IACK_SC 1 +#define WILDFIRE_IACK_SC 1 /* FIXME */ + +/* + * Some helpful macros for filling in the blanks. + */ + +#define CAT1(x,y) x##y +#define CAT(x,y) CAT1(x,y) + +#define DO_DEFAULT_RTC .rtc_port = 0x70 + +#define DO_EV4_MMU \ + .max_asn = EV4_MAX_ASN, \ + .mv_switch_mm = ev4_switch_mm, \ + .mv_activate_mm = ev4_activate_mm, \ + .mv_flush_tlb_current = ev4_flush_tlb_current, \ + .mv_flush_tlb_current_page = ev4_flush_tlb_current_page + +#define DO_EV5_MMU \ + .max_asn = EV5_MAX_ASN, \ + .mv_switch_mm = ev5_switch_mm, \ + .mv_activate_mm = ev5_activate_mm, \ + .mv_flush_tlb_current = ev5_flush_tlb_current, \ + .mv_flush_tlb_current_page = ev5_flush_tlb_current_page + +#define DO_EV6_MMU \ + .max_asn = EV6_MAX_ASN, \ + .mv_switch_mm = ev5_switch_mm, \ + .mv_activate_mm = ev5_activate_mm, \ + .mv_flush_tlb_current = ev5_flush_tlb_current, \ + .mv_flush_tlb_current_page = ev5_flush_tlb_current_page + +#define DO_EV7_MMU \ + .max_asn = EV6_MAX_ASN, \ + .mv_switch_mm = ev5_switch_mm, \ + .mv_activate_mm = ev5_activate_mm, \ + .mv_flush_tlb_current = ev5_flush_tlb_current, \ + .mv_flush_tlb_current_page = ev5_flush_tlb_current_page + +#define IO_LITE(UP,low) \ + .hae_register = (unsigned long *) CAT(UP,_HAE_ADDRESS), \ + .iack_sc = CAT(UP,_IACK_SC), \ + .mv_ioread8 = CAT(low,_ioread8), \ + .mv_ioread16 = CAT(low,_ioread16), \ + .mv_ioread32 = CAT(low,_ioread32), \ + .mv_iowrite8 = CAT(low,_iowrite8), \ + .mv_iowrite16 = CAT(low,_iowrite16), \ + .mv_iowrite32 = CAT(low,_iowrite32), \ + .mv_readb = CAT(low,_readb), \ + .mv_readw = CAT(low,_readw), \ + .mv_readl = CAT(low,_readl), \ + .mv_readq = CAT(low,_readq), \ + .mv_writeb = CAT(low,_writeb), \ + .mv_writew = CAT(low,_writew), \ + .mv_writel = CAT(low,_writel), \ + .mv_writeq = CAT(low,_writeq), \ + .mv_ioportmap = CAT(low,_ioportmap), \ + .mv_ioremap = CAT(low,_ioremap), \ + .mv_iounmap = CAT(low,_iounmap), \ + .mv_is_ioaddr = CAT(low,_is_ioaddr), \ + .mv_is_mmio = CAT(low,_is_mmio) \ + +#define IO(UP,low) \ + IO_LITE(UP,low), \ + .pci_ops = &CAT(low,_pci_ops), \ + .mv_pci_tbi = CAT(low,_pci_tbi) + +#define DO_APECS_IO IO(APECS,apecs) +#define DO_CIA_IO IO(CIA,cia) +#define DO_IRONGATE_IO IO(IRONGATE,irongate) +#define DO_LCA_IO IO(LCA,lca) +#define DO_MARVEL_IO IO(MARVEL,marvel) +#define DO_MCPCIA_IO IO(MCPCIA,mcpcia) +#define DO_POLARIS_IO IO(POLARIS,polaris) +#define DO_T2_IO IO(T2,t2) +#define DO_TSUNAMI_IO IO(TSUNAMI,tsunami) +#define DO_TITAN_IO IO(TITAN,titan) +#define DO_WILDFIRE_IO IO(WILDFIRE,wildfire) + +#define DO_PYXIS_IO IO_LITE(CIA,cia_bwx), \ + .pci_ops = &cia_pci_ops, \ + .mv_pci_tbi = cia_pci_tbi + +/* + * In a GENERIC kernel, we have lots of these vectors floating about, + * all but one of which we want to go away. 
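These helper macros are consumed by the per-system files (sys_*.c, later in this patch) when they instantiate their machine vector. A heavily abridged sketch of what such an initializer looks like; the field subset is chosen for illustration and is nowhere near a complete vector:

struct alpha_machine_vector eb164_mv __initmv = {
	.vector_name	= "EB164",
	DO_EV5_MMU,
	DO_DEFAULT_RTC,
	DO_CIA_IO,
	.machine_check	= cia_machine_check,
	.nr_irqs	= 35,
};
ALIAS_MV(eb164)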
In a non-GENERIC kernel, + * we want only one, ever. + * + * Accomplish this in the GENERIC kernel by putting all of the vectors + * in the .init.data section where they'll go away. We'll copy the + * one we want to the real alpha_mv vector in setup_arch. + * + * Accomplish this in a non-GENERIC kernel by ifdef'ing out all but + * one of the vectors, which will not reside in .init.data. We then + * alias this one vector to alpha_mv, so no copy is needed. + * + * Upshot: set __initdata to nothing for non-GENERIC kernels. + */ + +#ifdef CONFIG_ALPHA_GENERIC +#define __initmv __initdata +#define ALIAS_MV(x) +#else +#define __initmv __refdata + +/* GCC actually has a syntax for defining aliases, but is under some + delusion that you shouldn't be able to declare it extern somewhere + else beforehand. Fine. We'll do it ourselves. */ +#if 0 +#define ALIAS_MV(system) \ + struct alpha_machine_vector alpha_mv __attribute__((alias(#system "_mv"))); \ + EXPORT_SYMBOL(alpha_mv); +#else +#define ALIAS_MV(system) \ + asm(".global alpha_mv\nalpha_mv = " #system "_mv"); \ + EXPORT_SYMBOL(alpha_mv); +#endif +#endif /* GENERIC */ diff --git a/arch/alpha/kernel/module.c b/arch/alpha/kernel/module.c new file mode 100644 index 0000000000..5b60c248de --- /dev/null +++ b/arch/alpha/kernel/module.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Kernel module help for Alpha. + Copyright (C) 2002 Richard Henderson. + +*/ +#include +#include +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(fmt...) +#endif + +/* Allocate the GOT at the end of the core sections. */ + +struct got_entry { + struct got_entry *next; + Elf64_Sxword r_addend; + int got_offset; +}; + +static inline void +process_reloc_for_got(Elf64_Rela *rela, + struct got_entry *chains, Elf64_Xword *poffset) +{ + unsigned long r_sym = ELF64_R_SYM (rela->r_info); + unsigned long r_type = ELF64_R_TYPE (rela->r_info); + Elf64_Sxword r_addend = rela->r_addend; + struct got_entry *g; + + if (r_type != R_ALPHA_LITERAL) + return; + + for (g = chains + r_sym; g ; g = g->next) + if (g->r_addend == r_addend) { + if (g->got_offset == 0) { + g->got_offset = *poffset; + *poffset += 8; + } + goto found_entry; + } + + g = kmalloc (sizeof (*g), GFP_KERNEL); + g->next = chains[r_sym].next; + g->r_addend = r_addend; + g->got_offset = *poffset; + *poffset += 8; + chains[r_sym].next = g; + + found_entry: + /* Trick: most of the ELF64_R_TYPE field is unused. There are + 42 valid relocation types, and a 32-bit field. Co-opt the + bits above 256 to store the got offset for this reloc. */ + rela->r_info |= g->got_offset << 8; +} + +int +module_frob_arch_sections(Elf64_Ehdr *hdr, Elf64_Shdr *sechdrs, + char *secstrings, struct module *me) +{ + struct got_entry *chains; + Elf64_Rela *rela; + Elf64_Shdr *esechdrs, *symtab, *s, *got; + unsigned long nsyms, nrela, i; + + esechdrs = sechdrs + hdr->e_shnum; + symtab = got = NULL; + + /* Find out how large the symbol table is. Allocate one got_entry + head per symbol. Normally this will be enough, but not always. + We'll chain different offsets for the symbol down each head. 
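The "co-opt the unused bits" trick set up in process_reloc_for_got() above is undone again in apply_relocate_add() below; the two ends of it, pulled out side by side:

/* pack, in the pre-pass: the real type stays in the low 8 bits */
rela->r_info |= g->got_offset << 8;

/* unpack, when applying the relocation: */
unsigned long r_type       = ELF64_R_TYPE(rela[i].r_info);
unsigned long r_got_offset = r_type >> 8;	/* stashed GOT offset   */
r_type &= 0xff;					/* true R_ALPHA_* value */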
*/ + for (s = sechdrs; s < esechdrs; ++s) + if (s->sh_type == SHT_SYMTAB) + symtab = s; + else if (!strcmp(".got", secstrings + s->sh_name)) { + got = s; + me->arch.gotsecindex = s - sechdrs; + } + + if (!symtab) { + printk(KERN_ERR "module %s: no symbol table\n", me->name); + return -ENOEXEC; + } + if (!got) { + printk(KERN_ERR "module %s: no got section\n", me->name); + return -ENOEXEC; + } + + nsyms = symtab->sh_size / sizeof(Elf64_Sym); + chains = kcalloc(nsyms, sizeof(struct got_entry), GFP_KERNEL); + if (!chains) { + printk(KERN_ERR + "module %s: no memory for symbol chain buffer\n", + me->name); + return -ENOMEM; + } + + got->sh_size = 0; + got->sh_addralign = 8; + got->sh_type = SHT_NOBITS; + + /* Examine all LITERAL relocations to find out what GOT entries + are required. This sizes the GOT section as well. */ + for (s = sechdrs; s < esechdrs; ++s) + if (s->sh_type == SHT_RELA) { + nrela = s->sh_size / sizeof(Elf64_Rela); + rela = (void *)hdr + s->sh_offset; + for (i = 0; i < nrela; ++i) + process_reloc_for_got(rela+i, chains, + &got->sh_size); + } + + /* Free the memory we allocated. */ + for (i = 0; i < nsyms; ++i) { + struct got_entry *g, *n; + for (g = chains[i].next; g ; g = n) { + n = g->next; + kfree(g); + } + } + kfree(chains); + + return 0; +} + +int +apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab, + unsigned int symindex, unsigned int relsec, + struct module *me) +{ + Elf64_Rela *rela = (void *)sechdrs[relsec].sh_addr; + unsigned long i, n = sechdrs[relsec].sh_size / sizeof(*rela); + Elf64_Sym *symtab, *sym; + void *base, *location; + unsigned long got, gp; + + DEBUGP("Applying relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); + + base = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr; + symtab = (Elf64_Sym *)sechdrs[symindex].sh_addr; + + /* The small sections were sorted to the end of the segment. + The following should definitely cover them. */ + gp = (u64)me->core_layout.base + me->core_layout.size - 0x8000; + got = sechdrs[me->arch.gotsecindex].sh_addr; + + for (i = 0; i < n; i++) { + unsigned long r_sym = ELF64_R_SYM (rela[i].r_info); + unsigned long r_type = ELF64_R_TYPE (rela[i].r_info); + unsigned long r_got_offset = r_type >> 8; + unsigned long value, hi, lo; + r_type &= 0xff; + + /* This is where to make the change. */ + location = base + rela[i].r_offset; + + /* This is the symbol it is referring to. Note that all + unresolved symbols have been resolved. */ + sym = symtab + r_sym; + value = sym->st_value + rela[i].r_addend; + + switch (r_type) { + case R_ALPHA_NONE: + break; + case R_ALPHA_REFLONG: + *(u32 *)location = value; + break; + case R_ALPHA_REFQUAD: + /* BUG() can produce misaligned relocations. */ + ((u32 *)location)[0] = value; + ((u32 *)location)[1] = value >> 32; + break; + case R_ALPHA_GPREL32: + value -= gp; + if ((int)value != value) + goto reloc_overflow; + *(u32 *)location = value; + break; + case R_ALPHA_LITERAL: + hi = got + r_got_offset; + lo = hi - gp; + if ((short)lo != lo) + goto reloc_overflow; + *(u16 *)location = lo; + *(u64 *)hi = value; + break; + case R_ALPHA_LITUSE: + break; + case R_ALPHA_GPDISP: + value = gp - (u64)location; + lo = (short)value; + hi = (int)(value - lo); + if (hi + lo != value) + goto reloc_overflow; + *(u16 *)location = hi >> 16; + *(u16 *)(location + rela[i].r_addend) = lo; + break; + case R_ALPHA_BRSGP: + /* BRSGP is only allowed to bind to local symbols. + If the section is undef, this means that the + value was resolved from somewhere else. 
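The R_ALPHA_GPDISP case above performs the classic ldah/lda split: the low 16 bits are taken as a signed short and whatever they borrow is folded back into the high half. Worked through for one displacement:

/*
 * value = gp - location = 0x29000:
 *   lo = (short)0x9000   = -0x7000
 *   hi = value - lo      =  0x30000
 *   hi + lo == value, so no overflow;
 *   *(u16 *)location            = hi >> 16 = 0x0003   (ldah operand)
 *   *(u16 *)(location + addend) = lo       = 0x9000   (lda operand)
 * at run time ldah adds 0x30000 and lda adds sign-extended -0x7000,
 * reconstructing exactly 0x29000.
 */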
*/ + if (sym->st_shndx == SHN_UNDEF) + goto reloc_overflow; + if ((sym->st_other & STO_ALPHA_STD_GPLOAD) == + STO_ALPHA_STD_GPLOAD) + /* Omit the prologue. */ + value += 8; + fallthrough; + case R_ALPHA_BRADDR: + value -= (u64)location + 4; + if (value & 3) + goto reloc_overflow; + value = (long)value >> 2; + if (value + (1<<21) >= 1<<22) + goto reloc_overflow; + value &= 0x1fffff; + value |= *(u32 *)location & ~0x1fffff; + *(u32 *)location = value; + break; + case R_ALPHA_HINT: + break; + case R_ALPHA_SREL32: + value -= (u64)location; + if ((int)value != value) + goto reloc_overflow; + *(u32 *)location = value; + break; + case R_ALPHA_SREL64: + value -= (u64)location; + *(u64 *)location = value; + break; + case R_ALPHA_GPRELHIGH: + value = (long)(value - gp + 0x8000) >> 16; + if ((short) value != value) + goto reloc_overflow; + *(u16 *)location = value; + break; + case R_ALPHA_GPRELLOW: + value -= gp; + *(u16 *)location = value; + break; + case R_ALPHA_GPREL16: + value -= gp; + if ((short) value != value) + goto reloc_overflow; + *(u16 *)location = value; + break; + default: + printk(KERN_ERR "module %s: Unknown relocation: %lu\n", + me->name, r_type); + return -ENOEXEC; + reloc_overflow: + if (ELF64_ST_TYPE (sym->st_info) == STT_SECTION) + printk(KERN_ERR + "module %s: Relocation (type %lu) overflow vs section %d\n", + me->name, r_type, sym->st_shndx); + else + printk(KERN_ERR + "module %s: Relocation (type %lu) overflow vs %s\n", + me->name, r_type, strtab + sym->st_name); + return -ENOEXEC; + } + } + + return 0; +} diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c new file mode 100644 index 0000000000..8bbeebb73c --- /dev/null +++ b/arch/alpha/kernel/osf_sys.c @@ -0,0 +1,1369 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/osf_sys.c + * + * Copyright (C) 1995 Linus Torvalds + */ + +/* + * This file handles some of the stranger OSF/1 system call interfaces. + * Some of the system calls expect a non-C calling standard, others have + * special parameter blocks.. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* + * Brk needs to return an error. Still support Linux's brk(0) query idiom, + * which OSF programs just shouldn't be doing. We're still not quite + * identical to OSF as we don't return 0 on success, but doing otherwise + * would require changes to libc. Hopefully this is good enough. + */ +SYSCALL_DEFINE1(osf_brk, unsigned long, brk) +{ + unsigned long retval = sys_brk(brk); + if (brk && brk != retval) + retval = -ENOMEM; + return retval; +} + +/* + * This is pure guess-work.. + */ +SYSCALL_DEFINE4(osf_set_program_attributes, unsigned long, text_start, + unsigned long, text_len, unsigned long, bss_start, + unsigned long, bss_len) +{ + struct mm_struct *mm; + + mm = current->mm; + mm->end_code = bss_start + bss_len; + mm->start_brk = bss_start + bss_len; + mm->brk = bss_start + bss_len; +#if 0 + printk("set_program_attributes(%lx %lx %lx %lx)\n", + text_start, text_len, bss_start, bss_len); +#endif + return 0; +} + +/* + * OSF/1 directory handling functions... 
+ * + * The "getdents()" interface is much more sane: the "basep" stuff is + * braindamage (it can't really handle filesystems where the directory + * offset differences aren't the same as "d_reclen"). + */ +#define NAME_OFFSET offsetof (struct osf_dirent, d_name) + +struct osf_dirent { + unsigned int d_ino; + unsigned short d_reclen; + unsigned short d_namlen; + char d_name[1]; +}; + +struct osf_dirent_callback { + struct dir_context ctx; + struct osf_dirent __user *dirent; + long __user *basep; + unsigned int count; + int error; +}; + +static int +osf_filldir(struct dir_context *ctx, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) +{ + struct osf_dirent __user *dirent; + struct osf_dirent_callback *buf = + container_of(ctx, struct osf_dirent_callback, ctx); + unsigned int reclen = ALIGN(NAME_OFFSET + namlen + 1, sizeof(u32)); + unsigned int d_ino; + + buf->error = -EINVAL; /* only used if we fail */ + if (reclen > buf->count) + return -EINVAL; + d_ino = ino; + if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { + buf->error = -EOVERFLOW; + return -EOVERFLOW; + } + if (buf->basep) { + if (put_user(offset, buf->basep)) + goto Efault; + buf->basep = NULL; + } + dirent = buf->dirent; + if (put_user(d_ino, &dirent->d_ino) || + put_user(namlen, &dirent->d_namlen) || + put_user(reclen, &dirent->d_reclen) || + copy_to_user(dirent->d_name, name, namlen) || + put_user(0, dirent->d_name + namlen)) + goto Efault; + dirent = (void __user *)dirent + reclen; + buf->dirent = dirent; + buf->count -= reclen; + return 0; +Efault: + buf->error = -EFAULT; + return -EFAULT; +} + +SYSCALL_DEFINE4(osf_getdirentries, unsigned int, fd, + struct osf_dirent __user *, dirent, unsigned int, count, + long __user *, basep) +{ + int error; + struct fd arg = fdget_pos(fd); + struct osf_dirent_callback buf = { + .ctx.actor = osf_filldir, + .dirent = dirent, + .basep = basep, + .count = count + }; + + if (!arg.file) + return -EBADF; + + error = iterate_dir(arg.file, &buf.ctx); + if (error >= 0) + error = buf.error; + if (count != buf.count) + error = count - buf.count; + + fdput_pos(arg); + return error; +} + +#undef NAME_OFFSET + +SYSCALL_DEFINE6(osf_mmap, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, unsigned long, fd, + unsigned long, off) +{ + unsigned long ret = -EINVAL; + +#if 0 + if (flags & (_MAP_HASSEMAPHORE | _MAP_INHERIT | _MAP_UNALIGNED)) + printk("%s: unimplemented OSF mmap flags %04lx\n", + current->comm, flags); +#endif + if ((off + PAGE_ALIGN(len)) < off) + goto out; + if (off & ~PAGE_MASK) + goto out; + ret = ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); + out: + return ret; +} + +struct osf_stat { + int st_dev; + int st_pad1; + unsigned st_mode; + unsigned short st_nlink; + short st_nlink_reserved; + unsigned st_uid; + unsigned st_gid; + int st_rdev; + int st_ldev; + long st_size; + int st_pad2; + int st_uatime; + int st_pad3; + int st_umtime; + int st_pad4; + int st_uctime; + int st_pad5; + int st_pad6; + unsigned st_flags; + unsigned st_gen; + long st_spare[4]; + unsigned st_ino; + int st_ino_reserved; + int st_atime; + int st_atime_reserved; + int st_mtime; + int st_mtime_reserved; + int st_ctime; + int st_ctime_reserved; + long st_blksize; + long st_blocks; +}; + +/* + * The OSF/1 statfs structure is much larger, but this should + * match the beginning, at least. 
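The record-length arithmetic in osf_filldir() above, worked through for a short name:

/*
 * name = "foo", namlen = 3:
 *   NAME_OFFSET = offsetof(struct osf_dirent, d_name) = 8
 *                 (4-byte d_ino + 2-byte d_reclen + 2-byte d_namlen)
 *   reclen = ALIGN(8 + 3 + 1, sizeof(u32)) = 12
 * so the entry takes 12 bytes of the user buffer, the name is
 * NUL-terminated, and buf->count shrinks by the same 12 before the
 * next directory entry is emitted.
 */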
+ */ +struct osf_statfs { + short f_type; + short f_flags; + int f_fsize; + int f_bsize; + int f_blocks; + int f_bfree; + int f_bavail; + int f_files; + int f_ffree; + __kernel_fsid_t f_fsid; +}; + +struct osf_statfs64 { + short f_type; + short f_flags; + int f_pad1; + int f_pad2; + int f_pad3; + int f_pad4; + int f_pad5; + int f_pad6; + int f_pad7; + __kernel_fsid_t f_fsid; + u_short f_namemax; + short f_reserved1; + int f_spare[8]; + char f_pad8[90]; + char f_pad9[90]; + long mount_info[10]; + u_long f_flags2; + long f_spare2[14]; + long f_fsize; + long f_bsize; + long f_blocks; + long f_bfree; + long f_bavail; + long f_files; + long f_ffree; +}; + +static int +linux_to_osf_stat(struct kstat *lstat, struct osf_stat __user *osf_stat) +{ + struct osf_stat tmp = { 0 }; + + tmp.st_dev = lstat->dev; + tmp.st_mode = lstat->mode; + tmp.st_nlink = lstat->nlink; + tmp.st_uid = from_kuid_munged(current_user_ns(), lstat->uid); + tmp.st_gid = from_kgid_munged(current_user_ns(), lstat->gid); + tmp.st_rdev = lstat->rdev; + tmp.st_ldev = lstat->rdev; + tmp.st_size = lstat->size; + tmp.st_uatime = lstat->atime.tv_nsec / 1000; + tmp.st_umtime = lstat->mtime.tv_nsec / 1000; + tmp.st_uctime = lstat->ctime.tv_nsec / 1000; + tmp.st_ino = lstat->ino; + tmp.st_atime = lstat->atime.tv_sec; + tmp.st_mtime = lstat->mtime.tv_sec; + tmp.st_ctime = lstat->ctime.tv_sec; + tmp.st_blksize = lstat->blksize; + tmp.st_blocks = lstat->blocks; + + return copy_to_user(osf_stat, &tmp, sizeof(tmp)) ? -EFAULT : 0; +} + +static int +linux_to_osf_statfs(struct kstatfs *linux_stat, struct osf_statfs __user *osf_stat, + unsigned long bufsiz) +{ + struct osf_statfs tmp_stat; + + tmp_stat.f_type = linux_stat->f_type; + tmp_stat.f_flags = 0; /* mount flags */ + tmp_stat.f_fsize = linux_stat->f_frsize; + tmp_stat.f_bsize = linux_stat->f_bsize; + tmp_stat.f_blocks = linux_stat->f_blocks; + tmp_stat.f_bfree = linux_stat->f_bfree; + tmp_stat.f_bavail = linux_stat->f_bavail; + tmp_stat.f_files = linux_stat->f_files; + tmp_stat.f_ffree = linux_stat->f_ffree; + tmp_stat.f_fsid = linux_stat->f_fsid; + if (bufsiz > sizeof(tmp_stat)) + bufsiz = sizeof(tmp_stat); + return copy_to_user(osf_stat, &tmp_stat, bufsiz) ? -EFAULT : 0; +} + +static int +linux_to_osf_statfs64(struct kstatfs *linux_stat, struct osf_statfs64 __user *osf_stat, + unsigned long bufsiz) +{ + struct osf_statfs64 tmp_stat = { 0 }; + + tmp_stat.f_type = linux_stat->f_type; + tmp_stat.f_fsize = linux_stat->f_frsize; + tmp_stat.f_bsize = linux_stat->f_bsize; + tmp_stat.f_blocks = linux_stat->f_blocks; + tmp_stat.f_bfree = linux_stat->f_bfree; + tmp_stat.f_bavail = linux_stat->f_bavail; + tmp_stat.f_files = linux_stat->f_files; + tmp_stat.f_ffree = linux_stat->f_ffree; + tmp_stat.f_fsid = linux_stat->f_fsid; + if (bufsiz > sizeof(tmp_stat)) + bufsiz = sizeof(tmp_stat); + return copy_to_user(osf_stat, &tmp_stat, bufsiz) ? 
-EFAULT : 0; +} + +SYSCALL_DEFINE3(osf_statfs, const char __user *, pathname, + struct osf_statfs __user *, buffer, unsigned long, bufsiz) +{ + struct kstatfs linux_stat; + int error = user_statfs(pathname, &linux_stat); + if (!error) + error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz); + return error; +} + +SYSCALL_DEFINE2(osf_stat, char __user *, name, struct osf_stat __user *, buf) +{ + struct kstat stat; + int error; + + error = vfs_stat(name, &stat); + if (error) + return error; + + return linux_to_osf_stat(&stat, buf); +} + +SYSCALL_DEFINE2(osf_lstat, char __user *, name, struct osf_stat __user *, buf) +{ + struct kstat stat; + int error; + + error = vfs_lstat(name, &stat); + if (error) + return error; + + return linux_to_osf_stat(&stat, buf); +} + +SYSCALL_DEFINE2(osf_fstat, int, fd, struct osf_stat __user *, buf) +{ + struct kstat stat; + int error; + + error = vfs_fstat(fd, &stat); + if (error) + return error; + + return linux_to_osf_stat(&stat, buf); +} + +SYSCALL_DEFINE3(osf_fstatfs, unsigned long, fd, + struct osf_statfs __user *, buffer, unsigned long, bufsiz) +{ + struct kstatfs linux_stat; + int error = fd_statfs(fd, &linux_stat); + if (!error) + error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz); + return error; +} + +SYSCALL_DEFINE3(osf_statfs64, char __user *, pathname, + struct osf_statfs64 __user *, buffer, unsigned long, bufsiz) +{ + struct kstatfs linux_stat; + int error = user_statfs(pathname, &linux_stat); + if (!error) + error = linux_to_osf_statfs64(&linux_stat, buffer, bufsiz); + return error; +} + +SYSCALL_DEFINE3(osf_fstatfs64, unsigned long, fd, + struct osf_statfs64 __user *, buffer, unsigned long, bufsiz) +{ + struct kstatfs linux_stat; + int error = fd_statfs(fd, &linux_stat); + if (!error) + error = linux_to_osf_statfs64(&linux_stat, buffer, bufsiz); + return error; +} + +/* + * Uhh.. OSF/1 mount parameters aren't exactly obvious.. + * + * Although to be frank, neither are the native Linux/i386 ones.. + */ +struct ufs_args { + char __user *devname; + int flags; + uid_t exroot; +}; + +struct cdfs_args { + char __user *devname; + int flags; + uid_t exroot; + + /* This has lots more here, which Linux handles with the option block + but I'm too lazy to do the translation into ASCII. */ +}; + +struct procfs_args { + char __user *devname; + int flags; + uid_t exroot; +}; + +/* + * We can't actually handle ufs yet, so we translate UFS mounts to + * ext2fs mounts. I wouldn't mind a UFS filesystem, but the UFS + * layout is so braindead it's a major headache doing it. + * + * Just how long ago was it written? OTOH our UFS driver may be still + * unhappy with OSF UFS. 
[CHECKME] + */ +static int +osf_ufs_mount(const char __user *dirname, + struct ufs_args __user *args, int flags) +{ + int retval; + struct cdfs_args tmp; + struct filename *devname; + + retval = -EFAULT; + if (copy_from_user(&tmp, args, sizeof(tmp))) + goto out; + devname = getname(tmp.devname); + retval = PTR_ERR(devname); + if (IS_ERR(devname)) + goto out; + retval = do_mount(devname->name, dirname, "ext2", flags, NULL); + putname(devname); + out: + return retval; +} + +static int +osf_cdfs_mount(const char __user *dirname, + struct cdfs_args __user *args, int flags) +{ + int retval; + struct cdfs_args tmp; + struct filename *devname; + + retval = -EFAULT; + if (copy_from_user(&tmp, args, sizeof(tmp))) + goto out; + devname = getname(tmp.devname); + retval = PTR_ERR(devname); + if (IS_ERR(devname)) + goto out; + retval = do_mount(devname->name, dirname, "iso9660", flags, NULL); + putname(devname); + out: + return retval; +} + +static int +osf_procfs_mount(const char __user *dirname, + struct procfs_args __user *args, int flags) +{ + struct procfs_args tmp; + + if (copy_from_user(&tmp, args, sizeof(tmp))) + return -EFAULT; + + return do_mount("", dirname, "proc", flags, NULL); +} + +SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, const char __user *, path, + int, flag, void __user *, data) +{ + int retval; + + switch (typenr) { + case 1: + retval = osf_ufs_mount(path, data, flag); + break; + case 6: + retval = osf_cdfs_mount(path, data, flag); + break; + case 9: + retval = osf_procfs_mount(path, data, flag); + break; + default: + retval = -EINVAL; + printk("osf_mount(%ld, %x)\n", typenr, flag); + } + + return retval; +} + +SYSCALL_DEFINE1(osf_utsname, char __user *, name) +{ + char tmp[5 * 32]; + + down_read(&uts_sem); + memcpy(tmp + 0 * 32, utsname()->sysname, 32); + memcpy(tmp + 1 * 32, utsname()->nodename, 32); + memcpy(tmp + 2 * 32, utsname()->release, 32); + memcpy(tmp + 3 * 32, utsname()->version, 32); + memcpy(tmp + 4 * 32, utsname()->machine, 32); + up_read(&uts_sem); + + if (copy_to_user(name, tmp, sizeof(tmp))) + return -EFAULT; + return 0; +} + +SYSCALL_DEFINE0(getpagesize) +{ + return PAGE_SIZE; +} + +SYSCALL_DEFINE0(getdtablesize) +{ + return sysctl_nr_open; +} + +/* + * For compatibility with OSF/1 only. Use utsname(2) instead. + */ +SYSCALL_DEFINE2(osf_getdomainname, char __user *, name, int, namelen) +{ + int len; + char *kname; + char tmp[32]; + + if (namelen < 0 || namelen > 32) + namelen = 32; + + down_read(&uts_sem); + kname = utsname()->domainname; + len = strnlen(kname, namelen); + len = min(len + 1, namelen); + memcpy(tmp, kname, len); + up_read(&uts_sem); + + if (copy_to_user(name, tmp, len)) + return -EFAULT; + return 0; +} + +/* + * The following stuff should move into a header file should it ever + * be labeled "officially supported." Right now, there is just enough + * support to avoid applications (such as tar) printing error + * messages. The attributes are not really implemented. 
+ */ + +/* + * Values for Property list entry flag + */ +#define PLE_PROPAGATE_ON_COPY 0x1 /* cp(1) will copy entry + by default */ +#define PLE_FLAG_MASK 0x1 /* Valid flag values */ +#define PLE_FLAG_ALL -1 /* All flag value */ + +struct proplistname_args { + unsigned int pl_mask; + unsigned int pl_numnames; + char **pl_names; +}; + +union pl_args { + struct setargs { + char __user *path; + long follow; + long nbytes; + char __user *buf; + } set; + struct fsetargs { + long fd; + long nbytes; + char __user *buf; + } fset; + struct getargs { + char __user *path; + long follow; + struct proplistname_args __user *name_args; + long nbytes; + char __user *buf; + int __user *min_buf_size; + } get; + struct fgetargs { + long fd; + struct proplistname_args __user *name_args; + long nbytes; + char __user *buf; + int __user *min_buf_size; + } fget; + struct delargs { + char __user *path; + long follow; + struct proplistname_args __user *name_args; + } del; + struct fdelargs { + long fd; + struct proplistname_args __user *name_args; + } fdel; +}; + +enum pl_code { + PL_SET = 1, PL_FSET = 2, + PL_GET = 3, PL_FGET = 4, + PL_DEL = 5, PL_FDEL = 6 +}; + +SYSCALL_DEFINE2(osf_proplist_syscall, enum pl_code, code, + union pl_args __user *, args) +{ + long error; + int __user *min_buf_size_ptr; + + switch (code) { + case PL_SET: + if (get_user(error, &args->set.nbytes)) + error = -EFAULT; + break; + case PL_FSET: + if (get_user(error, &args->fset.nbytes)) + error = -EFAULT; + break; + case PL_GET: + error = get_user(min_buf_size_ptr, &args->get.min_buf_size); + if (error) + break; + error = put_user(0, min_buf_size_ptr); + break; + case PL_FGET: + error = get_user(min_buf_size_ptr, &args->fget.min_buf_size); + if (error) + break; + error = put_user(0, min_buf_size_ptr); + break; + case PL_DEL: + case PL_FDEL: + error = 0; + break; + default: + error = -EOPNOTSUPP; + break; + } + return error; +} + +SYSCALL_DEFINE2(osf_sigstack, struct sigstack __user *, uss, + struct sigstack __user *, uoss) +{ + unsigned long usp = rdusp(); + unsigned long oss_sp = current->sas_ss_sp + current->sas_ss_size; + unsigned long oss_os = on_sig_stack(usp); + int error; + + if (uss) { + void __user *ss_sp; + + error = -EFAULT; + if (get_user(ss_sp, &uss->ss_sp)) + goto out; + + /* If the current stack was set with sigaltstack, don't + swap stacks while we are on it. */ + error = -EPERM; + if (current->sas_ss_sp && on_sig_stack(usp)) + goto out; + + /* Since we don't know the extent of the stack, and we don't + track onstack-ness, but rather calculate it, we must + presume a size. Ho hum this interface is lossy. 
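+ The incoming ss_sp is taken to be the current top of the alternate + stack, so the base is recorded as ss_sp - SIGSTKSZ and the size is + presumed to be SIGSTKSZ.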
*/ + current->sas_ss_sp = (unsigned long)ss_sp - SIGSTKSZ; + current->sas_ss_size = SIGSTKSZ; + } + + if (uoss) { + error = -EFAULT; + if (put_user(oss_sp, &uoss->ss_sp) || + put_user(oss_os, &uoss->ss_onstack)) + goto out; + } + + error = 0; + out: + return error; +} + +SYSCALL_DEFINE3(osf_sysinfo, int, command, char __user *, buf, long, count) +{ + const char *sysinfo_table[] = { + utsname()->sysname, + utsname()->nodename, + utsname()->release, + utsname()->version, + utsname()->machine, + "alpha", /* instruction set architecture */ + "dummy", /* hardware serial number */ + "dummy", /* hardware manufacturer */ + "dummy", /* secure RPC domain */ + }; + unsigned long offset; + const char *res; + long len; + char tmp[__NEW_UTS_LEN + 1]; + + offset = command-1; + if (offset >= ARRAY_SIZE(sysinfo_table)) { + /* Digital UNIX has a few unpublished interfaces here */ + printk("sysinfo(%d)", command); + return -EINVAL; + } + + down_read(&uts_sem); + res = sysinfo_table[offset]; + len = strlen(res)+1; + if ((unsigned long)len > (unsigned long)count) + len = count; + memcpy(tmp, res, len); + up_read(&uts_sem); + if (copy_to_user(buf, tmp, len)) + return -EFAULT; + return 0; +} + +SYSCALL_DEFINE5(osf_getsysinfo, unsigned long, op, void __user *, buffer, + unsigned long, nbytes, int __user *, start, void __user *, arg) +{ + unsigned long w; + struct percpu_struct *cpu; + + switch (op) { + case GSI_IEEE_FP_CONTROL: + /* Return current software fp control & status bits. */ + /* Note that DU doesn't verify available space here. */ + + w = current_thread_info()->ieee_state & IEEE_SW_MASK; + w = swcr_update_status(w, rdfpcr()); + if (put_user(w, (unsigned long __user *) buffer)) + return -EFAULT; + return 0; + + case GSI_IEEE_STATE_AT_SIGNAL: + /* + * Not sure anybody will ever use this weird stuff. These + * ops can be used (under OSF/1) to set the fpcr that should + * be used when a signal handler starts executing. + */ + break; + + case GSI_UACPROC: + if (nbytes < sizeof(unsigned int)) + return -EINVAL; + w = current_thread_info()->status & UAC_BITMASK; + if (put_user(w, (unsigned int __user *)buffer)) + return -EFAULT; + return 1; + + case GSI_PROC_TYPE: + if (nbytes < sizeof(unsigned long)) + return -EINVAL; + cpu = (struct percpu_struct*) + ((char*)hwrpb + hwrpb->processor_offset); + w = cpu->type; + if (put_user(w, (unsigned long __user*)buffer)) + return -EFAULT; + return 1; + + case GSI_GET_HWRPB: + if (nbytes > sizeof(*hwrpb)) + return -EINVAL; + if (copy_to_user(buffer, hwrpb, nbytes) != 0) + return -EFAULT; + return 1; + + default: + break; + } + + return -EOPNOTSUPP; +} + +SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer, + unsigned long, nbytes, int __user *, start, void __user *, arg) +{ + switch (op) { + case SSI_IEEE_FP_CONTROL: { + unsigned long swcr, fpcr; + unsigned int *state; + + /* + * Alpha Architecture Handbook 4.7.7.3: + * To be fully IEEE compliant, we must track the current IEEE + * exception state in software, because spurious bits can be + * set in the trap shadow of a software-complete insn. + */ + + if (get_user(swcr, (unsigned long __user *)buffer)) + return -EFAULT; + state = &current_thread_info()->ieee_state; + + /* Update software trap enable bits. */ + *state = (*state & ~IEEE_SW_MASK) | (swcr & IEEE_SW_MASK); + + /* Update the real fpcr.
*/ + fpcr = rdfpcr() & FPCR_DYN_MASK; + fpcr |= ieee_swcr_to_fpcr(swcr); + wrfpcr(fpcr); + + return 0; + } + + case SSI_IEEE_RAISE_EXCEPTION: { + unsigned long exc, swcr, fpcr, fex; + unsigned int *state; + + if (get_user(exc, (unsigned long __user *)buffer)) + return -EFAULT; + state = &current_thread_info()->ieee_state; + exc &= IEEE_STATUS_MASK; + + /* Update software trap enable bits. */ + swcr = (*state & IEEE_SW_MASK) | exc; + *state |= exc; + + /* Update the real fpcr. */ + fpcr = rdfpcr(); + fpcr |= ieee_swcr_to_fpcr(swcr); + wrfpcr(fpcr); + + /* If any exceptions set by this call, and are unmasked, + send a signal. Old exceptions are not signaled. */ + fex = (exc >> IEEE_STATUS_TO_EXCSUM_SHIFT) & swcr; + if (fex) { + int si_code = FPE_FLTUNK; + + if (fex & IEEE_TRAP_ENABLE_DNO) si_code = FPE_FLTUND; + if (fex & IEEE_TRAP_ENABLE_INE) si_code = FPE_FLTRES; + if (fex & IEEE_TRAP_ENABLE_UNF) si_code = FPE_FLTUND; + if (fex & IEEE_TRAP_ENABLE_OVF) si_code = FPE_FLTOVF; + if (fex & IEEE_TRAP_ENABLE_DZE) si_code = FPE_FLTDIV; + if (fex & IEEE_TRAP_ENABLE_INV) si_code = FPE_FLTINV; + + send_sig_fault_trapno(SIGFPE, si_code, + (void __user *)NULL, /* FIXME */ + 0, current); + } + return 0; + } + + case SSI_IEEE_STATE_AT_SIGNAL: + case SSI_IEEE_IGNORE_STATE_AT_SIGNAL: + /* + * Not sure anybody will ever use this weird stuff. These + * ops can be used (under OSF/1) to set the fpcr that should + * be used when a signal handler starts executing. + */ + break; + + case SSI_NVPAIRS: { + unsigned __user *p = buffer; + unsigned i; + + for (i = 0, p = buffer; i < nbytes; ++i, p += 2) { + unsigned v, w, status; + + if (get_user(v, p) || get_user(w, p + 1)) + return -EFAULT; + switch (v) { + case SSIN_UACPROC: + w &= UAC_BITMASK; + status = current_thread_info()->status; + status = (status & ~UAC_BITMASK) | w; + current_thread_info()->status = status; + break; + + default: + return -EOPNOTSUPP; + } + } + return 0; + } + + case SSI_LMF: + return 0; + + default: + break; + } + + return -EOPNOTSUPP; +} + +/* Translations due to the fact that OSF's time_t is an int. Which + affects all sorts of things, like timeval and itimerval.
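+ The timeval32/itimerval32 layouts below mirror the OSF/1 structures; + get_tv32() and put_tv32() convert between them and the kernel's + 64-bit timespec64 representation.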
*/ + +extern struct timezone sys_tz; + +struct timeval32 +{ + int tv_sec, tv_usec; +}; + +struct itimerval32 +{ + struct timeval32 it_interval; + struct timeval32 it_value; +}; + +static inline long +get_tv32(struct timespec64 *o, struct timeval32 __user *i) +{ + struct timeval32 tv; + if (copy_from_user(&tv, i, sizeof(struct timeval32))) + return -EFAULT; + o->tv_sec = tv.tv_sec; + o->tv_nsec = tv.tv_usec * NSEC_PER_USEC; + return 0; +} + +static inline long +put_tv32(struct timeval32 __user *o, struct timespec64 *i) +{ + return copy_to_user(o, &(struct timeval32){ + .tv_sec = i->tv_sec, + .tv_usec = i->tv_nsec / NSEC_PER_USEC}, + sizeof(struct timeval32)); +} + +static inline long +put_tv_to_tv32(struct timeval32 __user *o, struct __kernel_old_timeval *i) +{ + return copy_to_user(o, &(struct timeval32){ + .tv_sec = i->tv_sec, + .tv_usec = i->tv_usec}, + sizeof(struct timeval32)); +} + +static inline void +jiffies_to_timeval32(unsigned long jiffies, struct timeval32 *value) +{ + value->tv_usec = (jiffies % HZ) * (1000000L / HZ); + value->tv_sec = jiffies / HZ; +} + +SYSCALL_DEFINE2(osf_gettimeofday, struct timeval32 __user *, tv, + struct timezone __user *, tz) +{ + if (tv) { + struct timespec64 kts; + + ktime_get_real_ts64(&kts); + if (put_tv32(tv, &kts)) + return -EFAULT; + } + if (tz) { + if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) + return -EFAULT; + } + return 0; +} + +SYSCALL_DEFINE2(osf_settimeofday, struct timeval32 __user *, tv, + struct timezone __user *, tz) +{ + struct timespec64 kts; + struct timezone ktz; + + if (tv) { + if (get_tv32(&kts, tv)) + return -EFAULT; + } + if (tz) { + if (copy_from_user(&ktz, tz, sizeof(*tz))) + return -EFAULT; + } + + return do_sys_settimeofday64(tv ? &kts : NULL, tz ? &ktz : NULL); +} + +asmlinkage long sys_ni_posix_timers(void); + +SYSCALL_DEFINE2(osf_utimes, const char __user *, filename, + struct timeval32 __user *, tvs) +{ + struct timespec64 tv[2]; + + if (tvs) { + if (get_tv32(&tv[0], &tvs[0]) || + get_tv32(&tv[1], &tvs[1])) + return -EFAULT; + + if (tv[0].tv_nsec < 0 || tv[0].tv_nsec >= 1000000000 || + tv[1].tv_nsec < 0 || tv[1].tv_nsec >= 1000000000) + return -EINVAL; + } + + return do_utimes(AT_FDCWD, filename, tvs ? tv : NULL, 0); +} + +SYSCALL_DEFINE5(osf_select, int, n, fd_set __user *, inp, fd_set __user *, outp, + fd_set __user *, exp, struct timeval32 __user *, tvp) +{ + struct timespec64 end_time, *to = NULL; + if (tvp) { + struct timespec64 tv; + to = &end_time; + + if (get_tv32(&tv, tvp)) + return -EFAULT; + + if (tv.tv_sec < 0 || tv.tv_nsec < 0) + return -EINVAL; + + if (poll_select_set_timeout(to, tv.tv_sec, tv.tv_nsec)) + return -EINVAL; + + } + + /* OSF does not copy back the remaining time. 
*/ + return core_sys_select(n, inp, outp, exp, to); +} + +struct rusage32 { + struct timeval32 ru_utime; /* user time used */ + struct timeval32 ru_stime; /* system time used */ + long ru_maxrss; /* maximum resident set size */ + long ru_ixrss; /* integral shared memory size */ + long ru_idrss; /* integral unshared data size */ + long ru_isrss; /* integral unshared stack size */ + long ru_minflt; /* page reclaims */ + long ru_majflt; /* page faults */ + long ru_nswap; /* swaps */ + long ru_inblock; /* block input operations */ + long ru_oublock; /* block output operations */ + long ru_msgsnd; /* messages sent */ + long ru_msgrcv; /* messages received */ + long ru_nsignals; /* signals received */ + long ru_nvcsw; /* voluntary context switches */ + long ru_nivcsw; /* involuntary " */ +}; + +SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru) +{ + struct rusage32 r; + u64 utime, stime; + unsigned long utime_jiffies, stime_jiffies; + + if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN) + return -EINVAL; + + memset(&r, 0, sizeof(r)); + switch (who) { + case RUSAGE_SELF: + task_cputime(current, &utime, &stime); + utime_jiffies = nsecs_to_jiffies(utime); + stime_jiffies = nsecs_to_jiffies(stime); + jiffies_to_timeval32(utime_jiffies, &r.ru_utime); + jiffies_to_timeval32(stime_jiffies, &r.ru_stime); + r.ru_minflt = current->min_flt; + r.ru_majflt = current->maj_flt; + break; + case RUSAGE_CHILDREN: + utime_jiffies = nsecs_to_jiffies(current->signal->cutime); + stime_jiffies = nsecs_to_jiffies(current->signal->cstime); + jiffies_to_timeval32(utime_jiffies, &r.ru_utime); + jiffies_to_timeval32(stime_jiffies, &r.ru_stime); + r.ru_minflt = current->signal->cmin_flt; + r.ru_majflt = current->signal->cmaj_flt; + break; + } + + return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0; +} + +SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options, + struct rusage32 __user *, ur) +{ + struct rusage r; + long err = kernel_wait4(pid, ustatus, options, &r); + if (err <= 0) + return err; + if (!ur) + return err; + if (put_tv_to_tv32(&ur->ru_utime, &r.ru_utime)) + return -EFAULT; + if (put_tv_to_tv32(&ur->ru_stime, &r.ru_stime)) + return -EFAULT; + if (copy_to_user(&ur->ru_maxrss, &r.ru_maxrss, + sizeof(struct rusage32) - offsetof(struct rusage32, ru_maxrss))) + return -EFAULT; + return err; +} + +/* + * I don't know what the parameters are: the first one + * seems to be a timeval pointer, and I suspect the second + * one is the time remaining.. Ho humm.. No documentation. 
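+ * The implementation below treats the first argument as the requested + * sleep interval and, when the second is non-NULL, stores the unslept + * remainder back through it.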
+ */ +SYSCALL_DEFINE2(osf_usleep_thread, struct timeval32 __user *, sleep, + struct timeval32 __user *, remain) +{ + struct timespec64 tmp; + unsigned long ticks; + + if (get_tv32(&tmp, sleep)) + goto fault; + + ticks = timespec64_to_jiffies(&tmp); + + ticks = schedule_timeout_interruptible(ticks); + + if (remain) { + jiffies_to_timespec64(ticks, &tmp); + if (put_tv32(remain, &tmp)) + goto fault; + } + + return 0; + fault: + return -EFAULT; +} + + +struct timex32 { + unsigned int modes; /* mode selector */ + long offset; /* time offset (usec) */ + long freq; /* frequency offset (scaled ppm) */ + long maxerror; /* maximum error (usec) */ + long esterror; /* estimated error (usec) */ + int status; /* clock command/status */ + long constant; /* pll time constant */ + long precision; /* clock precision (usec) (read only) */ + long tolerance; /* clock frequency tolerance (ppm) + * (read only) + */ + struct timeval32 time; /* (read only) */ + long tick; /* (modified) usecs between clock ticks */ + + long ppsfreq; /* pps frequency (scaled ppm) (ro) */ + long jitter; /* pps jitter (us) (ro) */ + int shift; /* interval duration (s) (shift) (ro) */ + long stabil; /* pps stability (scaled ppm) (ro) */ + long jitcnt; /* jitter limit exceeded (ro) */ + long calcnt; /* calibration intervals (ro) */ + long errcnt; /* calibration errors (ro) */ + long stbcnt; /* stability limit exceeded (ro) */ + + int :32; int :32; int :32; int :32; + int :32; int :32; int :32; int :32; + int :32; int :32; int :32; int :32; +}; + +SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p) +{ + struct __kernel_timex txc; + int ret; + + /* copy relevant bits of struct timex. */ + if (copy_from_user(&txc, txc_p, offsetof(struct timex32, time)) || + copy_from_user(&txc.tick, &txc_p->tick, sizeof(struct timex32) - + offsetof(struct timex32, tick))) + return -EFAULT; + + ret = do_adjtimex(&txc); + if (ret < 0) + return ret; + + /* copy back to timex32 */ + if (copy_to_user(txc_p, &txc, offsetof(struct timex32, time)) || + (copy_to_user(&txc_p->tick, &txc.tick, sizeof(struct timex32) - + offsetof(struct timex32, tick))) || + (put_user(txc.time.tv_sec, &txc_p->time.tv_sec)) || + (put_user(txc.time.tv_usec, &txc_p->time.tv_usec))) + return -EFAULT; + + return ret; +} + +/* Get an address range which is currently unmapped. Similar to the + generic version except that we know how to honor ADDR_LIMIT_32BIT. */ + +static unsigned long +arch_get_unmapped_area_1(unsigned long addr, unsigned long len, + unsigned long limit) +{ + struct vm_unmapped_area_info info; + + info.flags = 0; + info.length = len; + info.low_limit = addr; + info.high_limit = limit; + info.align_mask = 0; + info.align_offset = 0; + return vm_unmapped_area(&info); +} + +unsigned long +arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + unsigned long limit; + + /* "32 bit" actually means 31 bit, since pointers sign extend. */ + if (current->personality & ADDR_LIMIT_32BIT) + limit = 0x80000000; + else + limit = TASK_SIZE; + + if (len > limit) + return -ENOMEM; + + if (flags & MAP_FIXED) + return addr; + + /* First, see if the given suggestion fits. + + The OSF/1 loader (/sbin/loader) relies on us returning an + address larger than the requested if one exists, which is + a terribly broken way to program. + + That said, I can see the use in being able to suggest not + merely specific addresses, but regions of memory -- perhaps + this feature should be incorporated into all ports? 
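+ Hence the three-stage search below: try the caller's hint first, then + TASK_UNMAPPED_BASE, and finally the bottom of the address space.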
*/ + + if (addr) { + addr = arch_get_unmapped_area_1 (PAGE_ALIGN(addr), len, limit); + if (addr != (unsigned long) -ENOMEM) + return addr; + } + + /* Next, try allocating at TASK_UNMAPPED_BASE. */ + addr = arch_get_unmapped_area_1 (PAGE_ALIGN(TASK_UNMAPPED_BASE), + len, limit); + if (addr != (unsigned long) -ENOMEM) + return addr; + + /* Finally, try allocating in low memory. */ + addr = arch_get_unmapped_area_1 (PAGE_SIZE, len, limit); + + return addr; +} + +#ifdef CONFIG_OSF4_COMPAT +/* Clear top 32 bits of iov_len in the user's buffer for + compatibility with old versions of OSF/1 where iov_len + was defined as int. */ +static int +osf_fix_iov_len(const struct iovec __user *iov, unsigned long count) +{ + unsigned long i; + + for (i = 0 ; i < count ; i++) { + int __user *iov_len_high = (int __user *)&iov[i].iov_len + 1; + + if (put_user(0, iov_len_high)) + return -EFAULT; + } + return 0; +} +#endif + +SYSCALL_DEFINE3(osf_readv, unsigned long, fd, + const struct iovec __user *, vector, unsigned long, count) +{ +#ifdef CONFIG_OSF4_COMPAT + if (unlikely(personality(current->personality) == PER_OSF4)) + if (osf_fix_iov_len(vector, count)) + return -EFAULT; +#endif + + return sys_readv(fd, vector, count); +} + +SYSCALL_DEFINE3(osf_writev, unsigned long, fd, + const struct iovec __user *, vector, unsigned long, count) +{ +#ifdef CONFIG_OSF4_COMPAT + if (unlikely(personality(current->personality) == PER_OSF4)) + if (osf_fix_iov_len(vector, count)) + return -EFAULT; +#endif + return sys_writev(fd, vector, count); +} + +SYSCALL_DEFINE2(osf_getpriority, int, which, int, who) +{ + int prio = sys_getpriority(which, who); + if (prio >= 0) { + /* Return value is the unbiased priority, i.e. 20 - prio. + This does result in negative return values, so signal + no error */ + force_successful_syscall_return(); + prio = 20 - prio; + } + return prio; +} + +SYSCALL_DEFINE0(getxuid) +{ + current_pt_regs()->r20 = sys_geteuid(); + return sys_getuid(); +} + +SYSCALL_DEFINE0(getxgid) +{ + current_pt_regs()->r20 = sys_getegid(); + return sys_getgid(); +} + +SYSCALL_DEFINE0(getxpid) +{ + current_pt_regs()->r20 = sys_getppid(); + return sys_getpid(); +} + +SYSCALL_DEFINE0(alpha_pipe) +{ + int fd[2]; + int res = do_pipe_flags(fd, 0); + if (!res) { + /* The return values are in $0 and $20. 
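+ fd[0] goes back in $0 as the ordinary syscall return value and fd[1] + is handed back in $20 via current_pt_regs()->r20.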
*/ + current_pt_regs()->r20 = fd[1]; + res = fd[0]; + } + return res; +} + +SYSCALL_DEFINE1(sethae, unsigned long, val) +{ + current_pt_regs()->hae = val; + return 0; +} diff --git a/arch/alpha/kernel/pc873xx.c b/arch/alpha/kernel/pc873xx.c new file mode 100644 index 0000000000..82b19c9e59 --- /dev/null +++ b/arch/alpha/kernel/pc873xx.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +#include "pc873xx.h" + +static unsigned pc873xx_probelist[] = {0x398, 0x26e, 0}; + +static char *pc873xx_names[] = { + "PC87303", "PC87306", "PC87312", "PC87332", "PC87334" +}; + +static unsigned int base, model; + + +unsigned int __init pc873xx_get_base(void) +{ + return base; +} + +char *__init pc873xx_get_model(void) +{ + return pc873xx_names[model]; +} + +static unsigned char __init pc873xx_read(unsigned int base, int reg) +{ + outb(reg, base); + return inb(base + 1); +} + +static void __init pc873xx_write(unsigned int base, int reg, unsigned char data) +{ + unsigned long flags; + + local_irq_save(flags); + outb(reg, base); + outb(data, base + 1); + outb(data, base + 1); /* Must be written twice */ + local_irq_restore(flags); +} + +int __init pc873xx_probe(void) +{ + int val, index = 0; + + while ((base = pc873xx_probelist[index++])) { + + if (request_region(base, 2, "Super IO PC873xx") == NULL) + continue; + + val = pc873xx_read(base, REG_SID); + if ((val & 0xf0) == 0x10) { + model = PC87332; + break; + } else if ((val & 0xf8) == 0x70) { + model = PC87306; + break; + } else if ((val & 0xf8) == 0x50) { + model = PC87334; + break; + } else if ((val & 0xf8) == 0x40) { + model = PC87303; + break; + } + + release_region(base, 2); + } + + return (base == 0) ? -1 : 1; +} + +void __init pc873xx_enable_epp19(void) +{ + unsigned char data; + + printk(KERN_INFO "PC873xx enabling EPP v1.9\n"); + data = pc873xx_read(base, REG_PCR); + pc873xx_write(base, REG_PCR, (data & 0xFC) | 0x02); +} + +void __init pc873xx_enable_ide(void) +{ + unsigned char data; + + printk(KERN_INFO "PC873xx enabling IDE interrupt\n"); + data = pc873xx_read(base, REG_FER); + pc873xx_write(base, REG_FER, data | 0x40); +} diff --git a/arch/alpha/kernel/pc873xx.h b/arch/alpha/kernel/pc873xx.h new file mode 100644 index 0000000000..0435330275 --- /dev/null +++ b/arch/alpha/kernel/pc873xx.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _PC873xx_H_ +#define _PC873xx_H_ + +/* + * Control Register Values + */ +#define REG_FER 0x00 +#define REG_FAR 0x01 +#define REG_PTR 0x02 +#define REG_FCR 0x03 +#define REG_PCR 0x04 +#define REG_KRR 0x05 +#define REG_PMC 0x06 +#define REG_TUP 0x07 +#define REG_SID 0x08 +#define REG_ASC 0x09 +#define REG_IRC 0x0e + +/* + * Model numbers + */ +#define PC87303 0 +#define PC87306 1 +#define PC87312 2 +#define PC87332 3 +#define PC87334 4 + +int pc873xx_probe(void); +unsigned int pc873xx_get_base(void); +char *pc873xx_get_model(void); +void pc873xx_enable_epp19(void); +void pc873xx_enable_ide(void); + +#endif diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c new file mode 100644 index 0000000000..ae82061eda --- /dev/null +++ b/arch/alpha/kernel/pci-noop.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/pci-noop.c + * + * Stub PCI interfaces for Jensen-specific kernels. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "proto.h" + + +/* + * The PCI controller list. 
+ */ + +struct pci_controller *hose_head, **hose_tail = &hose_head; +struct pci_controller *pci_isa_hose; + + +struct pci_controller * __init +alloc_pci_controller(void) +{ + struct pci_controller *hose; + + hose = memblock_alloc(sizeof(*hose), SMP_CACHE_BYTES); + if (!hose) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(*hose)); + + *hose_tail = hose; + hose_tail = &hose->next; + + return hose; +} + +struct resource * __init +alloc_resource(void) +{ + void *ptr = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); + + if (!ptr) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(struct resource)); + + return ptr; +} + +SYSCALL_DEFINE3(pciconfig_iobase, long, which, unsigned long, bus, + unsigned long, dfn) +{ + struct pci_controller *hose; + + /* from hose or from bus.devfn */ + if (which & IOBASE_FROM_HOSE) { + for (hose = hose_head; hose; hose = hose->next) + if (hose->index == bus) + break; + if (!hose) + return -ENODEV; + } else { + /* Special hook for ISA access. */ + if (bus == 0 && dfn == 0) + hose = pci_isa_hose; + else + return -ENODEV; + } + + switch (which & ~IOBASE_FROM_HOSE) { + case IOBASE_HOSE: + return hose->index; + case IOBASE_SPARSE_MEM: + return hose->sparse_mem_base; + case IOBASE_DENSE_MEM: + return hose->dense_mem_base; + case IOBASE_SPARSE_IO: + return hose->sparse_io_base; + case IOBASE_DENSE_IO: + return hose->dense_io_base; + case IOBASE_ROOT_BUS: + return hose->bus->number; + } + + return -EOPNOTSUPP; +} + +SYSCALL_DEFINE5(pciconfig_read, unsigned long, bus, unsigned long, dfn, + unsigned long, off, unsigned long, len, void __user *, buf) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + else + return -ENODEV; +} + +SYSCALL_DEFINE5(pciconfig_write, unsigned long, bus, unsigned long, dfn, + unsigned long, off, unsigned long, len, void __user *, buf) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + else + return -ENODEV; +} diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c new file mode 100644 index 0000000000..5808a66e2a --- /dev/null +++ b/arch/alpha/kernel/pci-sysfs.c @@ -0,0 +1,372 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/alpha/kernel/pci-sysfs.c + * + * Copyright (C) 2009 Ivan Kokshaysky + * + * Alpha PCI resource files. + * + * Loosely based on generic HAVE_PCI_MMAP implementation in + * drivers/pci/pci-sysfs.c + */ + +#include +#include +#include +#include + +static int hose_mmap_page_range(struct pci_controller *hose, + struct vm_area_struct *vma, + enum pci_mmap_state mmap_type, int sparse) +{ + unsigned long base; + + if (mmap_type == pci_mmap_mem) + base = sparse ? hose->sparse_mem_base : hose->dense_mem_base; + else + base = sparse ? hose->sparse_io_base : hose->dense_io_base; + + vma->vm_pgoff += base >> PAGE_SHIFT; + + return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +} + +static int __pci_mmap_fits(struct pci_dev *pdev, int num, + struct vm_area_struct *vma, int sparse) +{ + unsigned long nr, start, size; + int shift = sparse ? 5 : 0; + + nr = vma_pages(vma); + start = vma->vm_pgoff; + size = ((pci_resource_len(pdev, num) - 1) >> (PAGE_SHIFT - shift)) + 1; + + if (start < size && size - start >= nr) + return 1; + WARN(1, "process \"%s\" tried to map%s 0x%08lx-0x%08lx on %s BAR %d " + "(size 0x%08lx)\n", + current->comm, sparse ? 
" sparse" : "", start, start + nr, + pci_name(pdev), num, size); + return 0; +} + +/** + * pci_mmap_resource - map a PCI resource into user memory space + * @kobj: kobject for mapping + * @attr: struct bin_attribute for the file being mapped + * @vma: struct vm_area_struct passed into the mmap + * @sparse: address space type + * + * Use the bus mapping routines to map a PCI resource into userspace. + * + * Return: %0 on success, negative error code otherwise + */ +static int pci_mmap_resource(struct kobject *kobj, + struct bin_attribute *attr, + struct vm_area_struct *vma, int sparse) +{ + struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj)); + struct resource *res = attr->private; + enum pci_mmap_state mmap_type; + struct pci_bus_region bar; + int i; + + for (i = 0; i < PCI_STD_NUM_BARS; i++) + if (res == &pdev->resource[i]) + break; + if (i >= PCI_STD_NUM_BARS) + return -ENODEV; + + if (res->flags & IORESOURCE_MEM && iomem_is_exclusive(res->start)) + return -EINVAL; + + if (!__pci_mmap_fits(pdev, i, vma, sparse)) + return -EINVAL; + + pcibios_resource_to_bus(pdev->bus, &bar, res); + vma->vm_pgoff += bar.start >> (PAGE_SHIFT - (sparse ? 5 : 0)); + mmap_type = res->flags & IORESOURCE_MEM ? pci_mmap_mem : pci_mmap_io; + + return hose_mmap_page_range(pdev->sysdata, vma, mmap_type, sparse); +} + +static int pci_mmap_resource_sparse(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + struct vm_area_struct *vma) +{ + return pci_mmap_resource(kobj, attr, vma, 1); +} + +static int pci_mmap_resource_dense(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + struct vm_area_struct *vma) +{ + return pci_mmap_resource(kobj, attr, vma, 0); +} + +/** + * pci_remove_resource_files - cleanup resource files + * @pdev: pci_dev to cleanup + * + * If we created resource files for @dev, remove them from sysfs and + * free their resources. + */ +void pci_remove_resource_files(struct pci_dev *pdev) +{ + int i; + + for (i = 0; i < PCI_STD_NUM_BARS; i++) { + struct bin_attribute *res_attr; + + res_attr = pdev->res_attr[i]; + if (res_attr) { + sysfs_remove_bin_file(&pdev->dev.kobj, res_attr); + kfree(res_attr); + } + + res_attr = pdev->res_attr_wc[i]; + if (res_attr) { + sysfs_remove_bin_file(&pdev->dev.kobj, res_attr); + kfree(res_attr); + } + } +} + +static int sparse_mem_mmap_fits(struct pci_dev *pdev, int num) +{ + struct pci_bus_region bar; + struct pci_controller *hose = pdev->sysdata; + long dense_offset; + unsigned long sparse_size; + + pcibios_resource_to_bus(pdev->bus, &bar, &pdev->resource[num]); + + /* All core logic chips have 4G sparse address space, except + CIA which has 16G (see xxx_SPARSE_MEM and xxx_DENSE_MEM + definitions in asm/core_xxx.h files). This corresponds + to 128M or 512M of the bus space. */ + dense_offset = (long)(hose->dense_mem_base - hose->sparse_mem_base); + sparse_size = dense_offset >= 0x400000000UL ? 0x20000000 : 0x8000000; + + return bar.end < sparse_size; +} + +static int pci_create_one_attr(struct pci_dev *pdev, int num, char *name, + char *suffix, struct bin_attribute *res_attr, + unsigned long sparse) +{ + size_t size = pci_resource_len(pdev, num); + + sprintf(name, "resource%d%s", num, suffix); + res_attr->mmap = sparse ? pci_mmap_resource_sparse : + pci_mmap_resource_dense; + res_attr->attr.name = name; + res_attr->attr.mode = S_IRUSR | S_IWUSR; + res_attr->size = sparse ? 
size << 5 : size; + res_attr->private = &pdev->resource[num]; + return sysfs_create_bin_file(&pdev->dev.kobj, res_attr); +} + +static int pci_create_attr(struct pci_dev *pdev, int num) +{ + /* allocate attribute structure, piggyback attribute name */ + int retval, nlen1, nlen2 = 0, res_count = 1; + unsigned long sparse_base, dense_base; + struct bin_attribute *attr; + struct pci_controller *hose = pdev->sysdata; + char *suffix, *attr_name; + + suffix = ""; /* Assume bwx machine, normal resourceN files. */ + nlen1 = 10; + + if (pdev->resource[num].flags & IORESOURCE_MEM) { + sparse_base = hose->sparse_mem_base; + dense_base = hose->dense_mem_base; + if (sparse_base && !sparse_mem_mmap_fits(pdev, num)) { + sparse_base = 0; + suffix = "_dense"; + nlen1 = 16; /* resourceN_dense */ + } + } else { + sparse_base = hose->sparse_io_base; + dense_base = hose->dense_io_base; + } + + if (sparse_base) { + suffix = "_sparse"; + nlen1 = 17; + if (dense_base) { + nlen2 = 16; /* resourceN_dense */ + res_count = 2; + } + } + + attr = kzalloc(sizeof(*attr) * res_count + nlen1 + nlen2, GFP_ATOMIC); + if (!attr) + return -ENOMEM; + + /* Create bwx, sparse or single dense file */ + attr_name = (char *)(attr + res_count); + pdev->res_attr[num] = attr; + retval = pci_create_one_attr(pdev, num, attr_name, suffix, attr, + sparse_base); + if (retval || res_count == 1) + return retval; + + /* Create dense file */ + attr_name += nlen1; + attr++; + pdev->res_attr_wc[num] = attr; + return pci_create_one_attr(pdev, num, attr_name, "_dense", attr, 0); +} + +/** + * pci_create_resource_files - create resource files in sysfs for @pdev + * @pdev: pci_dev in question + * + * Walk the resources in @dev creating files for each resource available. + * + * Return: %0 on success, or negative error code + */ +int pci_create_resource_files(struct pci_dev *pdev) +{ + int i; + int retval; + + /* Expose the PCI resources from this device as files */ + for (i = 0; i < PCI_STD_NUM_BARS; i++) { + + /* skip empty resources */ + if (!pci_resource_len(pdev, i)) + continue; + + retval = pci_create_attr(pdev, i); + if (retval) { + pci_remove_resource_files(pdev); + return retval; + } + } + return 0; +} + +/* Legacy I/O bus mapping stuff. */ + +static int __legacy_mmap_fits(struct pci_controller *hose, + struct vm_area_struct *vma, + unsigned long res_size, int sparse) +{ + unsigned long nr, start, size; + + nr = vma_pages(vma); + start = vma->vm_pgoff; + size = ((res_size - 1) >> PAGE_SHIFT) + 1; + + if (start < size && size - start >= nr) + return 1; + WARN(1, "process \"%s\" tried to map%s 0x%08lx-0x%08lx on hose %d " + "(size 0x%08lx)\n", + current->comm, sparse ? " sparse" : "", start, start + nr, + hose->index, size); + return 0; +} + +static inline int has_sparse(struct pci_controller *hose, + enum pci_mmap_state mmap_type) +{ + unsigned long base; + + base = (mmap_type == pci_mmap_mem) ? hose->sparse_mem_base : + hose->sparse_io_base; + + return base != 0; +} + +int pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma, + enum pci_mmap_state mmap_type) +{ + struct pci_controller *hose = bus->sysdata; + int sparse = has_sparse(hose, mmap_type); + unsigned long res_size; + + res_size = (mmap_type == pci_mmap_mem) ? 
bus->legacy_mem->size : + bus->legacy_io->size; + if (!__legacy_mmap_fits(hose, vma, res_size, sparse)) + return -EINVAL; + + return hose_mmap_page_range(hose, vma, mmap_type, sparse); +} + +/** + * pci_adjust_legacy_attr - adjustment of legacy file attributes + * @bus: bus to create files under + * @mmap_type: I/O port or memory + * + * Adjust file name and size for sparse mappings. + */ +void pci_adjust_legacy_attr(struct pci_bus *bus, enum pci_mmap_state mmap_type) +{ + struct pci_controller *hose = bus->sysdata; + + if (!has_sparse(hose, mmap_type)) + return; + + if (mmap_type == pci_mmap_mem) { + bus->legacy_mem->attr.name = "legacy_mem_sparse"; + bus->legacy_mem->size <<= 5; + } else { + bus->legacy_io->attr.name = "legacy_io_sparse"; + bus->legacy_io->size <<= 5; + } + return; +} + +/* Legacy I/O bus read/write functions */ +int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size) +{ + struct pci_controller *hose = bus->sysdata; + + port += hose->io_space->start; + + switch(size) { + case 1: + *((u8 *)val) = inb(port); + return 1; + case 2: + if (port & 1) + return -EINVAL; + *((u16 *)val) = inw(port); + return 2; + case 4: + if (port & 3) + return -EINVAL; + *((u32 *)val) = inl(port); + return 4; + } + return -EINVAL; +} + +int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, size_t size) +{ + struct pci_controller *hose = bus->sysdata; + + port += hose->io_space->start; + + switch(size) { + case 1: + outb(port, val); + return 1; + case 2: + if (port & 1) + return -EINVAL; + outw(port, val); + return 2; + case 4: + if (port & 3) + return -EINVAL; + outl(port, val); + return 4; + } + return -EINVAL; +} diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c new file mode 100644 index 0000000000..64fbfb0763 --- /dev/null +++ b/arch/alpha/kernel/pci.c @@ -0,0 +1,476 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/pci.c + * + * Extruded from code written by + * Dave Rusling (david.rusling@reo.mts.dec.com) + * David Mosberger (davidm@cs.arizona.edu) + */ + +/* 2.3.x PCI/resources, 1999 Andrea Arcangeli */ + +/* + * Nov 2000, Ivan Kokshaysky + * PCI-PCI bridges cleanup + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "proto.h" +#include "pci_impl.h" + + +/* + * Some string constants used by the various core logics. + */ + +const char *const pci_io_names[] = { + "PCI IO bus 0", "PCI IO bus 1", "PCI IO bus 2", "PCI IO bus 3", + "PCI IO bus 4", "PCI IO bus 5", "PCI IO bus 6", "PCI IO bus 7" +}; + +const char *const pci_mem_names[] = { + "PCI mem bus 0", "PCI mem bus 1", "PCI mem bus 2", "PCI mem bus 3", + "PCI mem bus 4", "PCI mem bus 5", "PCI mem bus 6", "PCI mem bus 7" +}; + +const char pci_hae0_name[] = "HAE0"; + +/* + * If PCI_PROBE_ONLY in pci_flags is set, we don't change any PCI resource + * assignments. + */ + +/* + * The PCI controller list. + */ + +struct pci_controller *hose_head, **hose_tail = &hose_head; +struct pci_controller *pci_isa_hose; + +/* + * Quirks. + */ + +static void quirk_isa_bridge(struct pci_dev *dev) +{ + dev->class = PCI_CLASS_BRIDGE_ISA << 8; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82378, quirk_isa_bridge); + +static void quirk_cypress(struct pci_dev *dev) +{ + /* The Notorious Cy82C693 chip. */ + + /* The generic legacy mode IDE fixup in drivers/pci/probe.c + doesn't work correctly with the Cypress IDE controller as + it has non-standard register layout. Fix that. 
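+ The fixup below clears the bogus BARs 2 and 3 and, for function 2, + pins BARs 0 and 1 to the legacy secondary-channel ports 0x170-0x177 + and 0x376.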
*/ + if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE) { + dev->resource[2].start = dev->resource[3].start = 0; + dev->resource[2].end = dev->resource[3].end = 0; + dev->resource[2].flags = dev->resource[3].flags = 0; + if (PCI_FUNC(dev->devfn) == 2) { + dev->resource[0].start = 0x170; + dev->resource[0].end = 0x177; + dev->resource[1].start = 0x376; + dev->resource[1].end = 0x376; + } + } + + /* The Cypress bridge responds on the PCI bus in the address range + 0xffff0000-0xffffffff (conventional x86 BIOS ROM). There is no + way to turn this off. The bridge also supports several extended + BIOS ranges (disabled after power-up), and some consoles do turn + them on. So if we use a large direct-map window, or a large SG + window, we must avoid the entire 0xfff00000-0xffffffff region. */ + if (dev->class >> 8 == PCI_CLASS_BRIDGE_ISA) { + if (__direct_map_base + __direct_map_size >= 0xfff00000UL) + __direct_map_size = 0xfff00000UL - __direct_map_base; + else { + struct pci_controller *hose = dev->sysdata; + struct pci_iommu_arena *pci = hose->sg_pci; + if (pci && pci->dma_base + pci->size >= 0xfff00000UL) + pci->size = 0xfff00000UL - pci->dma_base; + } + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CONTAQ, PCI_DEVICE_ID_CONTAQ_82C693, quirk_cypress); + +/* Called for each device after PCI setup is done. */ +static void pcibios_fixup_final(struct pci_dev *dev) +{ + unsigned int class = dev->class >> 8; + + if (class == PCI_CLASS_BRIDGE_ISA || class == PCI_CLASS_BRIDGE_EISA) { + dev->dma_mask = MAX_ISA_DMA_ADDRESS - 1; + isa_bridge = dev; + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_final); + +/* Just declaring that the power-of-ten prefixes are actually the + power-of-two ones doesn't make it true :) */ +#define KB 1024 +#define MB (1024*KB) +#define GB (1024*MB) + +resource_size_t +pcibios_align_resource(void *data, const struct resource *res, + resource_size_t size, resource_size_t align) +{ + struct pci_dev *dev = data; + struct pci_controller *hose = dev->sysdata; + unsigned long alignto; + resource_size_t start = res->start; + + if (res->flags & IORESOURCE_IO) { + /* Make sure we start at our min on all hoses */ + if (start - hose->io_space->start < PCIBIOS_MIN_IO) + start = PCIBIOS_MIN_IO + hose->io_space->start; + + /* + * Put everything into 0x00-0xff region modulo 0x400 + */ + if (start & 0x300) + start = (start + 0x3ff) & ~0x3ff; + } + else if (res->flags & IORESOURCE_MEM) { + /* Make sure we start at our min on all hoses */ + if (start - hose->mem_space->start < PCIBIOS_MIN_MEM) + start = PCIBIOS_MIN_MEM + hose->mem_space->start; + + /* + * The following holds at least for the Low Cost + * Alpha implementation of the PCI interface: + * + * In sparse memory address space, the first + * octant (16MB) of every 128MB segment is + * aliased to the very first 16 MB of the + * address space (i.e., it aliases the ISA + * memory address space). Thus, we try to + * avoid allocating PCI devices in that range. + * Can be allocated in 2nd-7th octant only. + * Devices that need more than 112MB of + * address space must be accessed through + * dense memory space only! + */ + + /* Align to multiple of size of minimum base. 
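+ For sparse-capable hoses the code below also steps an allocation out + of the first 16MB octant of its 128MB segment, and moves it into the + next segment's second octant if it would straddle a 128MB boundary.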
*/ + alignto = max_t(resource_size_t, 0x1000, align); + start = ALIGN(start, alignto); + if (hose->sparse_mem_base && size <= 7 * 16*MB) { + if (((start / (16*MB)) & 0x7) == 0) { + start &= ~(128*MB - 1); + start += 16*MB; + start = ALIGN(start, alignto); + } + if (start/(128*MB) != (start + size - 1)/(128*MB)) { + start &= ~(128*MB - 1); + start += (128 + 16)*MB; + start = ALIGN(start, alignto); + } + } + } + + return start; +} +#undef KB +#undef MB +#undef GB + +static int __init +pcibios_init(void) +{ + if (alpha_mv.init_pci) + alpha_mv.init_pci(); + return 0; +} + +subsys_initcall(pcibios_init); + +#ifdef ALPHA_RESTORE_SRM_SETUP +/* Store PCI device configuration left by SRM here. */ +struct pdev_srm_saved_conf +{ + struct pdev_srm_saved_conf *next; + struct pci_dev *dev; +}; + +static struct pdev_srm_saved_conf *srm_saved_configs; + +static void pdev_save_srm_config(struct pci_dev *dev) +{ + struct pdev_srm_saved_conf *tmp; + static int printed = 0; + + if (!alpha_using_srm || pci_has_flag(PCI_PROBE_ONLY)) + return; + + if (!printed) { + printk(KERN_INFO "pci: enabling save/restore of SRM state\n"); + printed = 1; + } + + tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) { + printk(KERN_ERR "%s: kmalloc() failed!\n", __func__); + return; + } + tmp->next = srm_saved_configs; + tmp->dev = dev; + + pci_save_state(dev); + + srm_saved_configs = tmp; +} + +void +pci_restore_srm_config(void) +{ + struct pdev_srm_saved_conf *tmp; + + /* No need to restore if probed only. */ + if (pci_has_flag(PCI_PROBE_ONLY)) + return; + + /* Restore SRM config. */ + for (tmp = srm_saved_configs; tmp; tmp = tmp->next) { + pci_restore_state(tmp->dev); + } +} +#else +#define pdev_save_srm_config(dev) do {} while (0) +#endif + +void pcibios_fixup_bus(struct pci_bus *bus) +{ + struct pci_dev *dev = bus->self; + + if (pci_has_flag(PCI_PROBE_ONLY) && dev && + (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { + pci_read_bridge_bases(bus); + } + + list_for_each_entry(dev, &bus->devices, bus_list) { + pdev_save_srm_config(dev); + } +} + +/* + * If we set up a device for bus mastering, we need to check the latency + * timer as certain firmware forgets to set it properly, as seen + * on SX164 and LX164 with SRM. 
+ */ +void +pcibios_set_master(struct pci_dev *dev) +{ + u8 lat; + pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); + if (lat >= 16) return; + printk("PCI: Setting latency timer of device %s to 64\n", + pci_name(dev)); + pci_write_config_byte(dev, PCI_LATENCY_TIMER, 64); +} + +void __init +pcibios_claim_one_bus(struct pci_bus *b) +{ + struct pci_dev *dev; + struct pci_bus *child_bus; + + list_for_each_entry(dev, &b->devices, bus_list) { + int i; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *r = &dev->resource[i]; + + if (r->parent || !r->start || !r->flags) + continue; + if (pci_has_flag(PCI_PROBE_ONLY) || + (r->flags & IORESOURCE_PCI_FIXED)) { + if (pci_claim_resource(dev, i) == 0) + continue; + + pci_claim_bridge_resource(dev, i); + } + } + } + + list_for_each_entry(child_bus, &b->children, node) + pcibios_claim_one_bus(child_bus); +} + +static void __init +pcibios_claim_console_setup(void) +{ + struct pci_bus *b; + + list_for_each_entry(b, &pci_root_buses, node) + pcibios_claim_one_bus(b); +} + +void __init +common_init_pci(void) +{ + struct pci_controller *hose; + struct list_head resources; + struct pci_host_bridge *bridge; + struct pci_bus *bus; + int ret, next_busno; + int need_domain_info = 0; + u32 pci_mem_end; + u32 sg_base; + unsigned long end; + + /* Scan all of the recorded PCI controllers. */ + for (next_busno = 0, hose = hose_head; hose; hose = hose->next) { + sg_base = hose->sg_pci ? hose->sg_pci->dma_base : ~0; + + /* Adjust hose mem_space limit to prevent PCI allocations + in the iommu windows. */ + pci_mem_end = min((u32)__direct_map_base, sg_base) - 1; + end = hose->mem_space->start + pci_mem_end; + if (hose->mem_space->end > end) + hose->mem_space->end = end; + + INIT_LIST_HEAD(&resources); + pci_add_resource_offset(&resources, hose->io_space, + hose->io_space->start); + pci_add_resource_offset(&resources, hose->mem_space, + hose->mem_space->start); + + bridge = pci_alloc_host_bridge(0); + if (!bridge) + continue; + + list_splice_init(&resources, &bridge->windows); + bridge->dev.parent = NULL; + bridge->sysdata = hose; + bridge->busnr = next_busno; + bridge->ops = alpha_mv.pci_ops; + bridge->swizzle_irq = alpha_mv.pci_swizzle; + bridge->map_irq = alpha_mv.pci_map_irq; + + ret = pci_scan_root_bus_bridge(bridge); + if (ret) { + pci_free_host_bridge(bridge); + continue; + } + + bus = hose->bus = bridge->bus; + hose->need_domain_info = need_domain_info; + next_busno = bus->busn_res.end + 1; + /* Don't allow 8-bit bus number overflow inside the hose - + reserve some space for bridges. */ + if (next_busno > 224) { + next_busno = 0; + need_domain_info = 1; + } + } + + pcibios_claim_console_setup(); + + pci_assign_unassigned_resources(); + for (hose = hose_head; hose; hose = hose->next) { + bus = hose->bus; + if (bus) + pci_bus_add_devices(bus); + } +} + +struct pci_controller * __init +alloc_pci_controller(void) +{ + struct pci_controller *hose; + + hose = memblock_alloc(sizeof(*hose), SMP_CACHE_BYTES); + if (!hose) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(*hose)); + + *hose_tail = hose; + hose_tail = &hose->next; + + return hose; +} + +struct resource * __init +alloc_resource(void) +{ + void *ptr = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); + + if (!ptr) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(struct resource)); + + return ptr; +} + + +/* Provide information on locations of various I/O regions in physical + memory. Do this on a per-card basis so that we choose the right hose. 
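+ If IOBASE_FROM_HOSE is set, 'bus' names a hose index; otherwise the + hose is looked up from the bus/devfn pair, with bus 0 / devfn 0 as a + special ISA hook.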
*/ + +SYSCALL_DEFINE3(pciconfig_iobase, long, which, unsigned long, bus, + unsigned long, dfn) +{ + struct pci_controller *hose; + struct pci_dev *dev; + + /* from hose or from bus.devfn */ + if (which & IOBASE_FROM_HOSE) { + for(hose = hose_head; hose; hose = hose->next) + if (hose->index == bus) break; + if (!hose) return -ENODEV; + } else { + /* Special hook for ISA access. */ + if (bus == 0 && dfn == 0) { + hose = pci_isa_hose; + } else { + dev = pci_get_domain_bus_and_slot(0, bus, dfn); + if (!dev) + return -ENODEV; + hose = dev->sysdata; + pci_dev_put(dev); + } + } + + switch (which & ~IOBASE_FROM_HOSE) { + case IOBASE_HOSE: + return hose->index; + case IOBASE_SPARSE_MEM: + return hose->sparse_mem_base; + case IOBASE_DENSE_MEM: + return hose->dense_mem_base; + case IOBASE_SPARSE_IO: + return hose->sparse_io_base; + case IOBASE_DENSE_IO: + return hose->dense_io_base; + case IOBASE_ROOT_BUS: + return hose->bus->number; + } + + return -EOPNOTSUPP; +} + +/* Destroy an __iomem token. Not copied from lib/iomap.c. */ + +void pci_iounmap(struct pci_dev *dev, void __iomem * addr) +{ + if (__is_mmio(addr)) + iounmap(addr); +} + +EXPORT_SYMBOL(pci_iounmap); + +/* FIXME: Some boxes have multiple ISA bridges! */ +struct pci_dev *isa_bridge; +EXPORT_SYMBOL(isa_bridge); diff --git a/arch/alpha/kernel/pci_impl.h b/arch/alpha/kernel/pci_impl.h new file mode 100644 index 0000000000..18043af45e --- /dev/null +++ b/arch/alpha/kernel/pci_impl.h @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * linux/arch/alpha/kernel/pci_impl.h + * + * This file contains declarations and inline functions for interfacing + * with the PCI initialization routines. + */ + +struct pci_dev; +struct pci_controller; +struct pci_iommu_arena; + +/* + * We can't just blindly use 64K for machines with EISA busses; they + * may also have PCI-PCI bridges present, and then we'd configure the + * bridge incorrectly. + * + * Also, we start at 0x8000 or 0x9000, in hopes to get all devices' + * IO space areas allocated *before* 0xC000; this is because certain + * BIOSes (Millennium for one) use PCI Config space "mechanism #2" + * accesses to probe the bus. If a device's registers appear at 0xC000, + * it may see an INx/OUTx at that address during BIOS emulation of the + * VGA BIOS, and some cards, notably Adaptec 2940UW, take mortal offense. + */ + +#define EISA_DEFAULT_IO_BASE 0x9000 /* start above 8th slot */ +#define DEFAULT_IO_BASE 0x8000 /* start at 8th slot */ + +/* + * We try to make the DEFAULT_MEM_BASE addresses *always* have more than + * a single bit set. This is so that devices like the broken Myrinet card + * will always have a PCI memory address that will never match a IDSEL + * address in PCI Config space, which can cause problems with early rev cards. + */ + +/* + * An XL is AVANTI (APECS) family, *but* it has only 27 bits of ISA address + * that get passed through the PCI<->ISA bridge chip. Although this causes + * us to set the PCI->Mem window bases lower than normal, we still allocate + * PCI bus devices' memory addresses *below* the low DMA mapping window, + * and hope they fit below 64Mb (to avoid conflicts), and so that they can + * be accessed via SPARSE space. + * + * We accept the risk that a broken Myrinet card will be put into a true XL + * and thus can more easily run into the problem described below. 
+ */ +#define XL_DEFAULT_MEM_BASE ((16+2)*1024*1024) /* 16M to 64M-1 is avail */ + +/* + * APECS and LCA have only 34 bits for physical addresses, thus limiting PCI + * bus memory addresses for SPARSE access to be less than 128Mb. + */ +#define APECS_AND_LCA_DEFAULT_MEM_BASE ((16+2)*1024*1024) + +/* + * Because MCPCIA and T2 core logic support more bits for + * physical addresses, they should allow an expanded range of SPARSE + * memory addresses. However, we do not use them all, in order to + * avoid the HAE manipulation that would be needed. + */ +#define MCPCIA_DEFAULT_MEM_BASE ((32+2)*1024*1024) +#define T2_DEFAULT_MEM_BASE ((16+1)*1024*1024) + +/* + * Because CIA and PYXIS have more bits for physical addresses, + * they support an expanded range of SPARSE memory addresses. + */ +#define DEFAULT_MEM_BASE ((128+16)*1024*1024) + +/* ??? Experimenting with no HAE for CIA. */ +#define CIA_DEFAULT_MEM_BASE ((32+2)*1024*1024) + +#define IRONGATE_DEFAULT_MEM_BASE ((256*8-16)*1024*1024) + +#define DEFAULT_AGP_APER_SIZE (64*1024*1024) + +/* + * A small note about bridges and interrupts. The DECchip 21050 (and + * later) adheres to the PCI-PCI bridge specification. This says that + * the interrupts on the other side of a bridge are swizzled in the + * following manner: + * + * Dev Interrupt Interrupt + * Pin on Pin on + * Device Connector + * + * 4 A A + * B B + * C C + * D D + * + * 5 A B + * B C + * C D + * D A + * + * 6 A C + * B D + * C A + * D B + * + * 7 A D + * B A + * C B + * D C + * + * Where A = pin 1, B = pin 2 and so on and pin=0 = default = A. + * Thus, each swizzle is ((pin-1) + (device#-4)) % 4 + * + * pci_swizzle_interrupt_pin() swizzles for exactly one bridge. The routine + * pci_common_swizzle() handles multiple bridges. But there are a + * couple boards that do strange things. + */ + + +/* The following macro is used to implement the table-based irq mapping + function for all single-bus Alphas. */ + +#define COMMON_TABLE_LOOKUP \ +({ long _ctl_ = -1; \ + if (slot >= min_idsel && slot <= max_idsel && pin < irqs_per_slot) \ + _ctl_ = irq_tab[slot - min_idsel][pin]; \ + _ctl_; }) + + +/* A PCI IOMMU allocation arena. There are typically two of these + regions per bus. */ +/* ??? The 8400 has a 32-byte pte entry, and the entire table apparently + lives directly on the host bridge (no tlb?). We don't support this + machine, but if we ever did, we'd need to parameterize all this quite + a bit further. Probably with per-bus operation tables. */ + +struct pci_iommu_arena +{ + spinlock_t lock; + struct pci_controller *hose; +#define IOMMU_INVALID_PTE 0x2 /* 32:63 bits MBZ */ +#define IOMMU_RESERVED_PTE 0xface + unsigned long *ptes; + dma_addr_t dma_base; + unsigned int size; + unsigned int next_entry; + unsigned int align_entry; +}; + +#if defined(CONFIG_ALPHA_SRM) && \ + (defined(CONFIG_ALPHA_CIA) || defined(CONFIG_ALPHA_LCA) || \ + defined(CONFIG_ALPHA_AVANTI)) +# define NEED_SRM_SAVE_RESTORE +#else +# undef NEED_SRM_SAVE_RESTORE +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(NEED_SRM_SAVE_RESTORE) +# define ALPHA_RESTORE_SRM_SETUP +#else +# undef ALPHA_RESTORE_SRM_SETUP +#endif + +#ifdef ALPHA_RESTORE_SRM_SETUP +extern void pci_restore_srm_config(void); +#else +#define pci_restore_srm_config() do {} while (0) +#endif + +/* The hose list. 
*/ +extern struct pci_controller *hose_head, **hose_tail; +extern struct pci_controller *pci_isa_hose; + +extern unsigned long alpha_agpgart_size; + +extern void common_init_pci(void); +#define common_swizzle pci_common_swizzle +extern struct pci_controller *alloc_pci_controller(void); +extern struct resource *alloc_resource(void); + +extern struct pci_iommu_arena *iommu_arena_new_node(int, + struct pci_controller *, + dma_addr_t, unsigned long, + unsigned long); +extern struct pci_iommu_arena *iommu_arena_new(struct pci_controller *, + dma_addr_t, unsigned long, + unsigned long); +extern const char *const pci_io_names[]; +extern const char *const pci_mem_names[]; +extern const char pci_hae0_name[]; + +extern unsigned long size_for_memory(unsigned long max); + +extern int iommu_reserve(struct pci_iommu_arena *, long, long); +extern int iommu_release(struct pci_iommu_arena *, long, long); +extern int iommu_bind(struct pci_iommu_arena *, long, long, struct page **); +extern int iommu_unbind(struct pci_iommu_arena *, long, long); + + diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c new file mode 100644 index 0000000000..21f9ac1013 --- /dev/null +++ b/arch/alpha/kernel/pci_iommu.c @@ -0,0 +1,933 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/pci_iommu.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "proto.h" +#include "pci_impl.h" + + +#define DEBUG_ALLOC 0 +#if DEBUG_ALLOC > 0 +# define DBGA(args...) printk(KERN_DEBUG args) +#else +# define DBGA(args...) +#endif +#if DEBUG_ALLOC > 1 +# define DBGA2(args...) printk(KERN_DEBUG args) +#else +# define DBGA2(args...) +#endif + +#define DEBUG_NODIRECT 0 + +#define ISA_DMA_MASK 0x00ffffff + +static inline unsigned long +mk_iommu_pte(unsigned long paddr) +{ + return (paddr >> (PAGE_SHIFT-1)) | 1; +} + +/* Return the minimum of MAX or the first power of two larger + than main memory. */ + +unsigned long +size_for_memory(unsigned long max) +{ + unsigned long mem = max_low_pfn << PAGE_SHIFT; + if (mem < max) + max = roundup_pow_of_two(mem); + return max; +} + +struct pci_iommu_arena * __init +iommu_arena_new_node(int nid, struct pci_controller *hose, dma_addr_t base, + unsigned long window_size, unsigned long align) +{ + unsigned long mem_size; + struct pci_iommu_arena *arena; + + mem_size = window_size / (PAGE_SIZE / sizeof(unsigned long)); + + /* Note that the TLB lookup logic uses bitwise concatenation, + not addition, so the required arena alignment is based on + the size of the window. Retain the align parameter so that + particular systems can over-align the arena. */ + if (align < mem_size) + align = mem_size; + + arena = memblock_alloc(sizeof(*arena), SMP_CACHE_BYTES); + if (!arena) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(*arena)); + arena->ptes = memblock_alloc(mem_size, align); + if (!arena->ptes) + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", + __func__, mem_size, align); + + spin_lock_init(&arena->lock); + arena->hose = hose; + arena->dma_base = base; + arena->size = window_size; + arena->next_entry = 0; + + /* Align allocations to a multiple of a page size. Not needed + unless there are chip bugs. 
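+ (Callers can still request larger alignment per allocation, e.g. + pci_map_single_1() below passes align = 8 pages to force 64KB-aligned + mappings for ISA devices.)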
*/ + arena->align_entry = 1; + + return arena; +} + +struct pci_iommu_arena * __init +iommu_arena_new(struct pci_controller *hose, dma_addr_t base, + unsigned long window_size, unsigned long align) +{ + return iommu_arena_new_node(0, hose, base, window_size, align); +} + +/* Must be called with the arena lock held */ +static long +iommu_arena_find_pages(struct device *dev, struct pci_iommu_arena *arena, + long n, long mask) +{ + unsigned long *ptes; + long i, p, nent; + int pass = 0; + unsigned long base; + unsigned long boundary_size; + + base = arena->dma_base >> PAGE_SHIFT; + boundary_size = dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT); + + /* Search forward for the first mask-aligned sequence of N free ptes */ + ptes = arena->ptes; + nent = arena->size >> PAGE_SHIFT; + p = ALIGN(arena->next_entry, mask + 1); + i = 0; + +again: + while (i < n && p+i < nent) { + if (!i && iommu_is_span_boundary(p, n, base, boundary_size)) { + p = ALIGN(p + 1, mask + 1); + goto again; + } + + if (ptes[p+i]) + p = ALIGN(p + i + 1, mask + 1), i = 0; + else + i = i + 1; + } + + if (i < n) { + if (pass < 1) { + /* + * Reached the end. Flush the TLB and restart + * the search from the beginning. + */ + alpha_mv.mv_pci_tbi(arena->hose, 0, -1); + + pass++; + p = 0; + i = 0; + goto again; + } else + return -1; + } + + /* Success. It's the responsibility of the caller to mark them + in use before releasing the lock */ + return p; +} + +static long +iommu_arena_alloc(struct device *dev, struct pci_iommu_arena *arena, long n, + unsigned int align) +{ + unsigned long flags; + unsigned long *ptes; + long i, p, mask; + + spin_lock_irqsave(&arena->lock, flags); + + /* Search for N empty ptes */ + ptes = arena->ptes; + mask = max(align, arena->align_entry) - 1; + p = iommu_arena_find_pages(dev, arena, n, mask); + if (p < 0) { + spin_unlock_irqrestore(&arena->lock, flags); + return -1; + } + + /* Success. Mark them all in use, ie not zero and invalid + for the iommu tlb that could load them from under us. + The chip specific bits will fill this in with something + kosher when we return. */ + for (i = 0; i < n; ++i) + ptes[p+i] = IOMMU_INVALID_PTE; + + arena->next_entry = p + n; + spin_unlock_irqrestore(&arena->lock, flags); + + return p; +} + +static void +iommu_arena_free(struct pci_iommu_arena *arena, long ofs, long n) +{ + unsigned long *p; + long i; + + p = arena->ptes + ofs; + for (i = 0; i < n; ++i) + p[i] = 0; +} + +/* + * True if the machine supports DAC addressing, and DEV can + * make use of it given MASK. + */ +static int pci_dac_dma_supported(struct pci_dev *dev, u64 mask) +{ + dma_addr_t dac_offset = alpha_mv.pci_dac_offset; + int ok = 1; + + /* If this is not set, the machine doesn't support DAC at all. */ + if (dac_offset == 0) + ok = 0; + + /* The device has to be able to address our DAC bit. */ + if ((dac_offset & dev->dma_mask) != dac_offset) + ok = 0; + + /* If both conditions above are met, we are fine. */ + DBGA("pci_dac_dma_supported %s from %ps\n", + ok ? "yes" : "no", __builtin_return_address(0)); + + return ok; +} + +/* Map a single buffer of the indicated size for PCI DMA in streaming + mode. The 32-bit PCI bus mastering address to use is returned. + Once the device is given the dma address, the device owns this memory + until either pci_unmap_single or pci_dma_sync_single is performed. */ + +static dma_addr_t +pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size, + int dac_allowed) +{ + struct pci_controller *hose = pdev ? 
pdev->sysdata : pci_isa_hose; + dma_addr_t max_dma = pdev ? pdev->dma_mask : ISA_DMA_MASK; + struct pci_iommu_arena *arena; + long npages, dma_ofs, i; + unsigned long paddr; + dma_addr_t ret; + unsigned int align = 0; + struct device *dev = pdev ? &pdev->dev : NULL; + + paddr = __pa(cpu_addr); + +#if !DEBUG_NODIRECT + /* First check to see if we can use the direct map window. */ + if (paddr + size + __direct_map_base - 1 <= max_dma + && paddr + size <= __direct_map_size) { + ret = paddr + __direct_map_base; + + DBGA2("pci_map_single: [%p,%zx] -> direct %llx from %ps\n", + cpu_addr, size, ret, __builtin_return_address(0)); + + return ret; + } +#endif + + /* Next, use DAC if selected earlier. */ + if (dac_allowed) { + ret = paddr + alpha_mv.pci_dac_offset; + + DBGA2("pci_map_single: [%p,%zx] -> DAC %llx from %ps\n", + cpu_addr, size, ret, __builtin_return_address(0)); + + return ret; + } + + /* If the machine doesn't define a pci_tbi routine, we have to + assume it doesn't support sg mapping, and, since we tried to + use direct_map above, it now must be considered an error. */ + if (! alpha_mv.mv_pci_tbi) { + printk_once(KERN_WARNING "pci_map_single: no HW sg\n"); + return DMA_MAPPING_ERROR; + } + + arena = hose->sg_pci; + if (!arena || arena->dma_base + arena->size - 1 > max_dma) + arena = hose->sg_isa; + + npages = iommu_num_pages(paddr, size, PAGE_SIZE); + + /* Force allocation to 64KB boundary for ISA bridges. */ + if (pdev && pdev == isa_bridge) + align = 8; + dma_ofs = iommu_arena_alloc(dev, arena, npages, align); + if (dma_ofs < 0) { + printk(KERN_WARNING "pci_map_single failed: " + "could not allocate dma page tables\n"); + return DMA_MAPPING_ERROR; + } + + paddr &= PAGE_MASK; + for (i = 0; i < npages; ++i, paddr += PAGE_SIZE) + arena->ptes[i + dma_ofs] = mk_iommu_pte(paddr); + + ret = arena->dma_base + dma_ofs * PAGE_SIZE; + ret += (unsigned long)cpu_addr & ~PAGE_MASK; + + DBGA2("pci_map_single: [%p,%zx] np %ld -> sg %llx from %ps\n", + cpu_addr, size, npages, ret, __builtin_return_address(0)); + + return ret; +} + +/* Helper for generic DMA-mapping functions. */ +static struct pci_dev *alpha_gendev_to_pci(struct device *dev) +{ + if (dev && dev_is_pci(dev)) + return to_pci_dev(dev); + + /* Assume that non-PCI devices asking for DMA are either ISA or EISA, + BUG() otherwise. */ + BUG_ON(!isa_bridge); + + /* Assume non-busmaster ISA DMA when dma_mask is not set (the ISA + bridge is bus master then). */ + if (!dev || !dev->dma_mask || !*dev->dma_mask) + return isa_bridge; + + /* For EISA bus masters, return isa_bridge (it might have smaller + dma_mask due to wiring limitations). */ + if (*dev->dma_mask >= isa_bridge->dma_mask) + return isa_bridge; + + /* This assumes ISA bus master with dma_mask 0xffffff. */ + return NULL; +} + +static dma_addr_t alpha_pci_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + struct pci_dev *pdev = alpha_gendev_to_pci(dev); + int dac_allowed; + + BUG_ON(dir == PCI_DMA_NONE); + + dac_allowed = pdev ? pci_dac_dma_supported(pdev, pdev->dma_mask) : 0; + return pci_map_single_1(pdev, (char *)page_address(page) + offset, + size, dac_allowed); +} + +/* Unmap a single streaming mode DMA translation. The DMA_ADDR and + SIZE must match what was provided for in a previous pci_map_single + call. All other usages are undefined. After this call, reads by + the cpu to the buffer are guaranteed to see whatever the device + wrote there. 
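Stepping back, pci_map_single_1() above chooses between three address spaces; the sketch below restates that decision as a standalone program. Every identifier in it (choose_mapping, MAP_DIRECT, and so on) is an illustrative stand-in for reading along, not a symbol from this patch.

        // Compressed restatement of the three-way decision in pci_map_single_1():
        // prefer the direct-map window, then 64-bit DAC cycles, then an IOMMU
        // scatter-gather arena.
        #include <stdio.h>

        enum map_kind { MAP_DIRECT, MAP_DAC, MAP_SG, MAP_FAIL };

        static enum map_kind choose_mapping(unsigned long paddr, unsigned long size,
                                            unsigned long direct_base,
                                            unsigned long direct_size,
                                            unsigned long max_dma,
                                            int dac_allowed, int have_iommu)
        {
                // Direct window: bus address is paddr + direct_base; it must fit
                // both the window and the device's dma_mask.
                if (paddr + size + direct_base - 1 <= max_dma
                    && paddr + size <= direct_size)
                        return MAP_DIRECT;
                // DAC: add the chipset's DAC offset and issue 64-bit address cycles.
                if (dac_allowed)
                        return MAP_DAC;
                // Otherwise build page-table entries in an arena, if the chipset
                // provides a TLB-invalidate hook at all.
                return have_iommu ? MAP_SG : MAP_FAIL;
        }

        int main(void)
        {
                // A buffer starting just past a 1 GB direct window, no DAC: falls
                // through to the scatter-gather arena.
                enum map_kind k = choose_mapping(1UL << 30, 0x2000, 0x40000000UL,
                                                 1UL << 30, 0xffffffffUL, 0, 1);
                printf("%d\n", k);      // prints 2 (MAP_SG)
                return 0;
        }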
*/ + +static void alpha_pci_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + unsigned long flags; + struct pci_dev *pdev = alpha_gendev_to_pci(dev); + struct pci_controller *hose = pdev ? pdev->sysdata : pci_isa_hose; + struct pci_iommu_arena *arena; + long dma_ofs, npages; + + BUG_ON(dir == PCI_DMA_NONE); + + if (dma_addr >= __direct_map_base + && dma_addr < __direct_map_base + __direct_map_size) { + /* Nothing to do. */ + + DBGA2("pci_unmap_single: direct [%llx,%zx] from %ps\n", + dma_addr, size, __builtin_return_address(0)); + + return; + } + + if (dma_addr > 0xffffffff) { + DBGA2("pci64_unmap_single: DAC [%llx,%zx] from %ps\n", + dma_addr, size, __builtin_return_address(0)); + return; + } + + arena = hose->sg_pci; + if (!arena || dma_addr < arena->dma_base) + arena = hose->sg_isa; + + dma_ofs = (dma_addr - arena->dma_base) >> PAGE_SHIFT; + if (dma_ofs * PAGE_SIZE >= arena->size) { + printk(KERN_ERR "Bogus pci_unmap_single: dma_addr %llx " + " base %llx size %x\n", + dma_addr, arena->dma_base, arena->size); + return; + BUG(); + } + + npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); + + spin_lock_irqsave(&arena->lock, flags); + + iommu_arena_free(arena, dma_ofs, npages); + + /* If we're freeing ptes above the `next_entry' pointer (they + may have snuck back into the TLB since the last wrap flush), + we need to flush the TLB before reallocating the latter. */ + if (dma_ofs >= arena->next_entry) + alpha_mv.mv_pci_tbi(hose, dma_addr, dma_addr + size - 1); + + spin_unlock_irqrestore(&arena->lock, flags); + + DBGA2("pci_unmap_single: sg [%llx,%zx] np %ld from %ps\n", + dma_addr, size, npages, __builtin_return_address(0)); +} + +/* Allocate and map kernel buffer using consistent mode DMA for PCI + device. Returns non-NULL cpu-view pointer to the buffer if + successful and sets *DMA_ADDRP to the pci side dma address as well, + else DMA_ADDRP is undefined. */ + +static void *alpha_pci_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addrp, gfp_t gfp, + unsigned long attrs) +{ + struct pci_dev *pdev = alpha_gendev_to_pci(dev); + void *cpu_addr; + long order = get_order(size); + + gfp &= ~GFP_DMA; + +try_again: + cpu_addr = (void *)__get_free_pages(gfp | __GFP_ZERO, order); + if (! cpu_addr) { + printk(KERN_INFO "pci_alloc_consistent: " + "get_free_pages failed from %ps\n", + __builtin_return_address(0)); + /* ??? Really atomic allocation? Otherwise we could play + with vmalloc and sg if we can't find contiguous memory. */ + return NULL; + } + memset(cpu_addr, 0, size); + + *dma_addrp = pci_map_single_1(pdev, cpu_addr, size, 0); + if (*dma_addrp == DMA_MAPPING_ERROR) { + free_pages((unsigned long)cpu_addr, order); + if (alpha_mv.mv_pci_tbi || (gfp & GFP_DMA)) + return NULL; + /* The address doesn't fit required mask and we + do not have iommu. Try again with GFP_DMA. */ + gfp |= GFP_DMA; + goto try_again; + } + + DBGA2("pci_alloc_consistent: %zx -> [%p,%llx] from %ps\n", + size, cpu_addr, *dma_addrp, __builtin_return_address(0)); + + return cpu_addr; +} + +/* Free and unmap a consistent DMA buffer. CPU_ADDR and DMA_ADDR must + be values that were returned from pci_alloc_consistent. SIZE must + be the same as what as passed into pci_alloc_consistent. + References to the memory and mappings associated with CPU_ADDR or + DMA_ADDR past this call are illegal. 
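For context, drivers normally reach alpha_pci_alloc_coherent()/alpha_pci_free_coherent() through the generic DMA API rather than calling them directly; a minimal driver-side sketch is below. demo_setup_ring() and its ring layout are invented for illustration; only dma_alloc_coherent() and dma_free_coherent() are real interfaces.

        // Illustrative driver-side view: the generic DMA API routes to the
        // alpha_pci_alloc_coherent()/alpha_pci_free_coherent() callbacks above
        // via the dma_map_ops table registered at the end of this file.
        #include <linux/dma-mapping.h>
        #include <linux/device.h>
        #include <linux/errno.h>

        static int demo_setup_ring(struct device *dev, size_t ring_bytes)
        {
                dma_addr_t bus_addr;
                void *ring;

                ring = dma_alloc_coherent(dev, ring_bytes, &bus_addr, GFP_KERNEL);
                if (!ring)
                        return -ENOMEM;

                // ... program bus_addr into the device, touch ring from the CPU ...

                dma_free_coherent(dev, ring_bytes, ring, bus_addr);
                return 0;
        }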
*/ + +static void alpha_pci_free_coherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_addr, + unsigned long attrs) +{ + struct pci_dev *pdev = alpha_gendev_to_pci(dev); + pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); + free_pages((unsigned long)cpu_addr, get_order(size)); + + DBGA2("pci_free_consistent: [%llx,%zx] from %ps\n", + dma_addr, size, __builtin_return_address(0)); +} + +/* Classify the elements of the scatterlist. Write dma_address + of each element with: + 0 : Followers all physically adjacent. + 1 : Followers all virtually adjacent. + -1 : Not leader, physically adjacent to previous. + -2 : Not leader, virtually adjacent to previous. + Write dma_length of each leader with the combined lengths of + the mergable followers. */ + +#define SG_ENT_VIRT_ADDRESS(SG) (sg_virt((SG))) +#define SG_ENT_PHYS_ADDRESS(SG) __pa(SG_ENT_VIRT_ADDRESS(SG)) + +static void +sg_classify(struct device *dev, struct scatterlist *sg, struct scatterlist *end, + int virt_ok) +{ + unsigned long next_paddr; + struct scatterlist *leader; + long leader_flag, leader_length; + unsigned int max_seg_size; + + leader = sg; + leader_flag = 0; + leader_length = leader->length; + next_paddr = SG_ENT_PHYS_ADDRESS(leader) + leader_length; + + /* we will not marge sg without device. */ + max_seg_size = dev ? dma_get_max_seg_size(dev) : 0; + for (++sg; sg < end; ++sg) { + unsigned long addr, len; + addr = SG_ENT_PHYS_ADDRESS(sg); + len = sg->length; + + if (leader_length + len > max_seg_size) + goto new_segment; + + if (next_paddr == addr) { + sg->dma_address = -1; + leader_length += len; + } else if (((next_paddr | addr) & ~PAGE_MASK) == 0 && virt_ok) { + sg->dma_address = -2; + leader_flag = 1; + leader_length += len; + } else { +new_segment: + leader->dma_address = leader_flag; + leader->dma_length = leader_length; + leader = sg; + leader_flag = 0; + leader_length = len; + } + + next_paddr = addr + len; + } + + leader->dma_address = leader_flag; + leader->dma_length = leader_length; +} + +/* Given a scatterlist leader, choose an allocation method and fill + in the blanks. */ + +static int +sg_fill(struct device *dev, struct scatterlist *leader, struct scatterlist *end, + struct scatterlist *out, struct pci_iommu_arena *arena, + dma_addr_t max_dma, int dac_allowed) +{ + unsigned long paddr = SG_ENT_PHYS_ADDRESS(leader); + long size = leader->dma_length; + struct scatterlist *sg; + unsigned long *ptes; + long npages, dma_ofs, i; + +#if !DEBUG_NODIRECT + /* If everything is physically contiguous, and the addresses + fall into the direct-map window, use it. */ + if (leader->dma_address == 0 + && paddr + size + __direct_map_base - 1 <= max_dma + && paddr + size <= __direct_map_size) { + out->dma_address = paddr + __direct_map_base; + out->dma_length = size; + + DBGA(" sg_fill: [%p,%lx] -> direct %llx\n", + __va(paddr), size, out->dma_address); + + return 0; + } +#endif + + /* If physically contiguous and DAC is available, use it. */ + if (leader->dma_address == 0 && dac_allowed) { + out->dma_address = paddr + alpha_mv.pci_dac_offset; + out->dma_length = size; + + DBGA(" sg_fill: [%p,%lx] -> DAC %llx\n", + __va(paddr), size, out->dma_address); + + return 0; + } + + /* Otherwise, we'll use the iommu to make the pages virtually + contiguous. */ + + paddr &= ~PAGE_MASK; + npages = iommu_num_pages(paddr, size, PAGE_SIZE); + dma_ofs = iommu_arena_alloc(dev, arena, npages, 0); + if (dma_ofs < 0) { + /* If we attempted a direct map above but failed, die. 
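The 0/1/-1/-2 encoding that sg_classify() writes into dma_address is easiest to see on a toy input; the self-contained program below mimics the classification with plain arrays. Everything in it (toy_seg, classify) is an illustrative stand-in, and it ignores the max_seg_size check for brevity.

        // Toy illustration of the leader/follower tagging used by sg_classify():
        // 0/1 mark a leader (physical/virtual merge), -1/-2 mark a follower that
        // is physically or virtually adjacent to the previous entry.
        #include <stdio.h>

        struct toy_seg { unsigned long paddr, len; long tag; };

        static void classify(struct toy_seg *s, int n, unsigned long page_mask)
        {
                int leader = 0;
                unsigned long next = s[0].paddr + s[0].len;

                s[0].tag = 0;
                for (int i = 1; i < n; i++) {
                        if (s[i].paddr == next)
                                s[i].tag = -1;                      // physically adjacent
                        else if (((next | s[i].paddr) & ~page_mask) == 0)
                                s[i].tag = -2, s[leader].tag = 1;   // page-aligned gap: mergeable via IOMMU
                        else
                                leader = i, s[i].tag = 0;           // start a new leader
                        next = s[i].paddr + s[i].len;
                }
        }

        int main(void)
        {
                // 8 KB pages: three segments, the first two physically contiguous,
                // the third only page-aligned with respect to the second.
                struct toy_seg s[3] = {
                        { 0x10000, 0x2000, 0 },
                        { 0x12000, 0x2000, 0 },
                        { 0x40000, 0x2000, 0 },
                };
                classify(s, 3, ~0x1fffUL);
                for (int i = 0; i < 3; i++)
                        printf("seg %d tag %ld\n", i, s[i].tag);    // 1, -1, -2
                return 0;
        }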
*/ + if (leader->dma_address == 0) + return -1; + + /* Otherwise, break up the remaining virtually contiguous + hunks into individual direct maps and retry. */ + sg_classify(dev, leader, end, 0); + return sg_fill(dev, leader, end, out, arena, max_dma, dac_allowed); + } + + out->dma_address = arena->dma_base + dma_ofs*PAGE_SIZE + paddr; + out->dma_length = size; + + DBGA(" sg_fill: [%p,%lx] -> sg %llx np %ld\n", + __va(paddr), size, out->dma_address, npages); + + /* All virtually contiguous. We need to find the length of each + physically contiguous subsegment to fill in the ptes. */ + ptes = &arena->ptes[dma_ofs]; + sg = leader; + do { +#if DEBUG_ALLOC > 0 + struct scatterlist *last_sg = sg; +#endif + + size = sg->length; + paddr = SG_ENT_PHYS_ADDRESS(sg); + + while (sg+1 < end && (int) sg[1].dma_address == -1) { + size += sg[1].length; + sg = sg_next(sg); + } + + npages = iommu_num_pages(paddr, size, PAGE_SIZE); + + paddr &= PAGE_MASK; + for (i = 0; i < npages; ++i, paddr += PAGE_SIZE) + *ptes++ = mk_iommu_pte(paddr); + +#if DEBUG_ALLOC > 0 + DBGA(" (%ld) [%p,%x] np %ld\n", + last_sg - leader, SG_ENT_VIRT_ADDRESS(last_sg), + last_sg->length, npages); + while (++last_sg <= sg) { + DBGA(" (%ld) [%p,%x] cont\n", + last_sg - leader, SG_ENT_VIRT_ADDRESS(last_sg), + last_sg->length); + } +#endif + } while (++sg < end && (int) sg->dma_address < 0); + + return 1; +} + +static int alpha_pci_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + unsigned long attrs) +{ + struct pci_dev *pdev = alpha_gendev_to_pci(dev); + struct scatterlist *start, *end, *out; + struct pci_controller *hose; + struct pci_iommu_arena *arena; + dma_addr_t max_dma; + int dac_allowed; + + BUG_ON(dir == PCI_DMA_NONE); + + dac_allowed = dev ? pci_dac_dma_supported(pdev, pdev->dma_mask) : 0; + + /* Fast path single entry scatterlists. */ + if (nents == 1) { + sg->dma_length = sg->length; + sg->dma_address + = pci_map_single_1(pdev, SG_ENT_VIRT_ADDRESS(sg), + sg->length, dac_allowed); + if (sg->dma_address == DMA_MAPPING_ERROR) + return -EIO; + return 1; + } + + start = sg; + end = sg + nents; + + /* First, prepare information about the entries. */ + sg_classify(dev, sg, end, alpha_mv.mv_pci_tbi != 0); + + /* Second, figure out where we're going to map things. */ + if (alpha_mv.mv_pci_tbi) { + hose = pdev ? pdev->sysdata : pci_isa_hose; + max_dma = pdev ? pdev->dma_mask : ISA_DMA_MASK; + arena = hose->sg_pci; + if (!arena || arena->dma_base + arena->size - 1 > max_dma) + arena = hose->sg_isa; + } else { + max_dma = -1; + arena = NULL; + hose = NULL; + } + + /* Third, iterate over the scatterlist leaders and allocate + dma space as needed. */ + for (out = sg; sg < end; ++sg) { + if ((int) sg->dma_address < 0) + continue; + if (sg_fill(dev, sg, end, out, arena, max_dma, dac_allowed) < 0) + goto error; + out++; + } + + /* Mark the end of the list for pci_unmap_sg. */ + if (out < end) + out->dma_length = 0; + + if (out - start == 0) { + printk(KERN_WARNING "pci_map_sg failed: no entries?\n"); + return -ENOMEM; + } + DBGA("pci_map_sg: %ld entries\n", out - start); + + return out - start; + + error: + printk(KERN_WARNING "pci_map_sg failed: " + "could not allocate dma page tables\n"); + + /* Some allocation failed while mapping the scatterlist + entries. Unmap them now. */ + if (out > start) + pci_unmap_sg(pdev, start, out - start, dir); + return -ENOMEM; +} + +/* Unmap a set of streaming mode DMA translations. 
Again, cpu read + rules concerning calls here are the same as for pci_unmap_single() + above. */ + +static void alpha_pci_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + unsigned long attrs) +{ + struct pci_dev *pdev = alpha_gendev_to_pci(dev); + unsigned long flags; + struct pci_controller *hose; + struct pci_iommu_arena *arena; + struct scatterlist *end; + dma_addr_t max_dma; + dma_addr_t fbeg, fend; + + BUG_ON(dir == PCI_DMA_NONE); + + if (! alpha_mv.mv_pci_tbi) + return; + + hose = pdev ? pdev->sysdata : pci_isa_hose; + max_dma = pdev ? pdev->dma_mask : ISA_DMA_MASK; + arena = hose->sg_pci; + if (!arena || arena->dma_base + arena->size - 1 > max_dma) + arena = hose->sg_isa; + + fbeg = -1, fend = 0; + + spin_lock_irqsave(&arena->lock, flags); + + for (end = sg + nents; sg < end; ++sg) { + dma_addr_t addr; + size_t size; + long npages, ofs; + dma_addr_t tend; + + addr = sg->dma_address; + size = sg->dma_length; + if (!size) + break; + + if (addr > 0xffffffff) { + /* It's a DAC address -- nothing to do. */ + DBGA(" (%ld) DAC [%llx,%zx]\n", + sg - end + nents, addr, size); + continue; + } + + if (addr >= __direct_map_base + && addr < __direct_map_base + __direct_map_size) { + /* Nothing to do. */ + DBGA(" (%ld) direct [%llx,%zx]\n", + sg - end + nents, addr, size); + continue; + } + + DBGA(" (%ld) sg [%llx,%zx]\n", + sg - end + nents, addr, size); + + npages = iommu_num_pages(addr, size, PAGE_SIZE); + ofs = (addr - arena->dma_base) >> PAGE_SHIFT; + iommu_arena_free(arena, ofs, npages); + + tend = addr + size - 1; + if (fbeg > addr) fbeg = addr; + if (fend < tend) fend = tend; + } + + /* If we're freeing ptes above the `next_entry' pointer (they + may have snuck back into the TLB since the last wrap flush), + we need to flush the TLB before reallocating the latter. */ + if ((fend - arena->dma_base) >> PAGE_SHIFT >= arena->next_entry) + alpha_mv.mv_pci_tbi(hose, fbeg, fend); + + spin_unlock_irqrestore(&arena->lock, flags); + + DBGA("pci_unmap_sg: %ld entries\n", nents - (end - sg)); +} + +/* Return whether the given PCI device DMA address mask can be + supported properly. */ + +static int alpha_pci_supported(struct device *dev, u64 mask) +{ + struct pci_dev *pdev = alpha_gendev_to_pci(dev); + struct pci_controller *hose; + struct pci_iommu_arena *arena; + + /* If there exists a direct map, and the mask fits either + the entire direct mapped space or the total system memory as + shifted by the map base */ + if (__direct_map_size != 0 + && (__direct_map_base + __direct_map_size - 1 <= mask || + __direct_map_base + (max_low_pfn << PAGE_SHIFT) - 1 <= mask)) + return 1; + + /* Check that we have a scatter-gather arena that fits. */ + hose = pdev ? pdev->sysdata : pci_isa_hose; + arena = hose->sg_isa; + if (arena && arena->dma_base + arena->size - 1 <= mask) + return 1; + arena = hose->sg_pci; + if (arena && arena->dma_base + arena->size - 1 <= mask) + return 1; + + /* As last resort try ZONE_DMA. */ + if (!__direct_map_base && MAX_DMA_ADDRESS - IDENT_ADDR - 1 <= mask) + return 1; + + return 0; +} + + +/* + * AGP GART extensions to the IOMMU + */ +int +iommu_reserve(struct pci_iommu_arena *arena, long pg_count, long align_mask) +{ + unsigned long flags; + unsigned long *ptes; + long i, p; + + if (!arena) return -EINVAL; + + spin_lock_irqsave(&arena->lock, flags); + + /* Search for N empty ptes. 
*/ + ptes = arena->ptes; + p = iommu_arena_find_pages(NULL, arena, pg_count, align_mask); + if (p < 0) { + spin_unlock_irqrestore(&arena->lock, flags); + return -1; + } + + /* Success. Mark them all reserved (ie not zero and invalid) + for the iommu tlb that could load them from under us. + They will be filled in with valid bits by _bind() */ + for (i = 0; i < pg_count; ++i) + ptes[p+i] = IOMMU_RESERVED_PTE; + + arena->next_entry = p + pg_count; + spin_unlock_irqrestore(&arena->lock, flags); + + return p; +} + +int +iommu_release(struct pci_iommu_arena *arena, long pg_start, long pg_count) +{ + unsigned long *ptes; + long i; + + if (!arena) return -EINVAL; + + ptes = arena->ptes; + + /* Make sure they're all reserved first... */ + for(i = pg_start; i < pg_start + pg_count; i++) + if (ptes[i] != IOMMU_RESERVED_PTE) + return -EBUSY; + + iommu_arena_free(arena, pg_start, pg_count); + return 0; +} + +int +iommu_bind(struct pci_iommu_arena *arena, long pg_start, long pg_count, + struct page **pages) +{ + unsigned long flags; + unsigned long *ptes; + long i, j; + + if (!arena) return -EINVAL; + + spin_lock_irqsave(&arena->lock, flags); + + ptes = arena->ptes; + + for(j = pg_start; j < pg_start + pg_count; j++) { + if (ptes[j] != IOMMU_RESERVED_PTE) { + spin_unlock_irqrestore(&arena->lock, flags); + return -EBUSY; + } + } + + for(i = 0, j = pg_start; i < pg_count; i++, j++) + ptes[j] = mk_iommu_pte(page_to_phys(pages[i])); + + spin_unlock_irqrestore(&arena->lock, flags); + + return 0; +} + +int +iommu_unbind(struct pci_iommu_arena *arena, long pg_start, long pg_count) +{ + unsigned long *p; + long i; + + if (!arena) return -EINVAL; + + p = arena->ptes + pg_start; + for(i = 0; i < pg_count; i++) + p[i] = IOMMU_RESERVED_PTE; + + return 0; +} + +const struct dma_map_ops alpha_pci_ops = { + .alloc = alpha_pci_alloc_coherent, + .free = alpha_pci_free_coherent, + .map_page = alpha_pci_map_page, + .unmap_page = alpha_pci_unmap_page, + .map_sg = alpha_pci_map_sg, + .unmap_sg = alpha_pci_unmap_sg, + .dma_supported = alpha_pci_supported, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, +}; +EXPORT_SYMBOL(alpha_pci_ops); diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c new file mode 100644 index 0000000000..efcf732170 --- /dev/null +++ b/arch/alpha/kernel/perf_event.c @@ -0,0 +1,899 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hardware performance events for the Alpha. + * + * We implement HW counts on the EV67 and subsequent CPUs only. + * + * (C) 2010 Michael J. Cree + * + * Somewhat based on the Sparc code, and to a lesser extent the PowerPC and + * ARM code, which are copyright by their respective authors. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + + +/* The maximum number of PMCs on any Alpha CPU whatsoever. */ +#define MAX_HWEVENTS 3 +#define PMC_NO_INDEX -1 + +/* For tracking PMCs and the hw events they monitor on each CPU. */ +struct cpu_hw_events { + int enabled; + /* Number of events scheduled; also number entries valid in arrays below. */ + int n_events; + /* Number events added since last hw_perf_disable(). */ + int n_added; + /* Events currently scheduled. */ + struct perf_event *event[MAX_HWEVENTS]; + /* Event type of each scheduled event. 
*/ + unsigned long evtype[MAX_HWEVENTS]; + /* Current index of each scheduled event; if not yet determined + * contains PMC_NO_INDEX. + */ + int current_idx[MAX_HWEVENTS]; + /* The active PMCs' config for easy use with wrperfmon(). */ + unsigned long config; + /* The active counters' indices for easy use with wrperfmon(). */ + unsigned long idx_mask; +}; +DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); + + + +/* + * A structure to hold the description of the PMCs available on a particular + * type of Alpha CPU. + */ +struct alpha_pmu_t { + /* Mapping of the perf system hw event types to indigenous event types */ + const int *event_map; + /* The number of entries in the event_map */ + int max_events; + /* The number of PMCs on this Alpha */ + int num_pmcs; + /* + * All PMC counters reside in the IBOX register PCTR. This is the + * LSB of the counter. + */ + int pmc_count_shift[MAX_HWEVENTS]; + /* + * The mask that isolates the PMC bits when the LSB of the counter + * is shifted to bit 0. + */ + unsigned long pmc_count_mask[MAX_HWEVENTS]; + /* The maximum period the PMC can count. */ + unsigned long pmc_max_period[MAX_HWEVENTS]; + /* + * The maximum value that may be written to the counter due to + * hardware restrictions is pmc_max_period - pmc_left. + */ + long pmc_left[3]; + /* Subroutine for allocation of PMCs. Enforces constraints. */ + int (*check_constraints)(struct perf_event **, unsigned long *, int); + /* Subroutine for checking validity of a raw event for this PMU. */ + int (*raw_event_valid)(u64 config); +}; + +/* + * The Alpha CPU PMU description currently in operation. This is set during + * the boot process to the specific CPU of the machine. + */ +static const struct alpha_pmu_t *alpha_pmu; + + +#define HW_OP_UNSUPPORTED -1 + +/* + * The hardware description of the EV67, EV68, EV69, EV7 and EV79 PMUs + * follow. Since they are identical we refer to them collectively as the + * EV67 henceforth. + */ + +/* + * EV67 PMC event types + * + * There is no one-to-one mapping of the possible hw event types to the + * actual codes that are used to program the PMCs hence we introduce our + * own hw event type identifiers. + */ +enum ev67_pmc_event_type { + EV67_CYCLES = 1, + EV67_INSTRUCTIONS, + EV67_BCACHEMISS, + EV67_MBOXREPLAY, + EV67_LAST_ET +}; +#define EV67_NUM_EVENT_TYPES (EV67_LAST_ET-EV67_CYCLES) + + +/* Mapping of the hw event types to the perf tool interface */ +static const int ev67_perfmon_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = EV67_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = EV67_INSTRUCTIONS, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = EV67_BCACHEMISS, +}; + +struct ev67_mapping_t { + int config; + int idx; +}; + +/* + * The mapping used for one event only - these must be in same order as enum + * ev67_pmc_event_type definition. + */ +static const struct ev67_mapping_t ev67_mapping[] = { + {EV67_PCTR_INSTR_CYCLES, 1}, /* EV67_CYCLES, */ + {EV67_PCTR_INSTR_CYCLES, 0}, /* EV67_INSTRUCTIONS */ + {EV67_PCTR_INSTR_BCACHEMISS, 1}, /* EV67_BCACHEMISS */ + {EV67_PCTR_CYCLES_MBOX, 1} /* EV67_MBOXREPLAY */ +}; + + +/* + * Check that a group of events can be simultaneously scheduled on to the + * EV67 PMU. Also allocate counter indices and config. 
+ */ +static int ev67_check_constraints(struct perf_event **event, + unsigned long *evtype, int n_ev) +{ + int idx0; + unsigned long config; + + idx0 = ev67_mapping[evtype[0]-1].idx; + config = ev67_mapping[evtype[0]-1].config; + if (n_ev == 1) + goto success; + + BUG_ON(n_ev != 2); + + if (evtype[0] == EV67_MBOXREPLAY || evtype[1] == EV67_MBOXREPLAY) { + /* MBOX replay traps must be on PMC 1 */ + idx0 = (evtype[0] == EV67_MBOXREPLAY) ? 1 : 0; + /* Only cycles can accompany MBOX replay traps */ + if (evtype[idx0] == EV67_CYCLES) { + config = EV67_PCTR_CYCLES_MBOX; + goto success; + } + } + + if (evtype[0] == EV67_BCACHEMISS || evtype[1] == EV67_BCACHEMISS) { + /* Bcache misses must be on PMC 1 */ + idx0 = (evtype[0] == EV67_BCACHEMISS) ? 1 : 0; + /* Only instructions can accompany Bcache misses */ + if (evtype[idx0] == EV67_INSTRUCTIONS) { + config = EV67_PCTR_INSTR_BCACHEMISS; + goto success; + } + } + + if (evtype[0] == EV67_INSTRUCTIONS || evtype[1] == EV67_INSTRUCTIONS) { + /* Instructions must be on PMC 0 */ + idx0 = (evtype[0] == EV67_INSTRUCTIONS) ? 0 : 1; + /* By this point only cycles can accompany instructions */ + if (evtype[idx0^1] == EV67_CYCLES) { + config = EV67_PCTR_INSTR_CYCLES; + goto success; + } + } + + /* Otherwise, darn it, there is a conflict. */ + return -1; + +success: + event[0]->hw.idx = idx0; + event[0]->hw.config_base = config; + if (n_ev == 2) { + event[1]->hw.idx = idx0 ^ 1; + event[1]->hw.config_base = config; + } + return 0; +} + + +static int ev67_raw_event_valid(u64 config) +{ + return config >= EV67_CYCLES && config < EV67_LAST_ET; +}; + + +static const struct alpha_pmu_t ev67_pmu = { + .event_map = ev67_perfmon_event_map, + .max_events = ARRAY_SIZE(ev67_perfmon_event_map), + .num_pmcs = 2, + .pmc_count_shift = {EV67_PCTR_0_COUNT_SHIFT, EV67_PCTR_1_COUNT_SHIFT, 0}, + .pmc_count_mask = {EV67_PCTR_0_COUNT_MASK, EV67_PCTR_1_COUNT_MASK, 0}, + .pmc_max_period = {(1UL<<20) - 1, (1UL<<20) - 1, 0}, + .pmc_left = {16, 4, 0}, + .check_constraints = ev67_check_constraints, + .raw_event_valid = ev67_raw_event_valid, +}; + + + +/* + * Helper routines to ensure that we read/write only the correct PMC bits + * when calling the wrperfmon PALcall. + */ +static inline void alpha_write_pmc(int idx, unsigned long val) +{ + val &= alpha_pmu->pmc_count_mask[idx]; + val <<= alpha_pmu->pmc_count_shift[idx]; + val |= (1<>= alpha_pmu->pmc_count_shift[idx]; + val &= alpha_pmu->pmc_count_mask[idx]; + return val; +} + +/* Set a new period to sample over */ +static int alpha_perf_event_set_period(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + long left = local64_read(&hwc->period_left); + long period = hwc->sample_period; + int ret = 0; + + if (unlikely(left <= -period)) { + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (unlikely(left <= 0)) { + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + /* + * Hardware restrictions require that the counters must not be + * written with values that are too close to the maximum period. 
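Read together with the alpha_pmu fields above, the PMC access helpers boil down to shift-and-mask around the wrperfmon() PALcall; the sketch below is inferred from the pmc_count_shift/pmc_count_mask fields and the PERFMON_CMD_* commands used elsewhere in this file, relies on those declarations, and is not claimed to be a verbatim copy of the source.

        // Sketch of the PMC read/write helpers: isolate one counter's field in
        // the packed PCTR value, or pack a new value before handing it to PALcode.
        static inline void demo_write_pmc(int idx, unsigned long val)
        {
                val &= alpha_pmu->pmc_count_mask[idx];
                val <<= alpha_pmu->pmc_count_shift[idx];
                val |= (1UL << idx);                    // select which PMC to update
                wrperfmon(PERFMON_CMD_WRITE, val);      // PALcall writes the counter
        }

        static inline unsigned long demo_read_pmc(int idx)
        {
                unsigned long val = wrperfmon(PERFMON_CMD_READ, 0);

                val >>= alpha_pmu->pmc_count_shift[idx];   // move this PMC's field to bit 0
                val &= alpha_pmu->pmc_count_mask[idx];     // strip the other counter's bits
                return val;
        }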
+ */ + if (unlikely(left < alpha_pmu->pmc_left[idx])) + left = alpha_pmu->pmc_left[idx]; + + if (left > (long)alpha_pmu->pmc_max_period[idx]) + left = alpha_pmu->pmc_max_period[idx]; + + local64_set(&hwc->prev_count, (unsigned long)(-left)); + + alpha_write_pmc(idx, (unsigned long)(-left)); + + perf_event_update_userpage(event); + + return ret; +} + + +/* + * Calculates the count (the 'delta') since the last time the PMC was read. + * + * As the PMCs' full period can easily be exceeded within the perf system + * sampling period we cannot use any high order bits as a guard bit in the + * PMCs to detect overflow as is done by other architectures. The code here + * calculates the delta on the basis that there is no overflow when ovf is + * zero. The value passed via ovf by the interrupt handler corrects for + * overflow. + * + * This can be racey on rare occasions -- a call to this routine can occur + * with an overflowed counter just before the PMI service routine is called. + * The check for delta negative hopefully always rectifies this situation. + */ +static unsigned long alpha_perf_event_update(struct perf_event *event, + struct hw_perf_event *hwc, int idx, long ovf) +{ + long prev_raw_count, new_raw_count; + long delta; + +again: + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = alpha_read_pmc(idx); + + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf; + + /* It is possible on very rare occasions that the PMC has overflowed + * but the interrupt is yet to come. Detect and fix this situation. + */ + if (unlikely(delta < 0)) { + delta += alpha_pmu->pmc_max_period[idx] + 1; + } + + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); + + return new_raw_count; +} + + +/* + * Collect all HW events into the array event[]. + */ +static int collect_events(struct perf_event *group, int max_count, + struct perf_event *event[], unsigned long *evtype, + int *current_idx) +{ + struct perf_event *pe; + int n = 0; + + if (!is_software_event(group)) { + if (n >= max_count) + return -1; + event[n] = group; + evtype[n] = group->hw.event_base; + current_idx[n++] = PMC_NO_INDEX; + } + for_each_sibling_event(pe, group) { + if (!is_software_event(pe) && pe->state != PERF_EVENT_STATE_OFF) { + if (n >= max_count) + return -1; + event[n] = pe; + evtype[n] = pe->hw.event_base; + current_idx[n++] = PMC_NO_INDEX; + } + } + return n; +} + + + +/* + * Check that a group of events can be simultaneously scheduled on to the PMU. + */ +static int alpha_check_constraints(struct perf_event **events, + unsigned long *evtypes, int n_ev) +{ + + /* No HW events is possible from hw_perf_group_sched_in(). */ + if (n_ev == 0) + return 0; + + if (n_ev > alpha_pmu->num_pmcs) + return -1; + + return alpha_pmu->check_constraints(events, evtypes, n_ev); +} + + +/* + * If new events have been scheduled then update cpuc with the new + * configuration. This may involve shifting cycle counts from one PMC to + * another. 
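The wraparound correction in alpha_perf_event_update() is easiest to check with concrete numbers; the short program below walks one case with the EV67's 20-bit counters. The values are made up for illustration.

        // Worked example of the negative-delta fixup: the PMC wrapped but the
        // PMI has not been serviced yet, so the raw difference goes negative and
        // max_period + 1 must be added back.
        #include <stdio.h>

        int main(void)
        {
                long max_period = (1L << 20) - 1;   // EV67 PMCs are 20 bits wide
                long prev = 0xffff0;                // value cached in hwc->prev_count
                long now  = 0x00010;                // freshly read PMC after a wrap
                long ovf  = 0;                      // no overflow credit outside the PMI

                long delta = now - prev + ovf;
                if (delta < 0)
                        delta += max_period + 1;    // 0x10 - 0xffff0 + 0x100000 = 0x20

                printf("delta = %ld events\n", delta);
                return 0;
        }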
+ */ +static void maybe_change_configuration(struct cpu_hw_events *cpuc) +{ + int j; + + if (cpuc->n_added == 0) + return; + + /* Find counters that are moving to another PMC and update */ + for (j = 0; j < cpuc->n_events; j++) { + struct perf_event *pe = cpuc->event[j]; + + if (cpuc->current_idx[j] != PMC_NO_INDEX && + cpuc->current_idx[j] != pe->hw.idx) { + alpha_perf_event_update(pe, &pe->hw, cpuc->current_idx[j], 0); + cpuc->current_idx[j] = PMC_NO_INDEX; + } + } + + /* Assign to counters all unassigned events. */ + cpuc->idx_mask = 0; + for (j = 0; j < cpuc->n_events; j++) { + struct perf_event *pe = cpuc->event[j]; + struct hw_perf_event *hwc = &pe->hw; + int idx = hwc->idx; + + if (cpuc->current_idx[j] == PMC_NO_INDEX) { + alpha_perf_event_set_period(pe, hwc, idx); + cpuc->current_idx[j] = idx; + } + + if (!(hwc->state & PERF_HES_STOPPED)) + cpuc->idx_mask |= (1<current_idx[j]); + } + cpuc->config = cpuc->event[0]->hw.config_base; +} + + + +/* Schedule perf HW event on to PMU. + * - this function is called from outside this module via the pmu struct + * returned from perf event initialisation. + */ +static int alpha_pmu_add(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int n0; + int ret; + unsigned long irq_flags; + + /* + * The Sparc code has the IRQ disable first followed by the perf + * disable, however this can lead to an overflowed counter with the + * PMI disabled on rare occasions. The alpha_perf_event_update() + * routine should detect this situation by noting a negative delta, + * nevertheless we disable the PMCs first to enable a potential + * final PMI to occur before we disable interrupts. + */ + perf_pmu_disable(event->pmu); + local_irq_save(irq_flags); + + /* Default to error to be returned */ + ret = -EAGAIN; + + /* Insert event on to PMU and if successful modify ret to valid return */ + n0 = cpuc->n_events; + if (n0 < alpha_pmu->num_pmcs) { + cpuc->event[n0] = event; + cpuc->evtype[n0] = event->hw.event_base; + cpuc->current_idx[n0] = PMC_NO_INDEX; + + if (!alpha_check_constraints(cpuc->event, cpuc->evtype, n0+1)) { + cpuc->n_events++; + cpuc->n_added++; + ret = 0; + } + } + + hwc->state = PERF_HES_UPTODATE; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_STOPPED; + + local_irq_restore(irq_flags); + perf_pmu_enable(event->pmu); + + return ret; +} + + + +/* Disable performance monitoring unit + * - this function is called from outside this module via the pmu struct + * returned from perf event initialisation. + */ +static void alpha_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + unsigned long irq_flags; + int j; + + perf_pmu_disable(event->pmu); + local_irq_save(irq_flags); + + for (j = 0; j < cpuc->n_events; j++) { + if (event == cpuc->event[j]) { + int idx = cpuc->current_idx[j]; + + /* Shift remaining entries down into the existing + * slot. + */ + while (++j < cpuc->n_events) { + cpuc->event[j - 1] = cpuc->event[j]; + cpuc->evtype[j - 1] = cpuc->evtype[j]; + cpuc->current_idx[j - 1] = + cpuc->current_idx[j]; + } + + /* Absorb the final count and turn off the event. 
*/ + alpha_perf_event_update(event, hwc, idx, 0); + perf_event_update_userpage(event); + + cpuc->idx_mask &= ~(1UL<n_events--; + break; + } + } + + local_irq_restore(irq_flags); + perf_pmu_enable(event->pmu); +} + + +static void alpha_pmu_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + alpha_perf_event_update(event, hwc, hwc->idx, 0); +} + + +static void alpha_pmu_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (!(hwc->state & PERF_HES_STOPPED)) { + cpuc->idx_mask &= ~(1UL<idx); + hwc->state |= PERF_HES_STOPPED; + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + alpha_perf_event_update(event, hwc, hwc->idx, 0); + hwc->state |= PERF_HES_UPTODATE; + } + + if (cpuc->enabled) + wrperfmon(PERFMON_CMD_DISABLE, (1UL<idx)); +} + + +static void alpha_pmu_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + alpha_perf_event_set_period(event, hwc, hwc->idx); + } + + hwc->state = 0; + + cpuc->idx_mask |= 1UL<idx; + if (cpuc->enabled) + wrperfmon(PERFMON_CMD_ENABLE, (1UL<idx)); +} + + +/* + * Check that CPU performance counters are supported. + * - currently support EV67 and later CPUs. + * - actually some later revisions of the EV6 have the same PMC model as the + * EV67 but we don't do sufficiently deep CPU detection to detect them. + * Bad luck to the very few people who might have one, I guess. + */ +static int supported_cpu(void) +{ + struct percpu_struct *cpu; + unsigned long cputype; + + /* Get cpu type from HW */ + cpu = (struct percpu_struct *)((char *)hwrpb + hwrpb->processor_offset); + cputype = cpu->type & 0xffffffff; + /* Include all of EV67, EV68, EV7, EV79 and EV69 as supported. */ + return (cputype >= EV67_CPU) && (cputype <= EV69_CPU); +} + + + +static void hw_perf_event_destroy(struct perf_event *event) +{ + /* Nothing to be done! */ + return; +} + + + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + struct perf_event *evts[MAX_HWEVENTS]; + unsigned long evtypes[MAX_HWEVENTS]; + int idx_rubbish_bin[MAX_HWEVENTS]; + int ev; + int n; + + /* We only support a limited range of HARDWARE event types with one + * only programmable via a RAW event type. + */ + if (attr->type == PERF_TYPE_HARDWARE) { + if (attr->config >= alpha_pmu->max_events) + return -EINVAL; + ev = alpha_pmu->event_map[attr->config]; + } else if (attr->type == PERF_TYPE_HW_CACHE) { + return -EOPNOTSUPP; + } else if (attr->type == PERF_TYPE_RAW) { + if (!alpha_pmu->raw_event_valid(attr->config)) + return -EINVAL; + ev = attr->config; + } else { + return -EOPNOTSUPP; + } + + if (ev < 0) { + return ev; + } + + /* + * We place the event type in event_base here and leave calculation + * of the codes to programme the PMU for alpha_pmu_enable() because + * it is only then we will know what HW events are actually + * scheduled on to the PMU. At that point the code to programme the + * PMU is put into config_base and the PMC to use is placed into + * idx. We initialise idx (below) to PMC_NO_INDEX to indicate that + * it is yet to be determined. 
+ */ + hwc->event_base = ev; + + /* Collect events in a group together suitable for calling + * alpha_check_constraints() to verify that the group as a whole can + * be scheduled on to the PMU. + */ + n = 0; + if (event->group_leader != event) { + n = collect_events(event->group_leader, + alpha_pmu->num_pmcs - 1, + evts, evtypes, idx_rubbish_bin); + if (n < 0) + return -EINVAL; + } + evtypes[n] = hwc->event_base; + evts[n] = event; + + if (alpha_check_constraints(evts, evtypes, n + 1)) + return -EINVAL; + + /* Indicate that PMU config and idx are yet to be determined. */ + hwc->config_base = 0; + hwc->idx = PMC_NO_INDEX; + + event->destroy = hw_perf_event_destroy; + + /* + * Most architectures reserve the PMU for their use at this point. + * As there is no existing mechanism to arbitrate usage and there + * appears to be no other user of the Alpha PMU we just assume + * that we can just use it, hence a NO-OP here. + * + * Maybe an alpha_reserve_pmu() routine should be implemented but is + * anything else ever going to use it? + */ + + if (!hwc->sample_period) { + hwc->sample_period = alpha_pmu->pmc_max_period[0]; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); + } + + return 0; +} + +/* + * Main entry point to initialise a HW performance event. + */ +static int alpha_pmu_event_init(struct perf_event *event) +{ + int err; + + /* does not support taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + + if (!alpha_pmu) + return -ENODEV; + + /* Do the real initialisation work. */ + err = __hw_perf_event_init(event); + + return err; +} + +/* + * Main entry point - enable HW performance counters. + */ +static void alpha_pmu_enable(struct pmu *pmu) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + + if (cpuc->n_events > 0) { + /* Update cpuc with information from any new scheduled events. */ + maybe_change_configuration(cpuc); + + /* Start counting the desired events. */ + wrperfmon(PERFMON_CMD_LOGGING_OPTIONS, EV67_PCTR_MODE_AGGREGATE); + wrperfmon(PERFMON_CMD_DESIRED_EVENTS, cpuc->config); + wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); + } +} + + +/* + * Main entry point - disable HW performance counters. + */ + +static void alpha_pmu_disable(struct pmu *pmu) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + cpuc->n_added = 0; + + wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); +} + +static struct pmu pmu = { + .pmu_enable = alpha_pmu_enable, + .pmu_disable = alpha_pmu_disable, + .event_init = alpha_pmu_event_init, + .add = alpha_pmu_add, + .del = alpha_pmu_del, + .start = alpha_pmu_start, + .stop = alpha_pmu_stop, + .read = alpha_pmu_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, +}; + + +/* + * Main entry point - don't know when this is called but it + * obviously dumps debug info. 
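For completeness, these events are reached from user space through the ordinary perf_event_open() interface that alpha_pmu_event_init() serves; a minimal host-side sketch follows. The busy loop and the lack of error handling are purely illustrative.

        // Count CPU cycles over a busy loop via perf_event_open(). On this PMU,
        // PERF_COUNT_HW_CPU_CYCLES maps to EV67_CYCLES through the tables above.
        #include <stdio.h>
        #include <string.h>
        #include <unistd.h>
        #include <sys/ioctl.h>
        #include <sys/syscall.h>
        #include <linux/perf_event.h>

        int main(void)
        {
                struct perf_event_attr attr;
                long long count;
                int fd;

                memset(&attr, 0, sizeof(attr));
                attr.size = sizeof(attr);
                attr.type = PERF_TYPE_HARDWARE;           // handled by the switch in event_init
                attr.config = PERF_COUNT_HW_CPU_CYCLES;
                attr.disabled = 1;

                fd = syscall(__NR_perf_event_open, &attr, 0 /*self*/, -1 /*any cpu*/, -1, 0);
                if (fd < 0)
                        return 1;

                ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
                for (volatile int i = 0; i < 1000000; i++)
                        ;                                 // something to count
                ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

                if (read(fd, &count, sizeof(count)) == sizeof(count))
                        printf("cycles: %lld\n", count);
                close(fd);
                return 0;
        }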
+ */ +void perf_event_print_debug(void) +{ + unsigned long flags; + unsigned long pcr; + int pcr0, pcr1; + int cpu; + + if (!supported_cpu()) + return; + + local_irq_save(flags); + + cpu = smp_processor_id(); + + pcr = wrperfmon(PERFMON_CMD_READ, 0); + pcr0 = (pcr >> alpha_pmu->pmc_count_shift[0]) & alpha_pmu->pmc_count_mask[0]; + pcr1 = (pcr >> alpha_pmu->pmc_count_shift[1]) & alpha_pmu->pmc_count_mask[1]; + + pr_info("CPU#%d: PCTR0[%06x] PCTR1[%06x]\n", cpu, pcr0, pcr1); + + local_irq_restore(flags); +} + + +/* + * Performance Monitoring Interrupt Service Routine called when a PMC + * overflows. The PMC that overflowed is passed in la_ptr. + */ +static void alpha_perf_event_irq_handler(unsigned long la_ptr, + struct pt_regs *regs) +{ + struct cpu_hw_events *cpuc; + struct perf_sample_data data; + struct perf_event *event; + struct hw_perf_event *hwc; + int idx, j; + + __this_cpu_inc(irq_pmi_count); + cpuc = this_cpu_ptr(&cpu_hw_events); + + /* Completely counting through the PMC's period to trigger a new PMC + * overflow interrupt while in this interrupt routine is utterly + * disastrous! The EV6 and EV67 counters are sufficiently large to + * prevent this but to be really sure disable the PMCs. + */ + wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); + + /* la_ptr is the counter that overflowed. */ + if (unlikely(la_ptr >= alpha_pmu->num_pmcs)) { + /* This should never occur! */ + irq_err_count++; + pr_warn("PMI: silly index %ld\n", la_ptr); + wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); + return; + } + + idx = la_ptr; + + for (j = 0; j < cpuc->n_events; j++) { + if (cpuc->current_idx[j] == idx) + break; + } + + if (unlikely(j == cpuc->n_events)) { + /* This can occur if the event is disabled right on a PMC overflow. */ + wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); + return; + } + + event = cpuc->event[j]; + + if (unlikely(!event)) { + /* This should never occur! */ + irq_err_count++; + pr_warn("PMI: No event at index %d!\n", idx); + wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); + return; + } + + hwc = &event->hw; + alpha_perf_event_update(event, hwc, idx, alpha_pmu->pmc_max_period[idx]+1); + perf_sample_data_init(&data, 0, hwc->last_period); + + if (alpha_perf_event_set_period(event, hwc, idx)) { + if (perf_event_overflow(event, &data, regs)) { + /* Interrupts coming too quickly; "throttle" the + * counter, i.e., disable it for a little while. + */ + alpha_pmu_stop(event, 0); + } + } + wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); + + return; +} + + + +/* + * Init call to initialise performance events at kernel startup. + */ +int __init init_hw_perf_events(void) +{ + pr_info("Performance events: "); + + if (!supported_cpu()) { + pr_cont("No support for your CPU.\n"); + return 0; + } + + pr_cont("Supported CPU type!\n"); + + /* Override performance counter IRQ vector */ + + perf_irq = alpha_perf_event_irq_handler; + + /* And set up PMU specification */ + alpha_pmu = &ev67_pmu; + + perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); + + return 0; +} +early_initcall(init_hw_perf_events); diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c new file mode 100644 index 0000000000..5f8527081d --- /dev/null +++ b/arch/alpha/kernel/process.c @@ -0,0 +1,400 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/process.c + * + * Copyright (C) 1995 Linus Torvalds + */ + +/* + * This file handles the architecture-dependent parts of process handling. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "proto.h" +#include "pci_impl.h" + +/* + * Power off function, if any + */ +void (*pm_power_off)(void) = machine_power_off; +EXPORT_SYMBOL(pm_power_off); + +#ifdef CONFIG_ALPHA_WTINT +/* + * Sleep the CPU. + * EV6, LCA45 and QEMU know how to power down, skipping N timer interrupts. + */ +void arch_cpu_idle(void) +{ + wtint(0); + raw_local_irq_enable(); +} + +void arch_cpu_idle_dead(void) +{ + wtint(INT_MAX); +} +#endif /* ALPHA_WTINT */ + +struct halt_info { + int mode; + char *restart_cmd; +}; + +static void +common_shutdown_1(void *generic_ptr) +{ + struct halt_info *how = (struct halt_info *)generic_ptr; + struct percpu_struct *cpup; + unsigned long *pflags, flags; + int cpuid = smp_processor_id(); + + /* No point in taking interrupts anymore. */ + local_irq_disable(); + + cpup = (struct percpu_struct *) + ((unsigned long)hwrpb + hwrpb->processor_offset + + hwrpb->processor_size * cpuid); + pflags = &cpup->flags; + flags = *pflags; + + /* Clear reason to "default"; clear "bootstrap in progress". */ + flags &= ~0x00ff0001UL; + +#ifdef CONFIG_SMP + /* Secondaries halt here. */ + if (cpuid != boot_cpuid) { + flags |= 0x00040000UL; /* "remain halted" */ + *pflags = flags; + set_cpu_present(cpuid, false); + set_cpu_possible(cpuid, false); + halt(); + } +#endif + + if (how->mode == LINUX_REBOOT_CMD_RESTART) { + if (!how->restart_cmd) { + flags |= 0x00020000UL; /* "cold bootstrap" */ + } else { + /* For SRM, we could probably set environment + variables to get this to work. We'd have to + delay this until after srm_paging_stop unless + we ever got srm_fixup working. + + At the moment, SRM will use the last boot device, + but the file and flags will be the defaults, when + doing a "warm" bootstrap. */ + flags |= 0x00030000UL; /* "warm bootstrap" */ + } + } else { + flags |= 0x00040000UL; /* "remain halted" */ + } + *pflags = flags; + +#ifdef CONFIG_SMP + /* Wait for the secondaries to halt. */ + set_cpu_present(boot_cpuid, false); + set_cpu_possible(boot_cpuid, false); + while (cpumask_weight(cpu_present_mask)) + barrier(); +#endif + + /* If booted from SRM, reset some of the original environment. */ + if (alpha_using_srm) { +#ifdef CONFIG_DUMMY_CONSOLE + /* If we've gotten here after SysRq-b, leave interrupt + context before taking over the console. */ + if (in_irq()) + irq_exit(); + /* This has the effect of resetting the VGA video origin. */ + console_lock(); + do_take_over_console(&dummy_con, 0, MAX_NR_CONSOLES-1, 1); + console_unlock(); +#endif + pci_restore_srm_config(); + set_hae(srm_hae); + } + + if (alpha_mv.kill_arch) + alpha_mv.kill_arch(how->mode); + + if (! alpha_using_srm && how->mode != LINUX_REBOOT_CMD_RESTART) { + /* Unfortunately, since MILO doesn't currently understand + the hwrpb bits above, we can't reliably halt the + processor and keep it halted. So just loop. 
*/ + return; + } + + if (alpha_using_srm) + srm_paging_stop(); + + halt(); +} + +static void +common_shutdown(int mode, char *restart_cmd) +{ + struct halt_info args; + args.mode = mode; + args.restart_cmd = restart_cmd; + on_each_cpu(common_shutdown_1, &args, 0); +} + +void +machine_restart(char *restart_cmd) +{ + common_shutdown(LINUX_REBOOT_CMD_RESTART, restart_cmd); +} + + +void +machine_halt(void) +{ + common_shutdown(LINUX_REBOOT_CMD_HALT, NULL); +} + + +void +machine_power_off(void) +{ + common_shutdown(LINUX_REBOOT_CMD_POWER_OFF, NULL); +} + + +/* Used by sysrq-p, among others. I don't believe r9-r15 are ever + saved in the context it's used. */ + +void +show_regs(struct pt_regs *regs) +{ + show_regs_print_info(KERN_DEFAULT); + dik_show_regs(regs, NULL); +} + +/* + * Re-start a thread when doing execve() + */ +void +start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp) +{ + regs->pc = pc; + regs->ps = 8; + wrusp(sp); +} +EXPORT_SYMBOL(start_thread); + +void +flush_thread(void) +{ + /* Arrange for each exec'ed process to start off with a clean slate + with respect to the FPU. This is all exceptions disabled. */ + current_thread_info()->ieee_state = 0; + wrfpcr(FPCR_DYN_NORMAL | ieee_swcr_to_fpcr(0)); + + /* Clean slate for TLS. */ + current_thread_info()->pcb.unique = 0; +} + +void +release_thread(struct task_struct *dead_task) +{ +} + +/* + * Copy architecture-specific thread state + */ +int copy_thread(unsigned long clone_flags, unsigned long usp, + unsigned long kthread_arg, struct task_struct *p, + unsigned long tls) +{ + extern void ret_from_fork(void); + extern void ret_from_kernel_thread(void); + + struct thread_info *childti = task_thread_info(p); + struct pt_regs *childregs = task_pt_regs(p); + struct pt_regs *regs = current_pt_regs(); + struct switch_stack *childstack, *stack; + + childstack = ((struct switch_stack *) childregs) - 1; + childti->pcb.ksp = (unsigned long) childstack; + childti->pcb.flags = 1; /* set FEN, clear everything else */ + + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + /* kernel thread */ + memset(childstack, 0, + sizeof(struct switch_stack) + sizeof(struct pt_regs)); + childstack->r26 = (unsigned long) ret_from_kernel_thread; + childstack->r9 = usp; /* function */ + childstack->r10 = kthread_arg; + childregs->hae = alpha_mv.hae_cache; + childti->pcb.usp = 0; + return 0; + } + /* Note: if CLONE_SETTLS is not set, then we must inherit the + value from the parent, which will have been set by the block + copy in dup_task_struct. This is non-intuitive, but is + required for proper operation in the case of a threaded + application calling fork. */ + if (clone_flags & CLONE_SETTLS) + childti->pcb.unique = tls; + else + regs->r20 = 0; /* OSF/1 has some strange fork() semantics. */ + childti->pcb.usp = usp ?: rdusp(); + *childregs = *regs; + childregs->r0 = 0; + childregs->r19 = 0; + childregs->r20 = 1; /* OSF/1 has some strange fork() semantics. */ + stack = ((struct switch_stack *) regs) - 1; + *childstack = *stack; + childstack->r26 = (unsigned long) ret_from_fork; + return 0; +} + +/* + * Fill in the user structure for a ELF core dump. 
+ */ +void +dump_elf_thread(elf_greg_t *dest, struct pt_regs *pt, struct thread_info *ti) +{ + /* switch stack follows right below pt_regs: */ + struct switch_stack * sw = ((struct switch_stack *) pt) - 1; + + dest[ 0] = pt->r0; + dest[ 1] = pt->r1; + dest[ 2] = pt->r2; + dest[ 3] = pt->r3; + dest[ 4] = pt->r4; + dest[ 5] = pt->r5; + dest[ 6] = pt->r6; + dest[ 7] = pt->r7; + dest[ 8] = pt->r8; + dest[ 9] = sw->r9; + dest[10] = sw->r10; + dest[11] = sw->r11; + dest[12] = sw->r12; + dest[13] = sw->r13; + dest[14] = sw->r14; + dest[15] = sw->r15; + dest[16] = pt->r16; + dest[17] = pt->r17; + dest[18] = pt->r18; + dest[19] = pt->r19; + dest[20] = pt->r20; + dest[21] = pt->r21; + dest[22] = pt->r22; + dest[23] = pt->r23; + dest[24] = pt->r24; + dest[25] = pt->r25; + dest[26] = pt->r26; + dest[27] = pt->r27; + dest[28] = pt->r28; + dest[29] = pt->gp; + dest[30] = ti == current_thread_info() ? rdusp() : ti->pcb.usp; + dest[31] = pt->pc; + + /* Once upon a time this was the PS value. Which is stupid + since that is always 8 for usermode. Usurped for the more + useful value of the thread's UNIQUE field. */ + dest[32] = ti->pcb.unique; +} +EXPORT_SYMBOL(dump_elf_thread); + +int +dump_elf_task(elf_greg_t *dest, struct task_struct *task) +{ + dump_elf_thread(dest, task_pt_regs(task), task_thread_info(task)); + return 1; +} +EXPORT_SYMBOL(dump_elf_task); + +int +dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task) +{ + struct switch_stack *sw = (struct switch_stack *)task_pt_regs(task) - 1; + memcpy(dest, sw->fp, 32 * 8); + return 1; +} +EXPORT_SYMBOL(dump_elf_task_fp); + +/* + * Return saved PC of a blocked thread. This assumes the frame + * pointer is the 6th saved long on the kernel stack and that the + * saved return address is the first long in the frame. This all + * holds provided the thread blocked through a call to schedule() ($15 + * is the frame pointer in schedule() and $15 is saved at offset 48 by + * entry.S:do_switch_stack). + * + * Under heavy swap load I've seen this lose in an ugly way. So do + * some extra sanity checking on the ranges we expect these pointers + * to be in so that we can fail gracefully. This is just for ps after + * all. -- r~ + */ + +static unsigned long +thread_saved_pc(struct task_struct *t) +{ + unsigned long base = (unsigned long)task_stack_page(t); + unsigned long fp, sp = task_thread_info(t)->pcb.ksp; + + if (sp > base && sp+6*8 < base + 16*1024) { + fp = ((unsigned long*)sp)[6]; + if (fp > sp && fp < base + 16*1024) + return *(unsigned long *)fp; + } + + return 0; +} + +unsigned long +__get_wchan(struct task_struct *p) +{ + unsigned long schedule_frame; + unsigned long pc; + + /* + * This one depends on the frame size of schedule(). Do a + * "disass schedule" in gdb to find the frame size. Also, the + * code assumes that sleep_on() follows immediately after + * interruptible_sleep_on() and that add_timer() follows + * immediately after interruptible_sleep(). Ugly, isn't it? + * Maybe adding a wchan field to task_struct would be better, + * after all... 
+ */ + + pc = thread_saved_pc(p); + if (in_sched_functions(pc)) { + schedule_frame = ((unsigned long *)task_thread_info(p)->pcb.ksp)[6]; + return ((unsigned long *)schedule_frame)[12]; + } + return pc; +} diff --git a/arch/alpha/kernel/proto.h b/arch/alpha/kernel/proto.h new file mode 100644 index 0000000000..5816a31c1b --- /dev/null +++ b/arch/alpha/kernel/proto.h @@ -0,0 +1,213 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include + +/* Prototypes of functions used across modules here in this directory. */ + +#define vucp volatile unsigned char * +#define vusp volatile unsigned short * +#define vip volatile int * +#define vuip volatile unsigned int * +#define vulp volatile unsigned long * + +struct pt_regs; +struct task_struct; +struct pci_dev; +struct pci_controller; + +/* core_apecs.c */ +extern struct pci_ops apecs_pci_ops; +extern void apecs_init_arch(void); +extern void apecs_pci_clr_err(void); +extern void apecs_machine_check(unsigned long vector, unsigned long la_ptr); +extern void apecs_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); + +/* core_cia.c */ +extern struct pci_ops cia_pci_ops; +extern void cia_init_pci(void); +extern void cia_init_arch(void); +extern void pyxis_init_arch(void); +extern void cia_kill_arch(int); +extern void cia_machine_check(unsigned long vector, unsigned long la_ptr); +extern void cia_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); + +/* core_irongate.c */ +extern struct pci_ops irongate_pci_ops; +extern int irongate_pci_clr_err(void); +extern void irongate_init_arch(void); +#define irongate_pci_tbi ((void *)0) + +/* core_lca.c */ +extern struct pci_ops lca_pci_ops; +extern void lca_init_arch(void); +extern void lca_machine_check(unsigned long vector, unsigned long la_ptr); +extern void lca_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); + +/* core_marvel.c */ +extern struct pci_ops marvel_pci_ops; +extern void marvel_init_arch(void); +extern void marvel_kill_arch(int); +extern void marvel_machine_check(unsigned long, unsigned long); +extern void marvel_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); +extern struct _alpha_agp_info *marvel_agp_info(void); +struct io7 *marvel_find_io7(int pe); +struct io7 *marvel_next_io7(struct io7 *prev); +void io7_clear_errors(struct io7 *io7); + +/* core_mcpcia.c */ +extern struct pci_ops mcpcia_pci_ops; +extern void mcpcia_init_arch(void); +extern void mcpcia_init_hoses(void); +extern void mcpcia_machine_check(unsigned long vector, unsigned long la_ptr); +extern void mcpcia_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); + +/* core_polaris.c */ +extern struct pci_ops polaris_pci_ops; +extern int polaris_read_config_dword(struct pci_dev *, int, u32 *); +extern int polaris_write_config_dword(struct pci_dev *, int, u32); +extern void polaris_init_arch(void); +extern void polaris_machine_check(unsigned long vector, unsigned long la_ptr); +#define polaris_pci_tbi ((void *)0) + +/* core_t2.c */ +extern struct pci_ops t2_pci_ops; +extern void t2_init_arch(void); +extern void t2_kill_arch(int); +extern void t2_machine_check(unsigned long vector, unsigned long la_ptr); +extern void t2_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); + +/* core_titan.c */ +extern struct pci_ops titan_pci_ops; +extern void titan_init_arch(void); +extern void titan_kill_arch(int); +extern void titan_machine_check(unsigned long, unsigned long); +extern void titan_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); +extern struct _alpha_agp_info *titan_agp_info(void); + 
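All of these per-chipset *_pci_tbi prototypes are reached through a single function pointer in the machine vector (alpha_mv.mv_pci_tbi, as used in pci_iommu.c above); the sketch below shows only that dispatch pattern. demo_machvec and demo_flush_all are illustrative stand-ins, not kernel symbols, and the argument types are simplified.

        // Dispatch sketch: generic IOMMU code invalidates the translation buffer
        // through a hook filled in by whichever core_*.c matches the chipset.
        struct pci_controller;

        struct demo_machvec {
                void (*mv_pci_tbi)(struct pci_controller *hose,
                                   unsigned long start, unsigned long end);
        };

        static void demo_flush_all(const struct demo_machvec *mv,
                                   struct pci_controller *hose)
        {
                // A NULL hook means the platform has no scatter-gather IOMMU,
                // the same "no HW sg" case pci_map_single_1() checks for.
                if (mv->mv_pci_tbi)
                        mv->mv_pci_tbi(hose, 0, ~0UL);
        }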
+/* core_tsunami.c */ +extern struct pci_ops tsunami_pci_ops; +extern void tsunami_init_arch(void); +extern void tsunami_kill_arch(int); +extern void tsunami_machine_check(unsigned long vector, unsigned long la_ptr); +extern void tsunami_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); + +/* core_wildfire.c */ +extern struct pci_ops wildfire_pci_ops; +extern void wildfire_init_arch(void); +extern void wildfire_kill_arch(int); +extern void wildfire_machine_check(unsigned long vector, unsigned long la_ptr); +extern void wildfire_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); + +/* console.c */ +#ifdef CONFIG_VGA_HOSE +extern void find_console_vga_hose(void); +extern void locate_and_init_vga(void *(*)(void *, void *)); +#else +static inline void find_console_vga_hose(void) { } +static inline void locate_and_init_vga(void *(*sel_func)(void *, void *)) { } +#endif + +/* setup.c */ +extern unsigned long srm_hae; +extern int boot_cpuid; +#ifdef CONFIG_VERBOSE_MCHECK +extern unsigned long alpha_verbose_mcheck; +#endif + +/* srmcons.c */ +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SRM) +extern void register_srm_console(void); +extern void unregister_srm_console(void); +#else +#define register_srm_console() +#define unregister_srm_console() +#endif + +/* smp.c */ +extern void setup_smp(void); +extern void handle_ipi(struct pt_regs *); + +/* bios32.c */ +/* extern void reset_for_srm(void); */ + +/* time.c */ +extern irqreturn_t rtc_timer_interrupt(int irq, void *dev); +extern void init_clockevent(void); +extern void common_init_rtc(void); +extern unsigned long est_cycle_freq; + +/* smc37c93x.c */ +extern void SMC93x_Init(void); + +/* smc37c669.c */ +extern void SMC669_Init(int); + +/* es1888.c */ +extern void es1888_init(void); + +/* ../lib/fpreg.c */ +extern void alpha_write_fp_reg (unsigned long reg, unsigned long val); +extern unsigned long alpha_read_fp_reg (unsigned long reg); + +/* head.S */ +extern void wrmces(unsigned long mces); +extern void cserve_ena(unsigned long); +extern void cserve_dis(unsigned long); +extern void __smp_callin(unsigned long); + +/* entry.S */ +extern void entArith(void); +extern void entIF(void); +extern void entInt(void); +extern void entMM(void); +extern void entSys(void); +extern void entUna(void); +extern void entDbg(void); + +/* ptrace.c */ +extern int ptrace_set_bpt (struct task_struct *child); +extern int ptrace_cancel_bpt (struct task_struct *child); + +/* traps.c */ +extern void dik_show_regs(struct pt_regs *regs, unsigned long *r9_15); +extern void die_if_kernel(char *, struct pt_regs *, long, unsigned long *); + +/* sys_titan.c */ +extern void titan_dispatch_irqs(u64); + +/* ../mm/init.c */ +extern void switch_to_system_map(void); +extern void srm_paging_stop(void); + +static inline int +__alpha_remap_area_pages(unsigned long address, unsigned long phys_addr, + unsigned long size, unsigned long flags) +{ + pgprot_t prot; + + prot = __pgprot(_PAGE_VALID | _PAGE_ASM | _PAGE_KRE + | _PAGE_KWE | flags); + return ioremap_page_range(address, address + size, phys_addr, prot); +} + +/* irq.c */ + +#ifdef CONFIG_SMP +#define mcheck_expected(cpu) (cpu_data[cpu].mcheck_expected) +#define mcheck_taken(cpu) (cpu_data[cpu].mcheck_taken) +#define mcheck_extra(cpu) (cpu_data[cpu].mcheck_extra) +#else +extern struct mcheck_info +{ + unsigned char expected __attribute__((aligned(8))); + unsigned char taken; + unsigned char extra; +} __mcheck_info; + +#define mcheck_expected(cpu) (*((void)(cpu), &__mcheck_info.expected)) +#define 
mcheck_taken(cpu) (*((void)(cpu), &__mcheck_info.taken)) +#define mcheck_extra(cpu) (*((void)(cpu), &__mcheck_info.extra)) +#endif + +extern void process_mcheck_info(unsigned long vector, unsigned long la_ptr, + const char *machine, int expected); diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c new file mode 100644 index 0000000000..8c43212ae3 --- /dev/null +++ b/arch/alpha/kernel/ptrace.c @@ -0,0 +1,338 @@ +// SPDX-License-Identifier: GPL-2.0 +/* ptrace.c */ +/* By Ross Biro 1/23/92 */ +/* edited by Linus Torvalds */ +/* mangled further by Bob Manson (manson@santafe.edu) */ +/* more mutilation by David Mosberger (davidm@azstarnet.com) */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "proto.h" + +#define DEBUG DBG_MEM +#undef DEBUG + +#ifdef DEBUG +enum { + DBG_MEM = (1<<0), + DBG_BPT = (1<<1), + DBG_MEM_ALL = (1<<2) +}; +#define DBG(fac,args) {if ((fac) & DEBUG) printk args;} +#else +#define DBG(fac,args) +#endif + +#define BREAKINST 0x00000080 /* call_pal bpt */ + +/* + * does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. + */ + +/* + * Processes always block with the following stack-layout: + * + * +================================+ <---- task + 2*PAGE_SIZE + * | PALcode saved frame (ps, pc, | ^ + * | gp, a0, a1, a2) | | + * +================================+ | struct pt_regs + * | | | + * | frame generated by SAVE_ALL | | + * | | v + * +================================+ + * | | ^ + * | frame saved by do_switch_stack | | struct switch_stack + * | | v + * +================================+ + */ + +/* + * The following table maps a register index into the stack offset at + * which the register is saved. Register indices are 0-31 for integer + * regs, 32-63 for fp regs, and 64 for the pc. Notice that sp and + * zero have no stack-slot and need to be treated specially (see + * get_reg/put_reg below). + */ +enum { + REG_R0 = 0, REG_F0 = 32, REG_FPCR = 63, REG_PC = 64 +}; + +#define PT_REG(reg) \ + (PAGE_SIZE*2 - sizeof(struct pt_regs) + offsetof(struct pt_regs, reg)) + +#define SW_REG(reg) \ + (PAGE_SIZE*2 - sizeof(struct pt_regs) - sizeof(struct switch_stack) \ + + offsetof(struct switch_stack, reg)) + +static int regoff[] = { + PT_REG( r0), PT_REG( r1), PT_REG( r2), PT_REG( r3), + PT_REG( r4), PT_REG( r5), PT_REG( r6), PT_REG( r7), + PT_REG( r8), SW_REG( r9), SW_REG( r10), SW_REG( r11), + SW_REG( r12), SW_REG( r13), SW_REG( r14), SW_REG( r15), + PT_REG( r16), PT_REG( r17), PT_REG( r18), PT_REG( r19), + PT_REG( r20), PT_REG( r21), PT_REG( r22), PT_REG( r23), + PT_REG( r24), PT_REG( r25), PT_REG( r26), PT_REG( r27), + PT_REG( r28), PT_REG( gp), -1, -1, + SW_REG(fp[ 0]), SW_REG(fp[ 1]), SW_REG(fp[ 2]), SW_REG(fp[ 3]), + SW_REG(fp[ 4]), SW_REG(fp[ 5]), SW_REG(fp[ 6]), SW_REG(fp[ 7]), + SW_REG(fp[ 8]), SW_REG(fp[ 9]), SW_REG(fp[10]), SW_REG(fp[11]), + SW_REG(fp[12]), SW_REG(fp[13]), SW_REG(fp[14]), SW_REG(fp[15]), + SW_REG(fp[16]), SW_REG(fp[17]), SW_REG(fp[18]), SW_REG(fp[19]), + SW_REG(fp[20]), SW_REG(fp[21]), SW_REG(fp[22]), SW_REG(fp[23]), + SW_REG(fp[24]), SW_REG(fp[25]), SW_REG(fp[26]), SW_REG(fp[27]), + SW_REG(fp[28]), SW_REG(fp[29]), SW_REG(fp[30]), SW_REG(fp[31]), + PT_REG( pc) +}; + +static unsigned long zero; + +/* + * Get address of register REGNO in task TASK. 
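The register-index convention described here (0-31 integer registers, 32-63 floating-point registers, 64 = pc) is also what the addr argument of PTRACE_PEEKUSR means on Alpha, as arch_ptrace() further down simply forwards it to get_reg(). A hedged tracer-side sketch, only meaningful on Alpha since the numbering is architecture-specific (glibc spells the request PTRACE_PEEKUSER):

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <unistd.h>

#define REG_PC 64   /* Alpha convention, matching the table above */

int main(void)
{
	pid_t child = fork();

	if (child == 0) {
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);               /* stop so the parent can look at us */
		_exit(0);
	}

	waitpid(child, NULL, 0);
	errno = 0;
	long pc = ptrace(PTRACE_PEEKUSER, child, (void *) REG_PC, NULL);
	if (pc == -1 && errno)
		perror("PTRACE_PEEKUSER");
	else
		printf("child pc = %#lx\n", pc);

	ptrace(PTRACE_CONT, child, NULL, NULL);
	waitpid(child, NULL, 0);
	return 0;
}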
+ */ +static unsigned long * +get_reg_addr(struct task_struct * task, unsigned long regno) +{ + unsigned long *addr; + + if (regno == 30) { + addr = &task_thread_info(task)->pcb.usp; + } else if (regno == 65) { + addr = &task_thread_info(task)->pcb.unique; + } else if (regno == 31 || regno > 65) { + zero = 0; + addr = &zero; + } else { + addr = task_stack_page(task) + regoff[regno]; + } + return addr; +} + +/* + * Get contents of register REGNO in task TASK. + */ +static unsigned long +get_reg(struct task_struct * task, unsigned long regno) +{ + /* Special hack for fpcr -- combine hardware and software bits. */ + if (regno == 63) { + unsigned long fpcr = *get_reg_addr(task, regno); + unsigned long swcr + = task_thread_info(task)->ieee_state & IEEE_SW_MASK; + swcr = swcr_update_status(swcr, fpcr); + return fpcr | swcr; + } + return *get_reg_addr(task, regno); +} + +/* + * Write contents of register REGNO in task TASK. + */ +static int +put_reg(struct task_struct *task, unsigned long regno, unsigned long data) +{ + if (regno == 63) { + task_thread_info(task)->ieee_state + = ((task_thread_info(task)->ieee_state & ~IEEE_SW_MASK) + | (data & IEEE_SW_MASK)); + data = (data & FPCR_DYN_MASK) | ieee_swcr_to_fpcr(data); + } + *get_reg_addr(task, regno) = data; + return 0; +} + +static inline int +read_int(struct task_struct *task, unsigned long addr, int * data) +{ + int copied = access_process_vm(task, addr, data, sizeof(int), + FOLL_FORCE); + return (copied == sizeof(int)) ? 0 : -EIO; +} + +static inline int +write_int(struct task_struct *task, unsigned long addr, int data) +{ + int copied = access_process_vm(task, addr, &data, sizeof(int), + FOLL_FORCE | FOLL_WRITE); + return (copied == sizeof(int)) ? 0 : -EIO; +} + +/* + * Set breakpoint. + */ +int +ptrace_set_bpt(struct task_struct * child) +{ + int displ, i, res, reg_b, nsaved = 0; + unsigned int insn, op_code; + unsigned long pc; + + pc = get_reg(child, REG_PC); + res = read_int(child, pc, (int *) &insn); + if (res < 0) + return res; + + op_code = insn >> 26; + if (op_code >= 0x30) { + /* + * It's a branch: instead of trying to figure out + * whether the branch will be taken or not, we'll put + * a breakpoint at either location. This is simpler, + * more reliable, and probably not a whole lot slower + * than the alternative approach of emulating the + * branch (emulation can be tricky for fp branches). + */ + displ = ((s32)(insn << 11)) >> 9; + task_thread_info(child)->bpt_addr[nsaved++] = pc + 4; + if (displ) /* guard against unoptimized code */ + task_thread_info(child)->bpt_addr[nsaved++] + = pc + 4 + displ; + DBG(DBG_BPT, ("execing branch\n")); + } else if (op_code == 0x1a) { + reg_b = (insn >> 16) & 0x1f; + task_thread_info(child)->bpt_addr[nsaved++] = get_reg(child, reg_b); + DBG(DBG_BPT, ("execing jump\n")); + } else { + task_thread_info(child)->bpt_addr[nsaved++] = pc + 4; + DBG(DBG_BPT, ("execing normal insn\n")); + } + + /* install breakpoints: */ + for (i = 0; i < nsaved; ++i) { + res = read_int(child, task_thread_info(child)->bpt_addr[i], + (int *) &insn); + if (res < 0) + return res; + task_thread_info(child)->bpt_insn[i] = insn; + DBG(DBG_BPT, (" -> next_pc=%lx\n", + task_thread_info(child)->bpt_addr[i])); + res = write_int(child, task_thread_info(child)->bpt_addr[i], + BREAKINST); + if (res < 0) + return res; + } + task_thread_info(child)->bpt_nsaved = nsaved; + return 0; +} + +/* + * Ensure no single-step breakpoint is pending. Returns non-zero + * value if child was being single-stepped. 
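The `((s32)(insn << 11)) >> 9` expression in ptrace_set_bpt() sign-extends the 21-bit branch displacement field (bits 20..0, counted in instructions relative to pc + 4) and converts it to a byte offset in one step: shifting left by 11 puts the sign bit at bit 31, and shifting right by only 9 leaves a factor of four. A standalone check with an instruction word made up for the demo (BEQ, opcode 0x39, displacement -2):

#include <stdint.h>
#include <stdio.h>

/* Byte displacement of an Alpha branch, same trick as ptrace_set_bpt(). */
static long branch_displacement(uint32_t insn)
{
	return ((int32_t)(insn << 11)) >> 9;   /* sign-extend and multiply by 4 */
}

int main(void)
{
	uint32_t insn = (0x39u << 26) | 0x1ffffe;   /* BEQ with disp field -2 */
	unsigned long pc = 0x20000000UL;            /* hypothetical branch address */

	printf("displacement = %ld bytes, target = %#lx\n",
	       branch_displacement(insn),
	       pc + 4 + branch_displacement(insn));
	return 0;
}

With this encoding the displacement comes out as -8 bytes, i.e. two instructions back, which is exactly where the second breakpoint would be planted.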
+ */ +int +ptrace_cancel_bpt(struct task_struct * child) +{ + int i, nsaved = task_thread_info(child)->bpt_nsaved; + + task_thread_info(child)->bpt_nsaved = 0; + + if (nsaved > 2) { + printk("ptrace_cancel_bpt: bogus nsaved: %d!\n", nsaved); + nsaved = 2; + } + + for (i = 0; i < nsaved; ++i) { + write_int(child, task_thread_info(child)->bpt_addr[i], + task_thread_info(child)->bpt_insn[i]); + } + return (nsaved != 0); +} + +void user_enable_single_step(struct task_struct *child) +{ + /* Mark single stepping. */ + task_thread_info(child)->bpt_nsaved = -1; +} + +void user_disable_single_step(struct task_struct *child) +{ + ptrace_cancel_bpt(child); +} + +/* + * Called by kernel/ptrace.c when detaching.. + * + * Make sure the single step bit is not set. + */ +void ptrace_disable(struct task_struct *child) +{ + user_disable_single_step(child); +} + +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + unsigned long tmp; + size_t copied; + long ret; + + switch (request) { + /* When I and D space are separate, these will need to be fixed. */ + case PTRACE_PEEKTEXT: /* read word at location addr. */ + case PTRACE_PEEKDATA: + copied = ptrace_access_vm(child, addr, &tmp, sizeof(tmp), + FOLL_FORCE); + ret = -EIO; + if (copied != sizeof(tmp)) + break; + + force_successful_syscall_return(); + ret = tmp; + break; + + /* Read register number ADDR. */ + case PTRACE_PEEKUSR: + force_successful_syscall_return(); + ret = get_reg(child, addr); + DBG(DBG_MEM, ("peek $%lu->%#lx\n", addr, ret)); + break; + + /* When I and D space are separate, this will have to be fixed. */ + case PTRACE_POKETEXT: /* write the word at location addr. */ + case PTRACE_POKEDATA: + ret = generic_ptrace_pokedata(child, addr, data); + break; + + case PTRACE_POKEUSR: /* write the specified register */ + DBG(DBG_MEM, ("poke $%lu<-%#lx\n", addr, data)); + ret = put_reg(child, addr, data); + break; + default: + ret = ptrace_request(child, request, addr, data); + break; + } + return ret; +} + +asmlinkage unsigned long syscall_trace_enter(void) +{ + unsigned long ret = 0; + struct pt_regs *regs = current_pt_regs(); + if (test_thread_flag(TIF_SYSCALL_TRACE) && + tracehook_report_syscall_entry(current_pt_regs())) + ret = -1UL; + audit_syscall_entry(regs->r0, regs->r16, regs->r17, regs->r18, regs->r19); + return ret ?: current_pt_regs()->r0; +} + +asmlinkage void +syscall_trace_leave(void) +{ + audit_syscall_exit(current_pt_regs()); + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(current_pt_regs(), 0); +} diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c new file mode 100644 index 0000000000..fb3025396a --- /dev/null +++ b/arch/alpha/kernel/rtc.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/rtc.c + * + * Copyright (C) 1991, 1992, 1995, 1999, 2000 Linus Torvalds + * + * This file contains date handling. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "proto.h" + + +/* + * Support for the RTC device. + * + * We don't want to use the rtc-cmos driver, because we don't want to support + * alarms, as that would be indistinguishable from timer interrupts. + * + * Further, generic code is really, really tied to a 1900 epoch. This is + * true in __get_rtc_time as well as the users of struct rtc_time e.g. + * rtc_tm_to_time. Thankfully all of the other epochs in use are later + * than 1900, and so it's easy to adjust. 
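The epoch handling described here is plain arithmetic on tm_year (years since 1900, after the generic CMOS code has already applied its "year <= 69 means 20xx" century rule); alpha_rtc_read_time() below undoes that rule, shifts by the detected epoch, and reapplies it. A worked sketch of the same fix-up, using the Digital UNIX epoch of 1952 mentioned in the detection code:

#include <stdio.h>

/* Mirror of the epoch fix-up in alpha_rtc_read_time(). */
static int fix_tm_year(int tm_year, unsigned long rtc_epoch)
{
	int year = tm_year;

	if (rtc_epoch == 1900)
		return year;
	if (year >= 100)
		year -= 100;               /* undo the century adjustment */
	year += rtc_epoch - 1900;
	if (year <= 69)
		year += 100;               /* redo it with the epoch applied */
	return year;
}

int main(void)
{
	/* A CMOS year register of 48 under the 1952 epoch: the generic code
	   first produces tm_year 148 (i.e. 2048); the fix-up maps it to
	   tm_year 100, i.e. the year 2000. */
	printf("corrected tm_year = %d\n", fix_tm_year(148, 1952));
	return 0;
}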
+ */ + +static unsigned long rtc_epoch; + +static int __init +specifiy_epoch(char *str) +{ + unsigned long epoch = simple_strtoul(str, NULL, 0); + if (epoch < 1900) + printk("Ignoring invalid user specified epoch %lu\n", epoch); + else + rtc_epoch = epoch; + return 1; +} +__setup("epoch=", specifiy_epoch); + +static void __init +init_rtc_epoch(void) +{ + int epoch, year, ctrl; + + if (rtc_epoch != 0) { + /* The epoch was specified on the command-line. */ + return; + } + + /* Detect the epoch in use on this computer. */ + ctrl = CMOS_READ(RTC_CONTROL); + year = CMOS_READ(RTC_YEAR); + if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) + year = bcd2bin(year); + + /* PC-like is standard; used for year >= 70 */ + epoch = 1900; + if (year < 20) { + epoch = 2000; + } else if (year >= 20 && year < 48) { + /* NT epoch */ + epoch = 1980; + } else if (year >= 48 && year < 70) { + /* Digital UNIX epoch */ + epoch = 1952; + } + rtc_epoch = epoch; + + printk(KERN_INFO "Using epoch %d for rtc year %d\n", epoch, year); +} + +static int +alpha_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + int ret = mc146818_get_time(tm); + + if (ret < 0) { + dev_err_ratelimited(dev, "unable to read current time\n"); + return ret; + } + + /* Adjust for non-default epochs. It's easier to depend on the + generic __get_rtc_time and adjust the epoch here than create + a copy of __get_rtc_time with the edits we need. */ + if (rtc_epoch != 1900) { + int year = tm->tm_year; + /* Undo the century adjustment made in __get_rtc_time. */ + if (year >= 100) + year -= 100; + year += rtc_epoch - 1900; + /* Redo the century adjustment with the epoch in place. */ + if (year <= 69) + year += 100; + tm->tm_year = year; + } + + return 0; +} + +static int +alpha_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + struct rtc_time xtm; + + if (rtc_epoch != 1900) { + xtm = *tm; + xtm.tm_year -= rtc_epoch - 1900; + tm = &xtm; + } + + return mc146818_set_time(tm); +} + +static int +alpha_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case RTC_EPOCH_READ: + return put_user(rtc_epoch, (unsigned long __user *)arg); + case RTC_EPOCH_SET: + if (arg < 1900) + return -EINVAL; + rtc_epoch = arg; + return 0; + default: + return -ENOIOCTLCMD; + } +} + +static const struct rtc_class_ops alpha_rtc_ops = { + .read_time = alpha_rtc_read_time, + .set_time = alpha_rtc_set_time, + .ioctl = alpha_rtc_ioctl, +}; + +/* + * Similarly, except do the actual CMOS access on the boot cpu only. + * This requires marshalling the data across an interprocessor call. 
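The alpha_rtc_ioctl() handler above wires the standard RTC_EPOCH_READ/RTC_EPOCH_SET requests to the rtc_epoch variable, so the detected epoch is visible from userspace through the RTC character device. A minimal sketch, assuming the device node is /dev/rtc0 and keeping in mind that only a few RTC drivers implement these ioctls (expect ENOTTY elsewhere):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/rtc.h>

int main(void)
{
	unsigned long epoch;
	int fd = open("/dev/rtc0", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/rtc0");
		return 1;
	}
	if (ioctl(fd, RTC_EPOCH_READ, &epoch) == 0)
		printf("RTC epoch: %lu\n", epoch);
	else
		perror("RTC_EPOCH_READ");
	close(fd);
	return 0;
}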
+ */ + +#if defined(CONFIG_SMP) && \ + (defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_MARVEL)) +# define HAVE_REMOTE_RTC 1 + +union remote_data { + struct rtc_time *tm; + long retval; +}; + +static void +do_remote_read(void *data) +{ + union remote_data *x = data; + x->retval = alpha_rtc_read_time(NULL, x->tm); +} + +static int +remote_read_time(struct device *dev, struct rtc_time *tm) +{ + union remote_data x; + if (smp_processor_id() != boot_cpuid) { + x.tm = tm; + smp_call_function_single(boot_cpuid, do_remote_read, &x, 1); + return x.retval; + } + return alpha_rtc_read_time(NULL, tm); +} + +static void +do_remote_set(void *data) +{ + union remote_data *x = data; + x->retval = alpha_rtc_set_time(NULL, x->tm); +} + +static int +remote_set_time(struct device *dev, struct rtc_time *tm) +{ + union remote_data x; + if (smp_processor_id() != boot_cpuid) { + x.tm = tm; + smp_call_function_single(boot_cpuid, do_remote_set, &x, 1); + return x.retval; + } + return alpha_rtc_set_time(NULL, tm); +} + +static const struct rtc_class_ops remote_rtc_ops = { + .read_time = remote_read_time, + .set_time = remote_set_time, + .ioctl = alpha_rtc_ioctl, +}; +#endif + +static int __init +alpha_rtc_init(void) +{ + struct platform_device *pdev; + struct rtc_device *rtc; + + init_rtc_epoch(); + + pdev = platform_device_register_simple("rtc-alpha", -1, NULL, 0); + rtc = devm_rtc_allocate_device(&pdev->dev); + if (IS_ERR(rtc)) + return PTR_ERR(rtc); + + platform_set_drvdata(pdev, rtc); + rtc->ops = &alpha_rtc_ops; + +#ifdef HAVE_REMOTE_RTC + if (alpha_mv.rtc_boot_cpu_only) + rtc->ops = &remote_rtc_ops; +#endif + + return devm_rtc_register_device(rtc); +} +device_initcall(alpha_rtc_init); diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c new file mode 100644 index 0000000000..b4fbbba30a --- /dev/null +++ b/arch/alpha/kernel/setup.c @@ -0,0 +1,1458 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/setup.c + * + * Copyright (C) 1995 Linus Torvalds + */ + +/* 2.3.x bootmem, 1999 Andrea Arcangeli */ + +/* + * Bootup setup stuff. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_MAGIC_SYSRQ +#include +#include +#endif +#include +#include +#include +#include +#include + +static int alpha_panic_event(struct notifier_block *, unsigned long, void *); +static struct notifier_block alpha_panic_block = { + alpha_panic_event, + NULL, + INT_MAX /* try to do it first */ +}; + +#include +#include +#include +#include +#include + +#include "proto.h" +#include "pci_impl.h" + + +struct hwrpb_struct *hwrpb; +EXPORT_SYMBOL(hwrpb); +unsigned long srm_hae; + +int alpha_l1i_cacheshape; +int alpha_l1d_cacheshape; +int alpha_l2_cacheshape; +int alpha_l3_cacheshape; + +#ifdef CONFIG_VERBOSE_MCHECK +/* 0=minimum, 1=verbose, 2=all */ +/* These can be overridden via the command line, ie "verbose_mcheck=2") */ +unsigned long alpha_verbose_mcheck = CONFIG_VERBOSE_MCHECK_ON; +#endif + +/* Which processor we booted from. */ +int boot_cpuid; + +/* + * Using SRM callbacks for initial console output. This works from + * setup_arch() time through the end of time_init(), as those places + * are under our (Alpha) control. 
+ + * "srmcons" specified in the boot command arguments allows us to + * see kernel messages during the period of time before the true + * console device is "registered" during console_init(). + * As of this version (2.5.59), console_init() will call + * disable_early_printk() as the last action before initializing + * the console drivers. That's the last possible time srmcons can be + * unregistered without interfering with console behavior. + * + * By default, OFF; set it with a bootcommand arg of "srmcons" or + * "console=srm". The meaning of these two args is: + * "srmcons" - early callback prints + * "console=srm" - full callback based console, including early prints + */ +int srmcons_output = 0; + +/* Enforce a memory size limit; useful for testing. By default, none. */ +unsigned long mem_size_limit = 0; + +/* Set AGP GART window size (0 means disabled). */ +unsigned long alpha_agpgart_size = DEFAULT_AGP_APER_SIZE; + +#ifdef CONFIG_ALPHA_GENERIC +struct alpha_machine_vector alpha_mv; +EXPORT_SYMBOL(alpha_mv); +#endif + +#ifndef alpha_using_srm +int alpha_using_srm; +EXPORT_SYMBOL(alpha_using_srm); +#endif + +#ifndef alpha_using_qemu +int alpha_using_qemu; +#endif + +static struct alpha_machine_vector *get_sysvec(unsigned long, unsigned long, + unsigned long); +static struct alpha_machine_vector *get_sysvec_byname(const char *); +static void get_sysnames(unsigned long, unsigned long, unsigned long, + char **, char **); +static void determine_cpu_caches (unsigned int); + +static char __initdata command_line[COMMAND_LINE_SIZE]; + +/* + * The format of "screen_info" is strange, and due to early + * i386-setup code. This is just enough to make the console + * code think we're on a VGA color display. + */ + +struct screen_info screen_info = { + .orig_x = 0, + .orig_y = 25, + .orig_video_cols = 80, + .orig_video_lines = 25, + .orig_video_isVGA = 1, + .orig_video_points = 16 +}; + +EXPORT_SYMBOL(screen_info); + +/* + * The direct map I/O window, if any. This should be the same + * for all busses, since it's used by virt_to_bus. + */ + +unsigned long __direct_map_base; +unsigned long __direct_map_size; +EXPORT_SYMBOL(__direct_map_base); +EXPORT_SYMBOL(__direct_map_size); + +/* + * Declare all of the machine vectors. + */ + +/* GCC 2.7.2 (on alpha at least) is lame. It does not support either + __attribute__((weak)) or #pragma weak. Bypass it and talk directly + to the assembler. */ + +#define WEAK(X) \ + extern struct alpha_machine_vector X; \ + asm(".weak "#X) + +WEAK(alcor_mv); +WEAK(alphabook1_mv); +WEAK(avanti_mv); +WEAK(cabriolet_mv); +WEAK(clipper_mv); +WEAK(dp264_mv); +WEAK(eb164_mv); +WEAK(eb64p_mv); +WEAK(eb66_mv); +WEAK(eb66p_mv); +WEAK(eiger_mv); +WEAK(jensen_mv); +WEAK(lx164_mv); +WEAK(lynx_mv); +WEAK(marvel_ev7_mv); +WEAK(miata_mv); +WEAK(mikasa_mv); +WEAK(mikasa_primo_mv); +WEAK(monet_mv); +WEAK(nautilus_mv); +WEAK(noname_mv); +WEAK(noritake_mv); +WEAK(noritake_primo_mv); +WEAK(p2k_mv); +WEAK(pc164_mv); +WEAK(privateer_mv); +WEAK(rawhide_mv); +WEAK(ruffian_mv); +WEAK(rx164_mv); +WEAK(sable_mv); +WEAK(sable_gamma_mv); +WEAK(shark_mv); +WEAK(sx164_mv); +WEAK(takara_mv); +WEAK(titan_mv); +WEAK(webbrick_mv); +WEAK(wildfire_mv); +WEAK(xl_mv); +WEAK(xlt_mv); + +#undef WEAK + +/* + * I/O resources inherited from PeeCees. Except for perhaps the + * turbochannel alphas, everyone has these on some sort of SuperIO chip. + * + * ??? If this becomes less standard, move the struct out into the + * machine vector. 
+ */ + +static void __init +reserve_std_resources(void) +{ + static struct resource standard_io_resources[] = { + { .name = "rtc", .start = -1, .end = -1 }, + { .name = "dma1", .start = 0x00, .end = 0x1f }, + { .name = "pic1", .start = 0x20, .end = 0x3f }, + { .name = "timer", .start = 0x40, .end = 0x5f }, + { .name = "keyboard", .start = 0x60, .end = 0x6f }, + { .name = "dma page reg", .start = 0x80, .end = 0x8f }, + { .name = "pic2", .start = 0xa0, .end = 0xbf }, + { .name = "dma2", .start = 0xc0, .end = 0xdf }, + }; + + struct resource *io = &ioport_resource; + size_t i; + + if (hose_head) { + struct pci_controller *hose; + for (hose = hose_head; hose; hose = hose->next) + if (hose->index == 0) { + io = hose->io_space; + break; + } + } + + /* Fix up for the Jensen's queer RTC placement. */ + standard_io_resources[0].start = RTC_PORT(0); + standard_io_resources[0].end = RTC_PORT(0) + 0x0f; + + for (i = 0; i < ARRAY_SIZE(standard_io_resources); ++i) + request_resource(io, standard_io_resources+i); +} + +#define PFN_MAX PFN_DOWN(0x80000000) +#define for_each_mem_cluster(memdesc, _cluster, i) \ + for ((_cluster) = (memdesc)->cluster, (i) = 0; \ + (i) < (memdesc)->numclusters; (i)++, (_cluster)++) + +static unsigned long __init +get_mem_size_limit(char *s) +{ + unsigned long end = 0; + char *from = s; + + end = simple_strtoul(from, &from, 0); + if ( *from == 'K' || *from == 'k' ) { + end = end << 10; + from++; + } else if ( *from == 'M' || *from == 'm' ) { + end = end << 20; + from++; + } else if ( *from == 'G' || *from == 'g' ) { + end = end << 30; + from++; + } + return end >> PAGE_SHIFT; /* Return the PFN of the limit. */ +} + +#ifdef CONFIG_BLK_DEV_INITRD +void * __init +move_initrd(unsigned long mem_limit) +{ + void *start; + unsigned long size; + + size = initrd_end - initrd_start; + start = memblock_alloc(PAGE_ALIGN(size), PAGE_SIZE); + if (!start || __pa(start) + size > mem_limit) { + initrd_start = initrd_end = 0; + return NULL; + } + memmove(start, (void *)initrd_start, size); + initrd_start = (unsigned long)start; + initrd_end = initrd_start + size; + printk("initrd moved to %p\n", start); + return start; +} +#endif + +static void __init +setup_memory(void *kernel_end) +{ + struct memclust_struct * cluster; + struct memdesc_struct * memdesc; + unsigned long kernel_size; + unsigned long i; + + /* Find free clusters, and init and free the bootmem accordingly. */ + memdesc = (struct memdesc_struct *) + (hwrpb->mddt_offset + (unsigned long) hwrpb); + + for_each_mem_cluster(memdesc, cluster, i) { + unsigned long end; + + printk("memcluster %lu, usage %01lx, start %8lu, end %8lu\n", + i, cluster->usage, cluster->start_pfn, + cluster->start_pfn + cluster->numpages); + + end = cluster->start_pfn + cluster->numpages; + if (end > max_low_pfn) + max_low_pfn = end; + + memblock_add(PFN_PHYS(cluster->start_pfn), + cluster->numpages << PAGE_SHIFT); + + /* Bit 0 is console/PALcode reserved. Bit 1 is + non-volatile memory -- we might want to mark + this for later. */ + if (cluster->usage & 3) + memblock_reserve(PFN_PHYS(cluster->start_pfn), + cluster->numpages << PAGE_SHIFT); + } + + /* + * Except for the NUMA systems (wildfire, marvel) all of the + * Alpha systems we run on support 32GB of memory or less. + * Since the NUMA systems introduce large holes in memory addressing, + * we can get into a situation where there is not enough contiguous + * memory for the memory map. 
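get_mem_size_limit() above accepts a bare number or a K/M/G suffix and hands back the limit as a page frame number. A standalone equivalent, assuming Alpha's 8 KB pages (PAGE_SHIFT 13) and using strtoul in place of the kernel's simple_strtoul:

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT 13   /* Alpha uses 8 KB pages */

static unsigned long mem_limit_pfn(const char *s)
{
	char *end;
	unsigned long bytes = strtoul(s, &end, 0);

	switch (*end) {
	case 'K': case 'k': bytes <<= 10; break;
	case 'M': case 'm': bytes <<= 20; break;
	case 'G': case 'g': bytes <<= 30; break;
	}
	return bytes >> PAGE_SHIFT;        /* PFN of the limit */
}

int main(void)
{
	printf("mem=64M -> limit pfn %lu\n", mem_limit_pfn("64M"));
	printf("mem=2G  -> limit pfn %lu\n", mem_limit_pfn("2G"));
	return 0;
}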
+ * + * Limit memory to the first 32GB to limit the NUMA systems to + * memory on their first node (wildfire) or 2 (marvel) to avoid + * not being able to produce the memory map. In order to access + * all of the memory on the NUMA systems, build with discontiguous + * memory support. + * + * If the user specified a memory limit, let that memory limit stand. + */ + if (!mem_size_limit) + mem_size_limit = (32ul * 1024 * 1024 * 1024) >> PAGE_SHIFT; + + if (mem_size_limit && max_low_pfn >= mem_size_limit) + { + printk("setup: forcing memory size to %ldK (from %ldK).\n", + mem_size_limit << (PAGE_SHIFT - 10), + max_low_pfn << (PAGE_SHIFT - 10)); + max_low_pfn = mem_size_limit; + } + + /* Reserve the kernel memory. */ + kernel_size = virt_to_phys(kernel_end) - KERNEL_START_PHYS; + memblock_reserve(KERNEL_START_PHYS, kernel_size); + +#ifdef CONFIG_BLK_DEV_INITRD + initrd_start = INITRD_START; + if (initrd_start) { + initrd_end = initrd_start+INITRD_SIZE; + printk("Initial ramdisk at: 0x%p (%lu bytes)\n", + (void *) initrd_start, INITRD_SIZE); + + if ((void *)initrd_end > phys_to_virt(PFN_PHYS(max_low_pfn))) { + if (!move_initrd(PFN_PHYS(max_low_pfn))) + printk("initrd extends beyond end of memory " + "(0x%08lx > 0x%p)\ndisabling initrd\n", + initrd_end, + phys_to_virt(PFN_PHYS(max_low_pfn))); + } else { + memblock_reserve(virt_to_phys((void *)initrd_start), + INITRD_SIZE); + } + } +#endif /* CONFIG_BLK_DEV_INITRD */ +} + +int __init +page_is_ram(unsigned long pfn) +{ + struct memclust_struct * cluster; + struct memdesc_struct * memdesc; + unsigned long i; + + memdesc = (struct memdesc_struct *) + (hwrpb->mddt_offset + (unsigned long) hwrpb); + for_each_mem_cluster(memdesc, cluster, i) + { + if (pfn >= cluster->start_pfn && + pfn < cluster->start_pfn + cluster->numpages) { + return (cluster->usage & 3) ? 0 : 1; + } + } + + return 0; +} + +static int __init +register_cpus(void) +{ + int i; + + for_each_possible_cpu(i) { + struct cpu *p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + register_cpu(p, i); + } + return 0; +} + +arch_initcall(register_cpus); + +#ifdef CONFIG_MAGIC_SYSRQ +static void sysrq_reboot_handler(int unused) +{ + machine_halt(); +} + +static const struct sysrq_key_op srm_sysrq_reboot_op = { + .handler = sysrq_reboot_handler, + .help_msg = "reboot(b)", + .action_msg = "Resetting", + .enable_mask = SYSRQ_ENABLE_BOOT, +}; +#endif + +void __init +setup_arch(char **cmdline_p) +{ + extern char _end[]; + + struct alpha_machine_vector *vec = NULL; + struct percpu_struct *cpu; + char *type_name, *var_name, *p; + void *kernel_end = _end; /* end of kernel */ + char *args = command_line; + + hwrpb = (struct hwrpb_struct*) __va(INIT_HWRPB->phys_addr); + boot_cpuid = hard_smp_processor_id(); + + /* + * Pre-process the system type to make sure it will be valid. + * + * This may restore real CABRIO and EB66+ family names, ie + * EB64+ and EB66. + * + * Oh, and "white box" AS800 (aka DIGITAL Server 3000 series) + * and AS1200 (DIGITAL Server 5000 series) have the type as + * the negative of the real one. + */ + if ((long)hwrpb->sys_type < 0) { + hwrpb->sys_type = -((long)hwrpb->sys_type); + hwrpb_update_checksum(hwrpb); + } + + /* Register a call for panic conditions. */ + atomic_notifier_chain_register(&panic_notifier_list, + &alpha_panic_block); + +#ifndef alpha_using_srm + /* Assume that we've booted from SRM if we haven't booted from MILO. + Detect the later by looking for "MILO" in the system serial nr. 
*/ + alpha_using_srm = !str_has_prefix((const char *)hwrpb->ssn, "MILO"); +#endif +#ifndef alpha_using_qemu + /* Similarly, look for QEMU. */ + alpha_using_qemu = strstr((const char *)hwrpb->ssn, "QEMU") != 0; +#endif + + /* If we are using SRM, we want to allow callbacks + as early as possible, so do this NOW, and then + they should work immediately thereafter. + */ + kernel_end = callback_init(kernel_end); + + /* + * Locate the command line. + */ + /* Hack for Jensen... since we're restricted to 8 or 16 chars for + boot flags depending on the boot mode, we need some shorthand. + This should do for installation. */ + if (strcmp(COMMAND_LINE, "INSTALL") == 0) { + strlcpy(command_line, "root=/dev/fd0 load_ramdisk=1", sizeof command_line); + } else { + strlcpy(command_line, COMMAND_LINE, sizeof command_line); + } + strcpy(boot_command_line, command_line); + *cmdline_p = command_line; + + /* + * Process command-line arguments. + */ + while ((p = strsep(&args, " \t")) != NULL) { + if (!*p) continue; + if (strncmp(p, "alpha_mv=", 9) == 0) { + vec = get_sysvec_byname(p+9); + continue; + } + if (strncmp(p, "cycle=", 6) == 0) { + est_cycle_freq = simple_strtol(p+6, NULL, 0); + continue; + } + if (strncmp(p, "mem=", 4) == 0) { + mem_size_limit = get_mem_size_limit(p+4); + continue; + } + if (strncmp(p, "srmcons", 7) == 0) { + srmcons_output |= 1; + continue; + } + if (strncmp(p, "console=srm", 11) == 0) { + srmcons_output |= 2; + continue; + } + if (strncmp(p, "gartsize=", 9) == 0) { + alpha_agpgart_size = + get_mem_size_limit(p+9) << PAGE_SHIFT; + continue; + } +#ifdef CONFIG_VERBOSE_MCHECK + if (strncmp(p, "verbose_mcheck=", 15) == 0) { + alpha_verbose_mcheck = simple_strtol(p+15, NULL, 0); + continue; + } +#endif + } + + /* Replace the command line, now that we've killed it with strsep. */ + strcpy(command_line, boot_command_line); + + /* If we want SRM console printk echoing early, do it now. */ + if (alpha_using_srm && srmcons_output) { + register_srm_console(); + + /* + * If "console=srm" was specified, clear the srmcons_output + * flag now so that time.c won't unregister_srm_console + */ + if (srmcons_output & 2) + srmcons_output = 0; + } + +#ifdef CONFIG_MAGIC_SYSRQ + /* If we're using SRM, make sysrq-b halt back to the prom, + not auto-reboot. */ + if (alpha_using_srm) { + unregister_sysrq_key('b', __sysrq_reboot_op); + register_sysrq_key('b', &srm_sysrq_reboot_op); + } +#endif + + /* + * Identify and reconfigure for the current system. + */ + cpu = (struct percpu_struct*)((char*)hwrpb + hwrpb->processor_offset); + + get_sysnames(hwrpb->sys_type, hwrpb->sys_variation, + cpu->type, &type_name, &var_name); + if (*var_name == '0') + var_name = ""; + + if (!vec) { + vec = get_sysvec(hwrpb->sys_type, hwrpb->sys_variation, + cpu->type); + } + + if (!vec) { + panic("Unsupported system type: %s%s%s (%ld %ld)\n", + type_name, (*var_name ? " variation " : ""), var_name, + hwrpb->sys_type, hwrpb->sys_variation); + } + if (vec != &alpha_mv) { + alpha_mv = *vec; + } + + printk("Booting " +#ifdef CONFIG_ALPHA_GENERIC + "GENERIC " +#endif + "on %s%s%s using machine vector %s from %s\n", + type_name, (*var_name ? " variation " : ""), + var_name, alpha_mv.vector_name, + (alpha_using_srm ? 
"SRM" : "MILO")); + + printk("Major Options: " +#ifdef CONFIG_SMP + "SMP " +#endif +#ifdef CONFIG_ALPHA_EV56 + "EV56 " +#endif +#ifdef CONFIG_ALPHA_EV67 + "EV67 " +#endif +#ifdef CONFIG_ALPHA_LEGACY_START_ADDRESS + "LEGACY_START " +#endif +#ifdef CONFIG_VERBOSE_MCHECK + "VERBOSE_MCHECK " +#endif + +#ifdef CONFIG_DEBUG_SPINLOCK + "DEBUG_SPINLOCK " +#endif +#ifdef CONFIG_MAGIC_SYSRQ + "MAGIC_SYSRQ " +#endif + "\n"); + + printk("Command line: %s\n", command_line); + + /* + * Sync up the HAE. + * Save the SRM's current value for restoration. + */ + srm_hae = *alpha_mv.hae_register; + __set_hae(alpha_mv.hae_cache); + + /* Reset enable correctable error reports. */ + wrmces(0x7); + + /* Find our memory. */ + setup_memory(kernel_end); + memblock_set_bottom_up(true); + sparse_init(); + + /* First guess at cpu cache sizes. Do this before init_arch. */ + determine_cpu_caches(cpu->type); + + /* Initialize the machine. Usually has to do with setting up + DMA windows and the like. */ + if (alpha_mv.init_arch) + alpha_mv.init_arch(); + + /* Reserve standard resources. */ + reserve_std_resources(); + + /* + * Give us a default console. TGA users will see nothing until + * chr_dev_init is called, rather late in the boot sequence. + */ + +#ifdef CONFIG_VT +#if defined(CONFIG_VGA_CONSOLE) + conswitchp = &vga_con; +#endif +#endif + + /* Default root filesystem to sda2. */ + ROOT_DEV = Root_SDA2; + +#ifdef CONFIG_EISA + /* FIXME: only set this when we actually have EISA in this box? */ + EISA_bus = 1; +#endif + + /* + * Check ASN in HWRPB for validity, report if bad. + * FIXME: how was this failing? Should we trust it instead, + * and copy the value into alpha_mv.max_asn? + */ + + if (hwrpb->max_asn != MAX_ASN) { + printk("Max ASN from HWRPB is bad (0x%lx)\n", hwrpb->max_asn); + } + + /* + * Identify the flock of penguins. 
+ */ + +#ifdef CONFIG_SMP + setup_smp(); +#endif + paging_init(); +} + +static char sys_unknown[] = "Unknown"; +static char systype_names[][16] = { + "0", + "ADU", "Cobra", "Ruby", "Flamingo", "Mannequin", "Jensen", + "Pelican", "Morgan", "Sable", "Medulla", "Noname", + "Turbolaser", "Avanti", "Mustang", "Alcor", "Tradewind", + "Mikasa", "EB64", "EB66", "EB64+", "AlphaBook1", + "Rawhide", "K2", "Lynx", "XL", "EB164", "Noritake", + "Cortex", "29", "Miata", "XXM", "Takara", "Yukon", + "Tsunami", "Wildfire", "CUSCO", "Eiger", "Titan", "Marvel" +}; + +static char unofficial_names[][8] = {"100", "Ruffian"}; + +static char api_names[][16] = {"200", "Nautilus"}; + +static char eb164_names[][8] = {"EB164", "PC164", "LX164", "SX164", "RX164"}; +static int eb164_indices[] = {0,0,0,1,1,1,1,1,2,2,2,2,3,3,3,3,4}; + +static char alcor_names[][16] = {"Alcor", "Maverick", "Bret"}; +static int alcor_indices[] = {0,0,0,1,1,1,0,0,0,0,0,0,2,2,2,2,2,2}; + +static char eb64p_names[][16] = {"EB64+", "Cabriolet", "AlphaPCI64"}; +static int eb64p_indices[] = {0,0,1,2}; + +static char eb66_names[][8] = {"EB66", "EB66+"}; +static int eb66_indices[] = {0,0,1}; + +static char marvel_names[][16] = { + "Marvel/EV7" +}; +static int marvel_indices[] = { 0 }; + +static char rawhide_names[][16] = { + "Dodge", "Wrangler", "Durango", "Tincup", "DaVinci" +}; +static int rawhide_indices[] = {0,0,0,1,1,2,2,3,3,4,4}; + +static char titan_names[][16] = { + "DEFAULT", "Privateer", "Falcon", "Granite" +}; +static int titan_indices[] = {0,1,2,2,3}; + +static char tsunami_names[][16] = { + "0", "DP264", "Warhol", "Windjammer", "Monet", "Clipper", + "Goldrush", "Webbrick", "Catamaran", "Brisbane", "Melbourne", + "Flying Clipper", "Shark" +}; +static int tsunami_indices[] = {0,1,2,3,4,5,6,7,8,9,10,11,12}; + +static struct alpha_machine_vector * __init +get_sysvec(unsigned long type, unsigned long variation, unsigned long cpu) +{ + static struct alpha_machine_vector *systype_vecs[] __initdata = + { + NULL, /* 0 */ + NULL, /* ADU */ + NULL, /* Cobra */ + NULL, /* Ruby */ + NULL, /* Flamingo */ + NULL, /* Mannequin */ + &jensen_mv, + NULL, /* Pelican */ + NULL, /* Morgan */ + NULL, /* Sable -- see below. */ + NULL, /* Medulla */ + &noname_mv, + NULL, /* Turbolaser */ + &avanti_mv, + NULL, /* Mustang */ + NULL, /* Alcor, Bret, Maverick. HWRPB inaccurate? */ + NULL, /* Tradewind */ + NULL, /* Mikasa -- see below. */ + NULL, /* EB64 */ + NULL, /* EB66 -- see variation. */ + NULL, /* EB64+ -- see variation. */ + &alphabook1_mv, + &rawhide_mv, + NULL, /* K2 */ + &lynx_mv, /* Lynx */ + &xl_mv, + NULL, /* EB164 -- see variation. */ + NULL, /* Noritake -- see below. */ + NULL, /* Cortex */ + NULL, /* 29 */ + &miata_mv, + NULL, /* XXM */ + &takara_mv, + NULL, /* Yukon */ + NULL, /* Tsunami -- see variation. 
*/ + &wildfire_mv, /* Wildfire */ + NULL, /* CUSCO */ + &eiger_mv, /* Eiger */ + NULL, /* Titan */ + NULL, /* Marvel */ + }; + + static struct alpha_machine_vector *unofficial_vecs[] __initdata = + { + NULL, /* 100 */ + &ruffian_mv, + }; + + static struct alpha_machine_vector *api_vecs[] __initdata = + { + NULL, /* 200 */ + &nautilus_mv, + }; + + static struct alpha_machine_vector *alcor_vecs[] __initdata = + { + &alcor_mv, &xlt_mv, &xlt_mv + }; + + static struct alpha_machine_vector *eb164_vecs[] __initdata = + { + &eb164_mv, &pc164_mv, &lx164_mv, &sx164_mv, &rx164_mv + }; + + static struct alpha_machine_vector *eb64p_vecs[] __initdata = + { + &eb64p_mv, + &cabriolet_mv, + &cabriolet_mv /* AlphaPCI64 */ + }; + + static struct alpha_machine_vector *eb66_vecs[] __initdata = + { + &eb66_mv, + &eb66p_mv + }; + + static struct alpha_machine_vector *marvel_vecs[] __initdata = + { + &marvel_ev7_mv, + }; + + static struct alpha_machine_vector *titan_vecs[] __initdata = + { + &titan_mv, /* default */ + &privateer_mv, /* privateer */ + &titan_mv, /* falcon */ + &privateer_mv, /* granite */ + }; + + static struct alpha_machine_vector *tsunami_vecs[] __initdata = + { + NULL, + &dp264_mv, /* dp264 */ + &dp264_mv, /* warhol */ + &dp264_mv, /* windjammer */ + &monet_mv, /* monet */ + &clipper_mv, /* clipper */ + &dp264_mv, /* goldrush */ + &webbrick_mv, /* webbrick */ + &dp264_mv, /* catamaran */ + NULL, /* brisbane? */ + NULL, /* melbourne? */ + NULL, /* flying clipper? */ + &shark_mv, /* shark */ + }; + + /* ??? Do we need to distinguish between Rawhides? */ + + struct alpha_machine_vector *vec; + + /* Search the system tables first... */ + vec = NULL; + if (type < ARRAY_SIZE(systype_vecs)) { + vec = systype_vecs[type]; + } else if ((type > ST_API_BIAS) && + (type - ST_API_BIAS) < ARRAY_SIZE(api_vecs)) { + vec = api_vecs[type - ST_API_BIAS]; + } else if ((type > ST_UNOFFICIAL_BIAS) && + (type - ST_UNOFFICIAL_BIAS) < ARRAY_SIZE(unofficial_vecs)) { + vec = unofficial_vecs[type - ST_UNOFFICIAL_BIAS]; + } + + /* If we've not found one, try for a variation. */ + + if (!vec) { + /* Member ID is a bit-field. */ + unsigned long member = (variation >> 10) & 0x3f; + + cpu &= 0xffffffff; /* make it usable */ + + switch (type) { + case ST_DEC_ALCOR: + if (member < ARRAY_SIZE(alcor_indices)) + vec = alcor_vecs[alcor_indices[member]]; + break; + case ST_DEC_EB164: + if (member < ARRAY_SIZE(eb164_indices)) + vec = eb164_vecs[eb164_indices[member]]; + /* PC164 may show as EB164 variation with EV56 CPU, + but, since no true EB164 had anything but EV5... 
*/ + if (vec == &eb164_mv && cpu == EV56_CPU) + vec = &pc164_mv; + break; + case ST_DEC_EB64P: + if (member < ARRAY_SIZE(eb64p_indices)) + vec = eb64p_vecs[eb64p_indices[member]]; + break; + case ST_DEC_EB66: + if (member < ARRAY_SIZE(eb66_indices)) + vec = eb66_vecs[eb66_indices[member]]; + break; + case ST_DEC_MARVEL: + if (member < ARRAY_SIZE(marvel_indices)) + vec = marvel_vecs[marvel_indices[member]]; + break; + case ST_DEC_TITAN: + vec = titan_vecs[0]; /* default */ + if (member < ARRAY_SIZE(titan_indices)) + vec = titan_vecs[titan_indices[member]]; + break; + case ST_DEC_TSUNAMI: + if (member < ARRAY_SIZE(tsunami_indices)) + vec = tsunami_vecs[tsunami_indices[member]]; + break; + case ST_DEC_1000: + if (cpu == EV5_CPU || cpu == EV56_CPU) + vec = &mikasa_primo_mv; + else + vec = &mikasa_mv; + break; + case ST_DEC_NORITAKE: + if (cpu == EV5_CPU || cpu == EV56_CPU) + vec = &noritake_primo_mv; + else + vec = &noritake_mv; + break; + case ST_DEC_2100_A500: + if (cpu == EV5_CPU || cpu == EV56_CPU) + vec = &sable_gamma_mv; + else + vec = &sable_mv; + break; + } + } + return vec; +} + +static struct alpha_machine_vector * __init +get_sysvec_byname(const char *name) +{ + static struct alpha_machine_vector *all_vecs[] __initdata = + { + &alcor_mv, + &alphabook1_mv, + &avanti_mv, + &cabriolet_mv, + &clipper_mv, + &dp264_mv, + &eb164_mv, + &eb64p_mv, + &eb66_mv, + &eb66p_mv, + &eiger_mv, + &jensen_mv, + &lx164_mv, + &lynx_mv, + &miata_mv, + &mikasa_mv, + &mikasa_primo_mv, + &monet_mv, + &nautilus_mv, + &noname_mv, + &noritake_mv, + &noritake_primo_mv, + &p2k_mv, + &pc164_mv, + &privateer_mv, + &rawhide_mv, + &ruffian_mv, + &rx164_mv, + &sable_mv, + &sable_gamma_mv, + &shark_mv, + &sx164_mv, + &takara_mv, + &webbrick_mv, + &wildfire_mv, + &xl_mv, + &xlt_mv + }; + + size_t i; + + for (i = 0; i < ARRAY_SIZE(all_vecs); ++i) { + struct alpha_machine_vector *mv = all_vecs[i]; + if (strcasecmp(mv->vector_name, name) == 0) + return mv; + } + return NULL; +} + +static void +get_sysnames(unsigned long type, unsigned long variation, unsigned long cpu, + char **type_name, char **variation_name) +{ + unsigned long member; + + /* If not in the tables, make it UNKNOWN, + else set type name to family */ + if (type < ARRAY_SIZE(systype_names)) { + *type_name = systype_names[type]; + } else if ((type > ST_API_BIAS) && + (type - ST_API_BIAS) < ARRAY_SIZE(api_names)) { + *type_name = api_names[type - ST_API_BIAS]; + } else if ((type > ST_UNOFFICIAL_BIAS) && + (type - ST_UNOFFICIAL_BIAS) < ARRAY_SIZE(unofficial_names)) { + *type_name = unofficial_names[type - ST_UNOFFICIAL_BIAS]; + } else { + *type_name = sys_unknown; + *variation_name = sys_unknown; + return; + } + + /* Set variation to "0"; if variation is zero, done. */ + *variation_name = systype_names[0]; + if (variation == 0) { + return; + } + + member = (variation >> 10) & 0x3f; /* member ID is a bit-field */ + + cpu &= 0xffffffff; /* make it usable */ + + switch (type) { /* select by family */ + default: /* default to variation "0" for now */ + break; + case ST_DEC_EB164: + if (member >= ARRAY_SIZE(eb164_indices)) + break; + *variation_name = eb164_names[eb164_indices[member]]; + /* PC164 may show as EB164 variation, but with EV56 CPU, + so, since no true EB164 had anything but EV5... 
*/ + if (eb164_indices[member] == 0 && cpu == EV56_CPU) + *variation_name = eb164_names[1]; /* make it PC164 */ + break; + case ST_DEC_ALCOR: + if (member < ARRAY_SIZE(alcor_indices)) + *variation_name = alcor_names[alcor_indices[member]]; + break; + case ST_DEC_EB64P: + if (member < ARRAY_SIZE(eb64p_indices)) + *variation_name = eb64p_names[eb64p_indices[member]]; + break; + case ST_DEC_EB66: + if (member < ARRAY_SIZE(eb66_indices)) + *variation_name = eb66_names[eb66_indices[member]]; + break; + case ST_DEC_MARVEL: + if (member < ARRAY_SIZE(marvel_indices)) + *variation_name = marvel_names[marvel_indices[member]]; + break; + case ST_DEC_RAWHIDE: + if (member < ARRAY_SIZE(rawhide_indices)) + *variation_name = rawhide_names[rawhide_indices[member]]; + break; + case ST_DEC_TITAN: + *variation_name = titan_names[0]; /* default */ + if (member < ARRAY_SIZE(titan_indices)) + *variation_name = titan_names[titan_indices[member]]; + break; + case ST_DEC_TSUNAMI: + if (member < ARRAY_SIZE(tsunami_indices)) + *variation_name = tsunami_names[tsunami_indices[member]]; + break; + } +} + +/* + * A change was made to the HWRPB via an ECO and the following code + * tracks a part of the ECO. In HWRPB versions less than 5, the ECO + * was not implemented in the console firmware. If it's revision 5 or + * greater we can get the name of the platform as an ASCII string from + * the HWRPB. That's what this function does. It checks the revision + * level and if the string is in the HWRPB it returns the address of + * the string--a pointer to the name of the platform. + * + * Returns: + * - Pointer to a ASCII string if it's in the HWRPB + * - Pointer to a blank string if the data is not in the HWRPB. + */ + +static char * +platform_string(void) +{ + struct dsr_struct *dsr; + static char unk_system_string[] = "N/A"; + + /* Go to the console for the string pointer. + * If the rpb_vers is not 5 or greater the rpb + * is old and does not have this data in it. + */ + if (hwrpb->revision < 5) + return (unk_system_string); + else { + /* The Dynamic System Recognition struct + * has the system platform name starting + * after the character count of the string. 
+ */ + dsr = ((struct dsr_struct *) + ((char *)hwrpb + hwrpb->dsr_offset)); + return ((char *)dsr + (dsr->sysname_off + + sizeof(long))); + } +} + +static int +get_nr_processors(struct percpu_struct *cpubase, unsigned long num) +{ + struct percpu_struct *cpu; + unsigned long i; + int count = 0; + + for (i = 0; i < num; i++) { + cpu = (struct percpu_struct *) + ((char *)cpubase + i*hwrpb->processor_size); + if ((cpu->flags & 0x1cc) == 0x1cc) + count++; + } + return count; +} + +static void +show_cache_size (struct seq_file *f, const char *which, int shape) +{ + if (shape == -1) + seq_printf (f, "%s\t\t: n/a\n", which); + else if (shape == 0) + seq_printf (f, "%s\t\t: unknown\n", which); + else + seq_printf (f, "%s\t\t: %dK, %d-way, %db line\n", + which, shape >> 10, shape & 15, + 1 << ((shape >> 4) & 15)); +} + +static int +show_cpuinfo(struct seq_file *f, void *slot) +{ + extern struct unaligned_stat { + unsigned long count, va, pc; + } unaligned[2]; + + static char cpu_names[][8] = { + "EV3", "EV4", "Simulate", "LCA4", "EV5", "EV45", "EV56", + "EV6", "PCA56", "PCA57", "EV67", "EV68CB", "EV68AL", + "EV68CX", "EV7", "EV79", "EV69" + }; + + struct percpu_struct *cpu = slot; + unsigned int cpu_index; + char *cpu_name; + char *systype_name; + char *sysvariation_name; + int nr_processors; + unsigned long timer_freq; + + cpu_index = (unsigned) (cpu->type - 1); + cpu_name = "Unknown"; + if (cpu_index < ARRAY_SIZE(cpu_names)) + cpu_name = cpu_names[cpu_index]; + + get_sysnames(hwrpb->sys_type, hwrpb->sys_variation, + cpu->type, &systype_name, &sysvariation_name); + + nr_processors = get_nr_processors(cpu, hwrpb->nr_processors); + +#if CONFIG_HZ == 1024 || CONFIG_HZ == 1200 + timer_freq = (100UL * hwrpb->intr_freq) / 4096; +#else + timer_freq = 100UL * CONFIG_HZ; +#endif + + seq_printf(f, "cpu\t\t\t: Alpha\n" + "cpu model\t\t: %s\n" + "cpu variation\t\t: %ld\n" + "cpu revision\t\t: %ld\n" + "cpu serial number\t: %s\n" + "system type\t\t: %s\n" + "system variation\t: %s\n" + "system revision\t\t: %ld\n" + "system serial number\t: %s\n" + "cycle frequency [Hz]\t: %lu %s\n" + "timer frequency [Hz]\t: %lu.%02lu\n" + "page size [bytes]\t: %ld\n" + "phys. address bits\t: %ld\n" + "max. addr. space #\t: %ld\n" + "BogoMIPS\t\t: %lu.%02lu\n" + "kernel unaligned acc\t: %ld (pc=%lx,va=%lx)\n" + "user unaligned acc\t: %ld (pc=%lx,va=%lx)\n" + "platform string\t\t: %s\n" + "cpus detected\t\t: %d\n", + cpu_name, cpu->variation, cpu->revision, + (char*)cpu->serial_no, + systype_name, sysvariation_name, hwrpb->sys_revision, + (char*)hwrpb->ssn, + est_cycle_freq ? : hwrpb->cycle_freq, + est_cycle_freq ? "est." 
: "", + timer_freq / 100, timer_freq % 100, + hwrpb->pagesize, + hwrpb->pa_bits, + hwrpb->max_asn, + loops_per_jiffy / (500000/HZ), + (loops_per_jiffy / (5000/HZ)) % 100, + unaligned[0].count, unaligned[0].pc, unaligned[0].va, + unaligned[1].count, unaligned[1].pc, unaligned[1].va, + platform_string(), nr_processors); + +#ifdef CONFIG_SMP + seq_printf(f, "cpus active\t\t: %u\n" + "cpu active mask\t\t: %016lx\n", + num_online_cpus(), cpumask_bits(cpu_possible_mask)[0]); +#endif + + show_cache_size (f, "L1 Icache", alpha_l1i_cacheshape); + show_cache_size (f, "L1 Dcache", alpha_l1d_cacheshape); + show_cache_size (f, "L2 cache", alpha_l2_cacheshape); + show_cache_size (f, "L3 cache", alpha_l3_cacheshape); + + return 0; +} + +static int __init +read_mem_block(int *addr, int stride, int size) +{ + long nloads = size / stride, cnt, tmp; + + __asm__ __volatile__( + " rpcc %0\n" + "1: ldl %3,0(%2)\n" + " subq %1,1,%1\n" + /* Next two XORs introduce an explicit data dependency between + consecutive loads in the loop, which will give us true load + latency. */ + " xor %3,%2,%2\n" + " xor %3,%2,%2\n" + " addq %2,%4,%2\n" + " bne %1,1b\n" + " rpcc %3\n" + " subl %3,%0,%0\n" + : "=&r" (cnt), "=&r" (nloads), "=&r" (addr), "=&r" (tmp) + : "r" (stride), "1" (nloads), "2" (addr)); + + return cnt / (size / stride); +} + +#define CSHAPE(totalsize, linesize, assoc) \ + ((totalsize & ~0xff) | (linesize << 4) | assoc) + +/* ??? EV5 supports up to 64M, but did the systems with more than + 16M of BCACHE ever exist? */ +#define MAX_BCACHE_SIZE 16*1024*1024 + +/* Note that the offchip caches are direct mapped on all Alphas. */ +static int __init +external_cache_probe(int minsize, int width) +{ + int cycles, prev_cycles = 1000000; + int stride = 1 << width; + long size = minsize, maxsize = MAX_BCACHE_SIZE * 2; + + if (maxsize > (max_low_pfn + 1) << PAGE_SHIFT) + maxsize = 1 << (ilog2(max_low_pfn + 1) + PAGE_SHIFT); + + /* Get the first block cached. */ + read_mem_block(__va(0), stride, size); + + while (size < maxsize) { + /* Get an average load latency in cycles. */ + cycles = read_mem_block(__va(0), stride, size); + if (cycles > prev_cycles * 2) { + /* Fine, we exceed the cache. */ + printk("%ldK Bcache detected; load hit latency %d " + "cycles, load miss latency %d cycles\n", + size >> 11, prev_cycles, cycles); + return CSHAPE(size >> 1, width, 1); + } + /* Try to get the next block cached. */ + read_mem_block(__va(size), stride, size); + prev_cycles = cycles; + size <<= 1; + } + return -1; /* No BCACHE found. */ +} + +static void __init +determine_cpu_caches (unsigned int cpu_type) +{ + int L1I, L1D, L2, L3; + + switch (cpu_type) { + case EV4_CPU: + case EV45_CPU: + { + if (cpu_type == EV4_CPU) + L1I = CSHAPE(8*1024, 5, 1); + else + L1I = CSHAPE(16*1024, 5, 1); + L1D = L1I; + L3 = -1; + + /* BIU_CTL is a write-only Abox register. PALcode has a + shadow copy, and may be available from some versions + of the CSERVE PALcall. If we can get it, then + + unsigned long biu_ctl, size; + size = 128*1024 * (1 << ((biu_ctl >> 28) & 7)); + L2 = CSHAPE (size, 5, 1); + + Unfortunately, we can't rely on that. + */ + L2 = external_cache_probe(128*1024, 5); + break; + } + + case LCA4_CPU: + { + unsigned long car, size; + + L1I = L1D = CSHAPE(8*1024, 5, 1); + L3 = -1; + + car = *(vuip) phys_to_virt (0x120000078UL); + size = 64*1024 * (1 << ((car >> 5) & 7)); + /* No typo -- 8 byte cacheline size. Whodathunk. */ + L2 = (car & 1 ? 
CSHAPE (size, 3, 1) : -1); + break; + } + + case EV5_CPU: + case EV56_CPU: + { + unsigned long sc_ctl, width; + + L1I = L1D = CSHAPE(8*1024, 5, 1); + + /* Check the line size of the Scache. */ + sc_ctl = *(vulp) phys_to_virt (0xfffff000a8UL); + width = sc_ctl & 0x1000 ? 6 : 5; + L2 = CSHAPE (96*1024, width, 3); + + /* BC_CONTROL and BC_CONFIG are write-only IPRs. PALcode + has a shadow copy, and may be available from some versions + of the CSERVE PALcall. If we can get it, then + + unsigned long bc_control, bc_config, size; + size = 1024*1024 * (1 << ((bc_config & 7) - 1)); + L3 = (bc_control & 1 ? CSHAPE (size, width, 1) : -1); + + Unfortunately, we can't rely on that. + */ + L3 = external_cache_probe(1024*1024, width); + break; + } + + case PCA56_CPU: + case PCA57_CPU: + { + if (cpu_type == PCA56_CPU) { + L1I = CSHAPE(16*1024, 6, 1); + L1D = CSHAPE(8*1024, 5, 1); + } else { + L1I = CSHAPE(32*1024, 6, 2); + L1D = CSHAPE(16*1024, 5, 1); + } + L3 = -1; + +#if 0 + unsigned long cbox_config, size; + + cbox_config = *(vulp) phys_to_virt (0xfffff00008UL); + size = 512*1024 * (1 << ((cbox_config >> 12) & 3)); + + L2 = ((cbox_config >> 31) & 1 ? CSHAPE (size, 6, 1) : -1); +#else + L2 = external_cache_probe(512*1024, 6); +#endif + break; + } + + case EV6_CPU: + case EV67_CPU: + case EV68CB_CPU: + case EV68AL_CPU: + case EV68CX_CPU: + case EV69_CPU: + L1I = L1D = CSHAPE(64*1024, 6, 2); + L2 = external_cache_probe(1024*1024, 6); + L3 = -1; + break; + + case EV7_CPU: + case EV79_CPU: + L1I = L1D = CSHAPE(64*1024, 6, 2); + L2 = CSHAPE(7*1024*1024/4, 6, 7); + L3 = -1; + break; + + default: + /* Nothing known about this cpu type. */ + L1I = L1D = L2 = L3 = 0; + break; + } + + alpha_l1i_cacheshape = L1I; + alpha_l1d_cacheshape = L1D; + alpha_l2_cacheshape = L2; + alpha_l3_cacheshape = L3; +} + +/* + * We show only CPU #0 info. + */ +static void * +c_start(struct seq_file *f, loff_t *pos) +{ + return *pos ? NULL : (char *)hwrpb + hwrpb->processor_offset; +} + +static void * +c_next(struct seq_file *f, void *v, loff_t *pos) +{ + (*pos)++; + return NULL; +} + +static void +c_stop(struct seq_file *f, void *v) +{ +} + +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; + + +static int +alpha_panic_event(struct notifier_block *this, unsigned long event, void *ptr) +{ +#if 1 + /* FIXME FIXME FIXME */ + /* If we are using SRM and serial console, just hard halt here. 
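The cache "shape" words built with CSHAPE() above pack the total size in bytes (low eight bits cleared), log2 of the line size in bits 7..4, and the associativity in bits 3..0; show_cache_size() unpacks them for /proc/cpuinfo. A standalone decoder doing one round trip (the EV6-style L1 value is taken from determine_cpu_caches(); the simplified sketch folds the "unknown" and "n/a" cases together):

#include <stdio.h>

/* Same packing as setup.c. */
#define CSHAPE(totalsize, linesize, assoc) \
	(((totalsize) & ~0xff) | ((linesize) << 4) | (assoc))

static void show_shape(const char *which, int shape)
{
	if (shape <= 0) {
		printf("%s: n/a\n", which);
		return;
	}
	printf("%s: %dK, %d-way, %db line\n",
	       which, shape >> 10, shape & 15, 1 << ((shape >> 4) & 15));
}

int main(void)
{
	show_shape("L1 Icache", CSHAPE(64 * 1024, 6, 2));  /* prints 64K, 2-way, 64b line */
	show_shape("L3 cache", -1);                        /* not present */
	return 0;
}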
*/ + if (alpha_using_srm && srmcons_output) + __halt(); +#endif + return NOTIFY_DONE; +} + +static __init int add_pcspkr(void) +{ + struct platform_device *pd; + int ret; + + pd = platform_device_alloc("pcspkr", -1); + if (!pd) + return -ENOMEM; + + ret = platform_device_add(pd); + if (ret) + platform_device_put(pd); + + return ret; +} +device_initcall(add_pcspkr); diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c new file mode 100644 index 0000000000..d8ed71d5be --- /dev/null +++ b/arch/alpha/kernel/signal.c @@ -0,0 +1,540 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/signal.c + * + * Copyright (C) 1995 Linus Torvalds + * + * 1997-11-02 Modified for POSIX.1b signals by Richard Henderson + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "proto.h" + + +#define DEBUG_SIG 0 + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +asmlinkage void ret_from_sys_call(void); + +/* + * The OSF/1 sigprocmask calling sequence is different from the + * C sigprocmask() sequence.. + */ +SYSCALL_DEFINE2(osf_sigprocmask, int, how, unsigned long, newmask) +{ + sigset_t oldmask; + sigset_t mask; + unsigned long res; + + siginitset(&mask, newmask & _BLOCKABLE); + res = sigprocmask(how, &mask, &oldmask); + if (!res) { + force_successful_syscall_return(); + res = oldmask.sig[0]; + } + return res; +} + +SYSCALL_DEFINE3(osf_sigaction, int, sig, + const struct osf_sigaction __user *, act, + struct osf_sigaction __user *, oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + if (act) { + old_sigset_t mask; + if (!access_ok(act, sizeof(*act)) || + __get_user(new_ka.sa.sa_handler, &act->sa_handler) || + __get_user(new_ka.sa.sa_flags, &act->sa_flags) || + __get_user(mask, &act->sa_mask)) + return -EFAULT; + siginitset(&new_ka.sa.sa_mask, mask); + new_ka.ka_restorer = NULL; + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + if (!access_ok(oact, sizeof(*oact)) || + __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || + __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) + return -EFAULT; + } + + return ret; +} + +SYSCALL_DEFINE5(rt_sigaction, int, sig, const struct sigaction __user *, act, + struct sigaction __user *, oact, + size_t, sigsetsize, void __user *, restorer) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + + if (act) { + new_ka.ka_restorer = restorer; + if (copy_from_user(&new_ka.sa, act, sizeof(*act))) + return -EFAULT; + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + if (copy_to_user(oact, &old_ka.sa, sizeof(*oact))) + return -EFAULT; + } + + return ret; +} + +/* + * Do a signal return; undo the signal stack. + */ + +#if _NSIG_WORDS > 1 +# error "Non SA_SIGINFO frame needs rearranging" +#endif + +struct sigframe +{ + struct sigcontext sc; + unsigned int retcode[3]; +}; + +struct rt_sigframe +{ + struct siginfo info; + struct ucontext uc; + unsigned int retcode[3]; +}; + +/* If this changes, userland unwinders that Know Things about our signal + frame will break. Do not undertake lightly. It also implies an ABI + change wrt the size of siginfo_t, which may cause some pain. 
*/ +extern char compile_time_assert + [offsetof(struct rt_sigframe, uc.uc_mcontext) == 176 ? 1 : -1]; + +#define INSN_MOV_R30_R16 0x47fe0410 +#define INSN_LDI_R0 0x201f0000 +#define INSN_CALLSYS 0x00000083 + +static long +restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs) +{ + unsigned long usp; + struct switch_stack *sw = (struct switch_stack *)regs - 1; + long i, err = __get_user(regs->pc, &sc->sc_pc); + + current->restart_block.fn = do_no_restart_syscall; + + sw->r26 = (unsigned long) ret_from_sys_call; + + err |= __get_user(regs->r0, sc->sc_regs+0); + err |= __get_user(regs->r1, sc->sc_regs+1); + err |= __get_user(regs->r2, sc->sc_regs+2); + err |= __get_user(regs->r3, sc->sc_regs+3); + err |= __get_user(regs->r4, sc->sc_regs+4); + err |= __get_user(regs->r5, sc->sc_regs+5); + err |= __get_user(regs->r6, sc->sc_regs+6); + err |= __get_user(regs->r7, sc->sc_regs+7); + err |= __get_user(regs->r8, sc->sc_regs+8); + err |= __get_user(sw->r9, sc->sc_regs+9); + err |= __get_user(sw->r10, sc->sc_regs+10); + err |= __get_user(sw->r11, sc->sc_regs+11); + err |= __get_user(sw->r12, sc->sc_regs+12); + err |= __get_user(sw->r13, sc->sc_regs+13); + err |= __get_user(sw->r14, sc->sc_regs+14); + err |= __get_user(sw->r15, sc->sc_regs+15); + err |= __get_user(regs->r16, sc->sc_regs+16); + err |= __get_user(regs->r17, sc->sc_regs+17); + err |= __get_user(regs->r18, sc->sc_regs+18); + err |= __get_user(regs->r19, sc->sc_regs+19); + err |= __get_user(regs->r20, sc->sc_regs+20); + err |= __get_user(regs->r21, sc->sc_regs+21); + err |= __get_user(regs->r22, sc->sc_regs+22); + err |= __get_user(regs->r23, sc->sc_regs+23); + err |= __get_user(regs->r24, sc->sc_regs+24); + err |= __get_user(regs->r25, sc->sc_regs+25); + err |= __get_user(regs->r26, sc->sc_regs+26); + err |= __get_user(regs->r27, sc->sc_regs+27); + err |= __get_user(regs->r28, sc->sc_regs+28); + err |= __get_user(regs->gp, sc->sc_regs+29); + err |= __get_user(usp, sc->sc_regs+30); + wrusp(usp); + + for (i = 0; i < 31; i++) + err |= __get_user(sw->fp[i], sc->sc_fpregs+i); + err |= __get_user(sw->fp[31], &sc->sc_fpcr); + + return err; +} + +/* Note that this syscall is also used by setcontext(3) to install + a given sigcontext. This because it's impossible to set *all* + registers and transfer control from userland. 
*/ + +asmlinkage void +do_sigreturn(struct sigcontext __user *sc) +{ + struct pt_regs *regs = current_pt_regs(); + sigset_t set; + + /* Verify that it's a good sigcontext before using it */ + if (!access_ok(sc, sizeof(*sc))) + goto give_sigsegv; + if (__get_user(set.sig[0], &sc->sc_mask)) + goto give_sigsegv; + + set_current_blocked(&set); + + if (restore_sigcontext(sc, regs)) + goto give_sigsegv; + + /* Send SIGTRAP if we're single-stepping: */ + if (ptrace_cancel_bpt (current)) { + send_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *) regs->pc, + current); + } + return; + +give_sigsegv: + force_sig(SIGSEGV); +} + +asmlinkage void +do_rt_sigreturn(struct rt_sigframe __user *frame) +{ + struct pt_regs *regs = current_pt_regs(); + sigset_t set; + + /* Verify that it's a good ucontext_t before using it */ + if (!access_ok(&frame->uc, sizeof(frame->uc))) + goto give_sigsegv; + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto give_sigsegv; + + set_current_blocked(&set); + + if (restore_sigcontext(&frame->uc.uc_mcontext, regs)) + goto give_sigsegv; + + /* Send SIGTRAP if we're single-stepping: */ + if (ptrace_cancel_bpt (current)) { + send_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *) regs->pc, + current); + } + return; + +give_sigsegv: + force_sig(SIGSEGV); +} + + +/* + * Set up a signal frame. + */ + +static inline void __user * +get_sigframe(struct ksignal *ksig, unsigned long sp, size_t frame_size) +{ + return (void __user *)((sigsp(sp, ksig) - frame_size) & -32ul); +} + +static long +setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, + unsigned long mask, unsigned long sp) +{ + struct switch_stack *sw = (struct switch_stack *)regs - 1; + long i, err = 0; + + err |= __put_user(on_sig_stack((unsigned long)sc), &sc->sc_onstack); + err |= __put_user(mask, &sc->sc_mask); + err |= __put_user(regs->pc, &sc->sc_pc); + err |= __put_user(8, &sc->sc_ps); + + err |= __put_user(regs->r0 , sc->sc_regs+0); + err |= __put_user(regs->r1 , sc->sc_regs+1); + err |= __put_user(regs->r2 , sc->sc_regs+2); + err |= __put_user(regs->r3 , sc->sc_regs+3); + err |= __put_user(regs->r4 , sc->sc_regs+4); + err |= __put_user(regs->r5 , sc->sc_regs+5); + err |= __put_user(regs->r6 , sc->sc_regs+6); + err |= __put_user(regs->r7 , sc->sc_regs+7); + err |= __put_user(regs->r8 , sc->sc_regs+8); + err |= __put_user(sw->r9 , sc->sc_regs+9); + err |= __put_user(sw->r10 , sc->sc_regs+10); + err |= __put_user(sw->r11 , sc->sc_regs+11); + err |= __put_user(sw->r12 , sc->sc_regs+12); + err |= __put_user(sw->r13 , sc->sc_regs+13); + err |= __put_user(sw->r14 , sc->sc_regs+14); + err |= __put_user(sw->r15 , sc->sc_regs+15); + err |= __put_user(regs->r16, sc->sc_regs+16); + err |= __put_user(regs->r17, sc->sc_regs+17); + err |= __put_user(regs->r18, sc->sc_regs+18); + err |= __put_user(regs->r19, sc->sc_regs+19); + err |= __put_user(regs->r20, sc->sc_regs+20); + err |= __put_user(regs->r21, sc->sc_regs+21); + err |= __put_user(regs->r22, sc->sc_regs+22); + err |= __put_user(regs->r23, sc->sc_regs+23); + err |= __put_user(regs->r24, sc->sc_regs+24); + err |= __put_user(regs->r25, sc->sc_regs+25); + err |= __put_user(regs->r26, sc->sc_regs+26); + err |= __put_user(regs->r27, sc->sc_regs+27); + err |= __put_user(regs->r28, sc->sc_regs+28); + err |= __put_user(regs->gp , sc->sc_regs+29); + err |= __put_user(sp, sc->sc_regs+30); + err |= __put_user(0, sc->sc_regs+31); + + for (i = 0; i < 31; i++) + err |= __put_user(sw->fp[i], sc->sc_fpregs+i); + err |= __put_user(0, sc->sc_fpregs+31); + err |= 
__put_user(sw->fp[31], &sc->sc_fpcr); + + err |= __put_user(regs->trap_a0, &sc->sc_traparg_a0); + err |= __put_user(regs->trap_a1, &sc->sc_traparg_a1); + err |= __put_user(regs->trap_a2, &sc->sc_traparg_a2); + + return err; +} + +static int +setup_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) +{ + unsigned long oldsp, r26, err = 0; + struct sigframe __user *frame; + + oldsp = rdusp(); + frame = get_sigframe(ksig, oldsp, sizeof(*frame)); + if (!access_ok(frame, sizeof(*frame))) + return -EFAULT; + + err |= setup_sigcontext(&frame->sc, regs, set->sig[0], oldsp); + if (err) + return -EFAULT; + + /* Set up to return from userspace. If provided, use a stub + already in userspace. */ + r26 = (unsigned long) ksig->ka.ka_restorer; + if (!r26) { + err |= __put_user(INSN_MOV_R30_R16, frame->retcode+0); + err |= __put_user(INSN_LDI_R0+__NR_sigreturn, frame->retcode+1); + err |= __put_user(INSN_CALLSYS, frame->retcode+2); + imb(); + r26 = (unsigned long) frame->retcode; + } + + /* Check that everything was written properly. */ + if (err) + return err; + + /* "Return" to the handler */ + regs->r26 = r26; + regs->r27 = regs->pc = (unsigned long) ksig->ka.sa.sa_handler; + regs->r16 = ksig->sig; /* a0: signal number */ + regs->r17 = 0; /* a1: exception code */ + regs->r18 = (unsigned long) &frame->sc; /* a2: sigcontext pointer */ + wrusp((unsigned long) frame); + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", + current->comm, current->pid, frame, regs->pc, regs->r26); +#endif + return 0; +} + +static int +setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) +{ + unsigned long oldsp, r26, err = 0; + struct rt_sigframe __user *frame; + + oldsp = rdusp(); + frame = get_sigframe(ksig, oldsp, sizeof(*frame)); + if (!access_ok(frame, sizeof(*frame))) + return -EFAULT; + + err |= copy_siginfo_to_user(&frame->info, &ksig->info); + + /* Create the ucontext. */ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(0, &frame->uc.uc_link); + err |= __put_user(set->sig[0], &frame->uc.uc_osf_sigmask); + err |= __save_altstack(&frame->uc.uc_stack, oldsp); + err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, + set->sig[0], oldsp); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + return -EFAULT; + + /* Set up to return from userspace. If provided, use a stub + already in userspace. */ + r26 = (unsigned long) ksig->ka.ka_restorer; + if (!r26) { + err |= __put_user(INSN_MOV_R30_R16, frame->retcode+0); + err |= __put_user(INSN_LDI_R0+__NR_rt_sigreturn, + frame->retcode+1); + err |= __put_user(INSN_CALLSYS, frame->retcode+2); + imb(); + r26 = (unsigned long) frame->retcode; + } + + if (err) + return -EFAULT; + + /* "Return" to the handler */ + regs->r26 = r26; + regs->r27 = regs->pc = (unsigned long) ksig->ka.sa.sa_handler; + regs->r16 = ksig->sig; /* a0: signal number */ + regs->r17 = (unsigned long) &frame->info; /* a1: siginfo pointer */ + regs->r18 = (unsigned long) &frame->uc; /* a2: ucontext pointer */ + wrusp((unsigned long) frame); + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", + current->comm, current->pid, frame, regs->pc, regs->r26); +#endif + + return 0; +} + + +/* + * OK, we're invoking a handler. 
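+ * Build the appropriate frame (an rt frame if the handler was installed
+ * with SA_SIGINFO) and let signal_setup_done() either force a SIGSEGV on
+ * failure or update the blocked mask for the delivered signal on success.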
+ */ +static inline void +handle_signal(struct ksignal *ksig, struct pt_regs *regs) +{ + sigset_t *oldset = sigmask_to_save(); + int ret; + + if (ksig->ka.sa.sa_flags & SA_SIGINFO) + ret = setup_rt_frame(ksig, oldset, regs); + else + ret = setup_frame(ksig, oldset, regs); + + signal_setup_done(ret, ksig, 0); +} + +static inline void +syscall_restart(unsigned long r0, unsigned long r19, + struct pt_regs *regs, struct k_sigaction *ka) +{ + switch (regs->r0) { + case ERESTARTSYS: + if (!(ka->sa.sa_flags & SA_RESTART)) { + case ERESTARTNOHAND: + regs->r0 = EINTR; + break; + } + fallthrough; + case ERESTARTNOINTR: + regs->r0 = r0; /* reset v0 and a3 and replay syscall */ + regs->r19 = r19; + regs->pc -= 4; + break; + case ERESTART_RESTARTBLOCK: + regs->r0 = EINTR; + break; + } +} + + +/* + * Note that 'init' is a special process: it doesn't get signals it doesn't + * want to handle. Thus you cannot kill init even with a SIGKILL even by + * mistake. + * + * Note that we go through the signals twice: once to check the signals that + * the kernel can handle, and then we build all the user-level signal handling + * stack-frames in one go after that. + * + * "r0" and "r19" are the registers we need to restore for system call + * restart. "r0" is also used as an indicator whether we can restart at + * all (if we get here from anything but a syscall return, it will be 0) + */ +static void +do_signal(struct pt_regs *regs, unsigned long r0, unsigned long r19) +{ + unsigned long single_stepping = ptrace_cancel_bpt(current); + struct ksignal ksig; + + /* This lets the debugger run, ... */ + if (get_signal(&ksig)) { + /* ... so re-check the single stepping. */ + single_stepping |= ptrace_cancel_bpt(current); + /* Whee! Actually deliver the signal. */ + if (r0) + syscall_restart(r0, r19, regs, &ksig.ka); + handle_signal(&ksig, regs); + } else { + single_stepping |= ptrace_cancel_bpt(current); + if (r0) { + switch (regs->r0) { + case ERESTARTNOHAND: + case ERESTARTSYS: + case ERESTARTNOINTR: + /* Reset v0 and a3 and replay syscall. */ + regs->r0 = r0; + regs->r19 = r19; + regs->pc -= 4; + break; + case ERESTART_RESTARTBLOCK: + /* Set v0 to the restart_syscall and replay */ + regs->r0 = __NR_restart_syscall; + regs->pc -= 4; + break; + } + } + restore_saved_sigmask(); + } + if (single_stepping) + ptrace_set_bpt(current); /* re-set breakpoint */ +} + +void +do_work_pending(struct pt_regs *regs, unsigned long thread_flags, + unsigned long r0, unsigned long r19) +{ + do { + if (thread_flags & _TIF_NEED_RESCHED) { + schedule(); + } else { + local_irq_enable(); + if (thread_flags & (_TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL)) { + do_signal(regs, r0, r19); + r0 = 0; + } else { + tracehook_notify_resume(regs); + } + } + local_irq_disable(); + thread_flags = read_thread_flags(); + } while (thread_flags & _TIF_WORK_MASK); +} diff --git a/arch/alpha/kernel/smc37c669.c b/arch/alpha/kernel/smc37c669.c new file mode 100644 index 0000000000..bbbd34586d --- /dev/null +++ b/arch/alpha/kernel/smc37c669.c @@ -0,0 +1,2537 @@ +/* + * SMC 37C669 initialization code + */ +#include + +#include +#include +#include +#include + +#include +#include + +#if 0 +# define DBG_DEVS(args) printk args +#else +# define DBG_DEVS(args) +#endif + +#define KB 1024 +#define MB (1024*KB) +#define GB (1024*MB) + +#define SMC_DEBUG 0 + +/* File: smcc669_def.h + * + * Copyright (C) 1997 by + * Digital Equipment Corporation, Maynard, Massachusetts. + * All rights reserved. 
+ * + * This software is furnished under a license and may be used and copied + * only in accordance of the terms of such license and with the + * inclusion of the above copyright notice. This software or any other + * copies thereof may not be provided or otherwise made available to any + * other person. No title to and ownership of the software is hereby + * transferred. + * + * The information in this software is subject to change without notice + * and should not be construed as a commitment by Digital Equipment + * Corporation. + * + * Digital assumes no responsibility for the use or reliability of its + * software on equipment which is not supplied by Digital. + * + * + * Abstract: + * + * This file contains header definitions for the SMC37c669 + * Super I/O controller. + * + * Author: + * + * Eric Rasmussen + * + * Modification History: + * + * er 28-Jan-1997 Initial Entry + */ + +#ifndef __SMC37c669_H +#define __SMC37c669_H + +/* +** Macros for handling device IRQs +** +** The mask acts as a flag used in mapping actual ISA IRQs (0 - 15) +** to device IRQs (A - H). +*/ +#define SMC37c669_DEVICE_IRQ_MASK 0x80000000 +#define SMC37c669_DEVICE_IRQ( __i ) \ + ((SMC37c669_DEVICE_IRQ_MASK) | (__i)) +#define SMC37c669_IS_DEVICE_IRQ(__i) \ + (((__i) & (SMC37c669_DEVICE_IRQ_MASK)) == (SMC37c669_DEVICE_IRQ_MASK)) +#define SMC37c669_RAW_DEVICE_IRQ(__i) \ + ((__i) & ~(SMC37c669_DEVICE_IRQ_MASK)) + +/* +** Macros for handling device DRQs +** +** The mask acts as a flag used in mapping actual ISA DMA +** channels to device DMA channels (A - C). +*/ +#define SMC37c669_DEVICE_DRQ_MASK 0x80000000 +#define SMC37c669_DEVICE_DRQ(__d) \ + ((SMC37c669_DEVICE_DRQ_MASK) | (__d)) +#define SMC37c669_IS_DEVICE_DRQ(__d) \ + (((__d) & (SMC37c669_DEVICE_DRQ_MASK)) == (SMC37c669_DEVICE_DRQ_MASK)) +#define SMC37c669_RAW_DEVICE_DRQ(__d) \ + ((__d) & ~(SMC37c669_DEVICE_DRQ_MASK)) + +#define SMC37c669_DEVICE_ID 0x3 + +/* +** SMC37c669 Device Function Definitions +*/ +#define SERIAL_0 0 +#define SERIAL_1 1 +#define PARALLEL_0 2 +#define FLOPPY_0 3 +#define IDE_0 4 +#define NUM_FUNCS 5 + +/* +** Default Device Function Mappings +*/ +#define COM1_BASE 0x3F8 +#define COM1_IRQ 4 +#define COM2_BASE 0x2F8 +#define COM2_IRQ 3 +#define PARP_BASE 0x3BC +#define PARP_IRQ 7 +#define PARP_DRQ 3 +#define FDC_BASE 0x3F0 +#define FDC_IRQ 6 +#define FDC_DRQ 2 + +/* +** Configuration On/Off Key Definitions +*/ +#define SMC37c669_CONFIG_ON_KEY 0x55 +#define SMC37c669_CONFIG_OFF_KEY 0xAA + +/* +** SMC 37c669 Device IRQs +*/ +#define SMC37c669_DEVICE_IRQ_A ( SMC37c669_DEVICE_IRQ( 0x01 ) ) +#define SMC37c669_DEVICE_IRQ_B ( SMC37c669_DEVICE_IRQ( 0x02 ) ) +#define SMC37c669_DEVICE_IRQ_C ( SMC37c669_DEVICE_IRQ( 0x03 ) ) +#define SMC37c669_DEVICE_IRQ_D ( SMC37c669_DEVICE_IRQ( 0x04 ) ) +#define SMC37c669_DEVICE_IRQ_E ( SMC37c669_DEVICE_IRQ( 0x05 ) ) +#define SMC37c669_DEVICE_IRQ_F ( SMC37c669_DEVICE_IRQ( 0x06 ) ) +/* SMC37c669_DEVICE_IRQ_G *** RESERVED ***/ +#define SMC37c669_DEVICE_IRQ_H ( SMC37c669_DEVICE_IRQ( 0x08 ) ) + +/* +** SMC 37c669 Device DMA Channel Definitions +*/ +#define SMC37c669_DEVICE_DRQ_A ( SMC37c669_DEVICE_DRQ( 0x01 ) ) +#define SMC37c669_DEVICE_DRQ_B ( SMC37c669_DEVICE_DRQ( 0x02 ) ) +#define SMC37c669_DEVICE_DRQ_C ( SMC37c669_DEVICE_DRQ( 0x03 ) ) + +/* +** Configuration Register Index Definitions +*/ +#define SMC37c669_CR00_INDEX 0x00 +#define SMC37c669_CR01_INDEX 0x01 +#define SMC37c669_CR02_INDEX 0x02 +#define SMC37c669_CR03_INDEX 0x03 +#define SMC37c669_CR04_INDEX 0x04 +#define SMC37c669_CR05_INDEX 0x05 +#define 
SMC37c669_CR06_INDEX 0x06 +#define SMC37c669_CR07_INDEX 0x07 +#define SMC37c669_CR08_INDEX 0x08 +#define SMC37c669_CR09_INDEX 0x09 +#define SMC37c669_CR0A_INDEX 0x0A +#define SMC37c669_CR0B_INDEX 0x0B +#define SMC37c669_CR0C_INDEX 0x0C +#define SMC37c669_CR0D_INDEX 0x0D +#define SMC37c669_CR0E_INDEX 0x0E +#define SMC37c669_CR0F_INDEX 0x0F +#define SMC37c669_CR10_INDEX 0x10 +#define SMC37c669_CR11_INDEX 0x11 +#define SMC37c669_CR12_INDEX 0x12 +#define SMC37c669_CR13_INDEX 0x13 +#define SMC37c669_CR14_INDEX 0x14 +#define SMC37c669_CR15_INDEX 0x15 +#define SMC37c669_CR16_INDEX 0x16 +#define SMC37c669_CR17_INDEX 0x17 +#define SMC37c669_CR18_INDEX 0x18 +#define SMC37c669_CR19_INDEX 0x19 +#define SMC37c669_CR1A_INDEX 0x1A +#define SMC37c669_CR1B_INDEX 0x1B +#define SMC37c669_CR1C_INDEX 0x1C +#define SMC37c669_CR1D_INDEX 0x1D +#define SMC37c669_CR1E_INDEX 0x1E +#define SMC37c669_CR1F_INDEX 0x1F +#define SMC37c669_CR20_INDEX 0x20 +#define SMC37c669_CR21_INDEX 0x21 +#define SMC37c669_CR22_INDEX 0x22 +#define SMC37c669_CR23_INDEX 0x23 +#define SMC37c669_CR24_INDEX 0x24 +#define SMC37c669_CR25_INDEX 0x25 +#define SMC37c669_CR26_INDEX 0x26 +#define SMC37c669_CR27_INDEX 0x27 +#define SMC37c669_CR28_INDEX 0x28 +#define SMC37c669_CR29_INDEX 0x29 + +/* +** Configuration Register Alias Definitions +*/ +#define SMC37c669_DEVICE_ID_INDEX SMC37c669_CR0D_INDEX +#define SMC37c669_DEVICE_REVISION_INDEX SMC37c669_CR0E_INDEX +#define SMC37c669_FDC_BASE_ADDRESS_INDEX SMC37c669_CR20_INDEX +#define SMC37c669_IDE_BASE_ADDRESS_INDEX SMC37c669_CR21_INDEX +#define SMC37c669_IDE_ALTERNATE_ADDRESS_INDEX SMC37c669_CR22_INDEX +#define SMC37c669_PARALLEL0_BASE_ADDRESS_INDEX SMC37c669_CR23_INDEX +#define SMC37c669_SERIAL0_BASE_ADDRESS_INDEX SMC37c669_CR24_INDEX +#define SMC37c669_SERIAL1_BASE_ADDRESS_INDEX SMC37c669_CR25_INDEX +#define SMC37c669_PARALLEL_FDC_DRQ_INDEX SMC37c669_CR26_INDEX +#define SMC37c669_PARALLEL_FDC_IRQ_INDEX SMC37c669_CR27_INDEX +#define SMC37c669_SERIAL_IRQ_INDEX SMC37c669_CR28_INDEX + +/* +** Configuration Register Definitions +** +** The INDEX (write only) and DATA (read/write) ports are effective +** only when the chip is in the Configuration State. +*/ +typedef struct _SMC37c669_CONFIG_REGS { + unsigned char index_port; + unsigned char data_port; +} SMC37c669_CONFIG_REGS; + +/* +** CR00 - default value 0x28 +** +** IDE_EN (CR00<1:0>): +** 0x - 30ua pull-ups on nIDEEN, nHDCS0, NHDCS1 +** 11 - IRQ_H available as IRQ output, +** IRRX2, IRTX2 available as alternate IR pins +** 10 - nIDEEN, nHDCS0, nHDCS1 used to control IDE +** +** VALID (CR00<7>): +** A high level on this software controlled bit can +** be used to indicate that a valid configuration +** cycle has occurred. The control software must +** take care to set this bit at the appropriate times. +** Set to zero after power up. This bit has no +** effect on any other hardware in the chip. 
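+**
+** For example, with the chip in configuration mode this register can be
+** read and decoded through the union below (SMC37c669_read_config() is
+** defined later in this file):
+**
+**	SMC37c669_CR00 cr00;
+**
+**	cr00.as_uchar = SMC37c669_read_config( SMC37c669_CR00_INDEX );
+**	if ( cr00.by_field.fdc_pwr )
+**		printk( "FDC power is enabled\n" );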
+** +*/ +typedef union _SMC37c669_CR00 { + unsigned char as_uchar; + struct { + unsigned ide_en : 2; /* See note above */ + unsigned reserved1 : 1; /* RAZ */ + unsigned fdc_pwr : 1; /* 1 = supply power to FDC */ + unsigned reserved2 : 3; /* Read as 010b */ + unsigned valid : 1; /* See note above */ + } by_field; +} SMC37c669_CR00; + +/* +** CR01 - default value 0x9C +*/ +typedef union _SMC37c669_CR01 { + unsigned char as_uchar; + struct { + unsigned reserved1 : 2; /* RAZ */ + unsigned ppt_pwr : 1; /* 1 = supply power to PPT */ + unsigned ppt_mode : 1; /* 1 = Printer mode, 0 = EPP */ + unsigned reserved2 : 1; /* Read as 1 */ + unsigned reserved3 : 2; /* RAZ */ + unsigned lock_crx: 1; /* Lock CR00 - CR18 */ + } by_field; +} SMC37c669_CR01; + +/* +** CR02 - default value 0x88 +*/ +typedef union _SMC37c669_CR02 { + unsigned char as_uchar; + struct { + unsigned reserved1 : 3; /* RAZ */ + unsigned uart1_pwr : 1; /* 1 = supply power to UART1 */ + unsigned reserved2 : 3; /* RAZ */ + unsigned uart2_pwr : 1; /* 1 = supply power to UART2 */ + } by_field; +} SMC37c669_CR02; + +/* +** CR03 - default value 0x78 +** +** CR03<7> CR03<2> Pin 94 +** ------- ------- ------ +** 0 X DRV2 (input) +** 1 0 ADRX +** 1 1 IRQ_B +** +** CR03<6> CR03<5> Op Mode +** ------- ------- ------- +** 0 0 Model 30 +** 0 1 PS/2 +** 1 0 Reserved +** 1 1 AT Mode +*/ +typedef union _SMC37c669_CR03 { + unsigned char as_uchar; + struct { + unsigned pwrgd_gamecs : 1; /* 1 = PWRGD, 0 = GAMECS */ + unsigned fdc_mode2 : 1; /* 1 = Enhanced Mode 2 */ + unsigned pin94_0 : 1; /* See note above */ + unsigned reserved1 : 1; /* RAZ */ + unsigned drvden : 1; /* 1 = high, 0 - output */ + unsigned op_mode : 2; /* See note above */ + unsigned pin94_1 : 1; /* See note above */ + } by_field; +} SMC37c669_CR03; + +/* +** CR04 - default value 0x00 +** +** PP_EXT_MODE: +** If CR01 = 0 and PP_EXT_MODE = +** 00 - Standard and Bidirectional +** 01 - EPP mode and SPP +** 10 - ECP mode +** In this mode, 2 drives can be supported +** directly, 3 or 4 drives must use external +** 4 drive support. SPP can be selected +** through the ECR register of ECP as mode 000. +** 11 - ECP mode and EPP mode +** In this mode, 2 drives can be supported +** directly, 3 or 4 drives must use external +** 4 drive support. SPP can be selected +** through the ECR register of ECP as mode 000. +** In this mode, EPP can be selected through +** the ECR register of ECP as mode 100. +** +** PP_FDC: +** 00 - Normal +** 01 - PPFD1 +** 10 - PPFD2 +** 11 - Reserved +** +** MIDI1: +** Serial Clock Select: +** A low level on this bit disables MIDI support, +** clock = divide by 13. A high level on this +** bit enables MIDI support, clock = divide by 12. +** +** MIDI operates at 31.25 Kbps which can be derived +** from 125 KHz (24 MHz / 12 = 2 MHz, 2 MHz / 16 = 125 KHz) +** +** ALT_IO: +** 0 - Use pins IRRX, IRTX +** 1 - Use pins IRRX2, IRTX2 +** +** If this bit is set, the IR receive and transmit +** functions will not be available on pins 25 and 26 +** unless CR00 = 11. 
+*/ +typedef union _SMC37c669_CR04 { + unsigned char as_uchar; + struct { + unsigned ppt_ext_mode : 2; /* See note above */ + unsigned ppt_fdc : 2; /* See note above */ + unsigned midi1 : 1; /* See note above */ + unsigned midi2 : 1; /* See note above */ + unsigned epp_type : 1; /* 0 = EPP 1.9, 1 = EPP 1.7 */ + unsigned alt_io : 1; /* See note above */ + } by_field; +} SMC37c669_CR04; + +/* +** CR05 - default value 0x00 +** +** DEN_SEL: +** 00 - Densel output normal +** 01 - Reserved +** 10 - Densel output 1 +** 11 - Densel output 0 +** +*/ +typedef union _SMC37c669_CR05 { + unsigned char as_uchar; + struct { + unsigned reserved1 : 2; /* RAZ */ + unsigned fdc_dma_mode : 1; /* 0 = burst, 1 = non-burst */ + unsigned den_sel : 2; /* See note above */ + unsigned swap_drv : 1; /* Swap the FDC motor selects */ + unsigned extx4 : 1; /* 0 = 2 drive, 1 = external 4 drive decode */ + unsigned reserved2 : 1; /* RAZ */ + } by_field; +} SMC37c669_CR05; + +/* +** CR06 - default value 0xFF +*/ +typedef union _SMC37c669_CR06 { + unsigned char as_uchar; + struct { + unsigned floppy_a : 2; /* Type of floppy drive A */ + unsigned floppy_b : 2; /* Type of floppy drive B */ + unsigned floppy_c : 2; /* Type of floppy drive C */ + unsigned floppy_d : 2; /* Type of floppy drive D */ + } by_field; +} SMC37c669_CR06; + +/* +** CR07 - default value 0x00 +** +** Auto Power Management CR07<7:4>: +** 0 - Auto Powerdown disabled (default) +** 1 - Auto Powerdown enabled +** +** This bit is reset to the default state by POR or +** a hardware reset. +** +*/ +typedef union _SMC37c669_CR07 { + unsigned char as_uchar; + struct { + unsigned floppy_boot : 2; /* 0 = A:, 1 = B: */ + unsigned reserved1 : 2; /* RAZ */ + unsigned ppt_en : 1; /* See note above */ + unsigned uart1_en : 1; /* See note above */ + unsigned uart2_en : 1; /* See note above */ + unsigned fdc_en : 1; /* See note above */ + } by_field; +} SMC37c669_CR07; + +/* +** CR08 - default value 0x00 +*/ +typedef union _SMC37c669_CR08 { + unsigned char as_uchar; + struct { + unsigned zero : 4; /* 0 */ + unsigned addrx7_4 : 4; /* ADR<7:3> for ADRx decode */ + } by_field; +} SMC37c669_CR08; + +/* +** CR09 - default value 0x00 +** +** ADRx_CONFIG: +** 00 - ADRx disabled +** 01 - 1 byte decode A<3:0> = 0000b +** 10 - 8 byte block decode A<3:0> = 0XXXb +** 11 - 16 byte block decode A<3:0> = XXXXb +** +*/ +typedef union _SMC37c669_CR09 { + unsigned char as_uchar; + struct { + unsigned adra8 : 3; /* ADR<10:8> for ADRx decode */ + unsigned reserved1 : 3; + unsigned adrx_config : 2; /* See note above */ + } by_field; +} SMC37c669_CR09; + +/* +** CR0A - default value 0x00 +*/ +typedef union _SMC37c669_CR0A { + unsigned char as_uchar; + struct { + unsigned ecp_fifo_threshold : 4; + unsigned reserved1 : 4; + } by_field; +} SMC37c669_CR0A; + +/* +** CR0B - default value 0x00 +*/ +typedef union _SMC37c669_CR0B { + unsigned char as_uchar; + struct { + unsigned fdd0_drtx : 2; /* FDD0 Data Rate Table */ + unsigned fdd1_drtx : 2; /* FDD1 Data Rate Table */ + unsigned fdd2_drtx : 2; /* FDD2 Data Rate Table */ + unsigned fdd3_drtx : 2; /* FDD3 Data Rate Table */ + } by_field; +} SMC37c669_CR0B; + +/* +** CR0C - default value 0x00 +** +** UART2_MODE: +** 000 - Standard (default) +** 001 - IrDA (HPSIR) +** 010 - Amplitude Shift Keyed IR @500 KHz +** 011 - Reserved +** 1xx - Reserved +** +*/ +typedef union _SMC37c669_CR0C { + unsigned char as_uchar; + struct { + unsigned uart2_rcv_polarity : 1; /* 1 = invert RX */ + unsigned uart2_xmit_polarity : 1; /* 1 = invert TX */ + unsigned uart2_duplex 
: 1; /* 1 = full, 0 = half */ + unsigned uart2_mode : 3; /* See note above */ + unsigned uart1_speed : 1; /* 1 = high speed enabled */ + unsigned uart2_speed : 1; /* 1 = high speed enabled */ + } by_field; +} SMC37c669_CR0C; + +/* +** CR0D - default value 0x03 +** +** Device ID Register - read only +*/ +typedef union _SMC37c669_CR0D { + unsigned char as_uchar; + struct { + unsigned device_id : 8; /* Returns 0x3 in this field */ + } by_field; +} SMC37c669_CR0D; + +/* +** CR0E - default value 0x02 +** +** Device Revision Register - read only +*/ +typedef union _SMC37c669_CR0E { + unsigned char as_uchar; + struct { + unsigned device_rev : 8; /* Returns 0x2 in this field */ + } by_field; +} SMC37c669_CR0E; + +/* +** CR0F - default value 0x00 +*/ +typedef union _SMC37c669_CR0F { + unsigned char as_uchar; + struct { + unsigned test0 : 1; /* Reserved - set to 0 */ + unsigned test1 : 1; /* Reserved - set to 0 */ + unsigned test2 : 1; /* Reserved - set to 0 */ + unsigned test3 : 1; /* Reserved - set t0 0 */ + unsigned test4 : 1; /* Reserved - set to 0 */ + unsigned test5 : 1; /* Reserved - set t0 0 */ + unsigned test6 : 1; /* Reserved - set t0 0 */ + unsigned test7 : 1; /* Reserved - set to 0 */ + } by_field; +} SMC37c669_CR0F; + +/* +** CR10 - default value 0x00 +*/ +typedef union _SMC37c669_CR10 { + unsigned char as_uchar; + struct { + unsigned reserved1 : 3; /* RAZ */ + unsigned pll_gain : 1; /* 1 = 3V, 2 = 5V operation */ + unsigned pll_stop : 1; /* 1 = stop PLLs */ + unsigned ace_stop : 1; /* 1 = stop UART clocks */ + unsigned pll_clock_ctrl : 1; /* 0 = 14.318 MHz, 1 = 24 MHz */ + unsigned ir_test : 1; /* Enable IR test mode */ + } by_field; +} SMC37c669_CR10; + +/* +** CR11 - default value 0x00 +*/ +typedef union _SMC37c669_CR11 { + unsigned char as_uchar; + struct { + unsigned ir_loopback : 1; /* Internal IR loop back */ + unsigned test_10ms : 1; /* Test 10ms autopowerdown FDC timeout */ + unsigned reserved1 : 6; /* RAZ */ + } by_field; +} SMC37c669_CR11; + +/* +** CR12 - CR1D are reserved registers +*/ + +/* +** CR1E - default value 0x80 +** +** GAMECS: +** 00 - GAMECS disabled +** 01 - 1 byte decode ADR<3:0> = 0001b +** 10 - 8 byte block decode ADR<3:0> = 0XXXb +** 11 - 16 byte block decode ADR<3:0> = XXXXb +** +*/ +typedef union _SMC37c66_CR1E { + unsigned char as_uchar; + struct { + unsigned gamecs_config: 2; /* See note above */ + unsigned gamecs_addr9_4 : 6; /* GAMECS Addr<9:4> */ + } by_field; +} SMC37c669_CR1E; + +/* +** CR1F - default value 0x00 +** +** DT0 DT1 DRVDEN0 DRVDEN1 Drive Type +** --- --- ------- ------- ---------- +** 0 0 DENSEL DRATE0 4/2/1 MB 3.5" +** 2/1 MB 5.25" +** 2/1.6/1 MB 3.5" (3-mode) +** 0 1 DRATE1 DRATE0 +** 1 0 nDENSEL DRATE0 PS/2 +** 1 1 DRATE0 DRATE1 +** +** Note: DENSEL, DRATE1, and DRATE0 map onto two output +** pins - DRVDEN0 and DRVDEN1. +** +*/ +typedef union _SMC37c669_CR1F { + unsigned char as_uchar; + struct { + unsigned fdd0_drive_type : 2; /* FDD0 drive type */ + unsigned fdd1_drive_type : 2; /* FDD1 drive type */ + unsigned fdd2_drive_type : 2; /* FDD2 drive type */ + unsigned fdd3_drive_type : 2; /* FDD3 drive type */ + } by_field; +} SMC37c669_CR1F; + +/* +** CR20 - default value 0x3C +** +** FDC Base Address Register +** - To disable this decode set Addr<9:8> = 0 +** - A<10> = 0, A<3:0> = 0XXXb to access. 
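+**
+** The addr9_4 field holds I/O address bits <9:4>: the default FDC base
+** of 0x3F0 is programmed as 0x3F0 >> 4 = 0x3F and decoded back as
+** 0x3F << 4 = 0x3F0 (see SMC37c669_enable_device() and
+** SMC37c669_init_local_config() below).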
+** +*/ +typedef union _SMC37c669_CR20 { + unsigned char as_uchar; + struct { + unsigned zero : 2; /* 0 */ + unsigned addr9_4 : 6; /* FDC Addr<9:4> */ + } by_field; +} SMC37c669_CR20; + +/* +** CR21 - default value 0x3C +** +** IDE Base Address Register +** - To disable this decode set Addr<9:8> = 0 +** - A<10> = 0, A<3:0> = 0XXXb to access. +** +*/ +typedef union _SMC37c669_CR21 { + unsigned char as_uchar; + struct { + unsigned zero : 2; /* 0 */ + unsigned addr9_4 : 6; /* IDE Addr<9:4> */ + } by_field; +} SMC37c669_CR21; + +/* +** CR22 - default value 0x3D +** +** IDE Alternate Status Base Address Register +** - To disable this decode set Addr<9:8> = 0 +** - A<10> = 0, A<3:0> = 0110b to access. +** +*/ +typedef union _SMC37c669_CR22 { + unsigned char as_uchar; + struct { + unsigned zero : 2; /* 0 */ + unsigned addr9_4 : 6; /* IDE Alt Status Addr<9:4> */ + } by_field; +} SMC37c669_CR22; + +/* +** CR23 - default value 0x00 +** +** Parallel Port Base Address Register +** - To disable this decode set Addr<9:8> = 0 +** - A<10> = 0 to access. +** - If EPP is enabled, A<2:0> = XXXb to access. +** If EPP is NOT enabled, A<1:0> = XXb to access +** +*/ +typedef union _SMC37c669_CR23 { + unsigned char as_uchar; + struct { + unsigned addr9_2 : 8; /* Parallel Port Addr<9:2> */ + } by_field; +} SMC37c669_CR23; + +/* +** CR24 - default value 0x00 +** +** UART1 Base Address Register +** - To disable this decode set Addr<9:8> = 0 +** - A<10> = 0, A<2:0> = XXXb to access. +** +*/ +typedef union _SMC37c669_CR24 { + unsigned char as_uchar; + struct { + unsigned zero : 1; /* 0 */ + unsigned addr9_3 : 7; /* UART1 Addr<9:3> */ + } by_field; +} SMC37c669_CR24; + +/* +** CR25 - default value 0x00 +** +** UART2 Base Address Register +** - To disable this decode set Addr<9:8> = 0 +** - A<10> = 0, A<2:0> = XXXb to access. +** +*/ +typedef union _SMC37c669_CR25 { + unsigned char as_uchar; + struct { + unsigned zero : 1; /* 0 */ + unsigned addr9_3 : 7; /* UART2 Addr<9:3> */ + } by_field; +} SMC37c669_CR25; + +/* +** CR26 - default value 0x00 +** +** Parallel Port / FDC DMA Select Register +** +** D3 - D0 DMA +** D7 - D4 Selected +** ------- -------- +** 0000 None +** 0001 DMA_A +** 0010 DMA_B +** 0011 DMA_C +** +*/ +typedef union _SMC37c669_CR26 { + unsigned char as_uchar; + struct { + unsigned ppt_drq : 4; /* See note above */ + unsigned fdc_drq : 4; /* See note above */ + } by_field; +} SMC37c669_CR26; + +/* +** CR27 - default value 0x00 +** +** Parallel Port / FDC IRQ Select Register +** +** D3 - D0 IRQ +** D7 - D4 Selected +** ------- -------- +** 0000 None +** 0001 IRQ_A +** 0010 IRQ_B +** 0011 IRQ_C +** 0100 IRQ_D +** 0101 IRQ_E +** 0110 IRQ_F +** 0111 Reserved +** 1000 IRQ_H +** +** Any unselected IRQ REQ is in tristate +** +*/ +typedef union _SMC37c669_CR27 { + unsigned char as_uchar; + struct { + unsigned ppt_irq : 4; /* See note above */ + unsigned fdc_irq : 4; /* See note above */ + } by_field; +} SMC37c669_CR27; + +/* +** CR28 - default value 0x00 +** +** UART IRQ Select Register +** +** D3 - D0 IRQ +** D7 - D4 Selected +** ------- -------- +** 0000 None +** 0001 IRQ_A +** 0010 IRQ_B +** 0011 IRQ_C +** 0100 IRQ_D +** 0101 IRQ_E +** 0110 IRQ_F +** 0111 Reserved +** 1000 IRQ_H +** 1111 share with UART1 (only for UART2) +** +** Any unselected IRQ REQ is in tristate +** +** To share an IRQ between UART1 and UART2, set +** UART1 to use the desired IRQ and set UART2 to +** 0xF to enable sharing mechanism. 
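+**
+** For example, routing UART1 to device IRQ_D while UART2 shares it means
+** writing uart1_irq = 0x4 (IRQ_D) and uart2_irq = 0xF into this register.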
+** +*/ +typedef union _SMC37c669_CR28 { + unsigned char as_uchar; + struct { + unsigned uart2_irq : 4; /* See note above */ + unsigned uart1_irq : 4; /* See note above */ + } by_field; +} SMC37c669_CR28; + +/* +** CR29 - default value 0x00 +** +** IRQIN IRQ Select Register +** +** D3 - D0 IRQ +** D7 - D4 Selected +** ------- -------- +** 0000 None +** 0001 IRQ_A +** 0010 IRQ_B +** 0011 IRQ_C +** 0100 IRQ_D +** 0101 IRQ_E +** 0110 IRQ_F +** 0111 Reserved +** 1000 IRQ_H +** +** Any unselected IRQ REQ is in tristate +** +*/ +typedef union _SMC37c669_CR29 { + unsigned char as_uchar; + struct { + unsigned irqin_irq : 4; /* See note above */ + unsigned reserved1 : 4; /* RAZ */ + } by_field; +} SMC37c669_CR29; + +/* +** Aliases of Configuration Register formats (should match +** the set of index aliases). +** +** Note that CR24 and CR25 have the same format and are the +** base address registers for UART1 and UART2. Because of +** this we only define 1 alias here - for CR24 - as the serial +** base address register. +** +** Note that CR21 and CR22 have the same format and are the +** base address and alternate status address registers for +** the IDE controller. Because of this we only define 1 alias +** here - for CR21 - as the IDE address register. +** +*/ +typedef SMC37c669_CR0D SMC37c669_DEVICE_ID_REGISTER; +typedef SMC37c669_CR0E SMC37c669_DEVICE_REVISION_REGISTER; +typedef SMC37c669_CR20 SMC37c669_FDC_BASE_ADDRESS_REGISTER; +typedef SMC37c669_CR21 SMC37c669_IDE_ADDRESS_REGISTER; +typedef SMC37c669_CR23 SMC37c669_PARALLEL_BASE_ADDRESS_REGISTER; +typedef SMC37c669_CR24 SMC37c669_SERIAL_BASE_ADDRESS_REGISTER; +typedef SMC37c669_CR26 SMC37c669_PARALLEL_FDC_DRQ_REGISTER; +typedef SMC37c669_CR27 SMC37c669_PARALLEL_FDC_IRQ_REGISTER; +typedef SMC37c669_CR28 SMC37c669_SERIAL_IRQ_REGISTER; + +/* +** ISA/Device IRQ Translation Table Entry Definition +*/ +typedef struct _SMC37c669_IRQ_TRANSLATION_ENTRY { + int device_irq; + int isa_irq; +} SMC37c669_IRQ_TRANSLATION_ENTRY; + +/* +** ISA/Device DMA Translation Table Entry Definition +*/ +typedef struct _SMC37c669_DRQ_TRANSLATION_ENTRY { + int device_drq; + int isa_drq; +} SMC37c669_DRQ_TRANSLATION_ENTRY; + +/* +** External Interface Function Prototype Declarations +*/ + +SMC37c669_CONFIG_REGS *SMC37c669_detect( + int +); + +unsigned int SMC37c669_enable_device( + unsigned int func +); + +unsigned int SMC37c669_disable_device( + unsigned int func +); + +unsigned int SMC37c669_configure_device( + unsigned int func, + int port, + int irq, + int drq +); + +void SMC37c669_display_device_info( + void +); + +#endif /* __SMC37c669_H */ + +/* file: smcc669.c + * + * Copyright (C) 1997 by + * Digital Equipment Corporation, Maynard, Massachusetts. + * All rights reserved. + * + * This software is furnished under a license and may be used and copied + * only in accordance of the terms of such license and with the + * inclusion of the above copyright notice. This software or any other + * copies thereof may not be provided or otherwise made available to any + * other person. No title to and ownership of the software is hereby + * transferred. + * + * The information in this software is subject to change without notice + * and should not be construed as a commitment by digital equipment + * corporation. + * + * Digital assumes no responsibility for the use or reliability of its + * software on equipment which is not supplied by digital. 
+ */ + +/* + *++ + * FACILITY: + * + * Alpha SRM Console Firmware + * + * MODULE DESCRIPTION: + * + * SMC37c669 Super I/O controller configuration routines. + * + * AUTHORS: + * + * Eric Rasmussen + * + * CREATION DATE: + * + * 28-Jan-1997 + * + * MODIFICATION HISTORY: + * + * er 01-May-1997 Fixed pointer conversion errors in + * SMC37c669_get_device_config(). + * er 28-Jan-1997 Initial version. + * + *-- + */ + +#ifndef TRUE +#define TRUE 1 +#endif +#ifndef FALSE +#define FALSE 0 +#endif + +#define wb( _x_, _y_ ) outb( _y_, (unsigned int)((unsigned long)_x_) ) +#define rb( _x_ ) inb( (unsigned int)((unsigned long)_x_) ) + +/* +** Local storage for device configuration information. +** +** Since the SMC37c669 does not provide an explicit +** mechanism for enabling/disabling individual device +** functions, other than unmapping the device, local +** storage for device configuration information is +** allocated here for use in implementing our own +** function enable/disable scheme. +*/ +static struct DEVICE_CONFIG { + unsigned int port1; + unsigned int port2; + int irq; + int drq; +} local_config [NUM_FUNCS]; + +/* +** List of all possible addresses for the Super I/O chip +*/ +static unsigned long SMC37c669_Addresses[] __initdata = + { + 0x3F0UL, /* Primary address */ + 0x370UL, /* Secondary address */ + 0UL /* End of list */ + }; + +/* +** Global Pointer to the Super I/O device +*/ +static SMC37c669_CONFIG_REGS *SMC37c669 __initdata = NULL; + +/* +** IRQ Translation Table +** +** The IRQ translation table is a list of SMC37c669 device +** and standard ISA IRQs. +** +*/ +static SMC37c669_IRQ_TRANSLATION_ENTRY *SMC37c669_irq_table __initdata; + +/* +** The following definition is for the default IRQ +** translation table. +*/ +static SMC37c669_IRQ_TRANSLATION_ENTRY SMC37c669_default_irq_table[] +__initdata = + { + { SMC37c669_DEVICE_IRQ_A, -1 }, + { SMC37c669_DEVICE_IRQ_B, -1 }, + { SMC37c669_DEVICE_IRQ_C, 7 }, + { SMC37c669_DEVICE_IRQ_D, 6 }, + { SMC37c669_DEVICE_IRQ_E, 4 }, + { SMC37c669_DEVICE_IRQ_F, 3 }, + { SMC37c669_DEVICE_IRQ_H, -1 }, + { -1, -1 } /* End of table */ + }; + +/* +** The following definition is for the MONET (XP1000) IRQ +** translation table. +*/ +static SMC37c669_IRQ_TRANSLATION_ENTRY SMC37c669_monet_irq_table[] +__initdata = + { + { SMC37c669_DEVICE_IRQ_A, -1 }, + { SMC37c669_DEVICE_IRQ_B, -1 }, + { SMC37c669_DEVICE_IRQ_C, 6 }, + { SMC37c669_DEVICE_IRQ_D, 7 }, + { SMC37c669_DEVICE_IRQ_E, 4 }, + { SMC37c669_DEVICE_IRQ_F, 3 }, + { SMC37c669_DEVICE_IRQ_H, -1 }, + { -1, -1 } /* End of table */ + }; + +static SMC37c669_IRQ_TRANSLATION_ENTRY *SMC37c669_irq_tables[] __initdata = + { + SMC37c669_default_irq_table, + SMC37c669_monet_irq_table + }; + +/* +** DRQ Translation Table +** +** The DRQ translation table is a list of SMC37c669 device and +** ISA DMA channels. +** +*/ +static SMC37c669_DRQ_TRANSLATION_ENTRY *SMC37c669_drq_table __initdata; + +/* +** The following definition is the default DRQ +** translation table. 
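+**
+** An ISA value of -1 in an entry means the device channel has no ISA
+** mapping (the translation routines then return -1); the { -1, -1 }
+** pair terminates the table.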
+*/ +static SMC37c669_DRQ_TRANSLATION_ENTRY SMC37c669_default_drq_table[] +__initdata = + { + { SMC37c669_DEVICE_DRQ_A, 2 }, + { SMC37c669_DEVICE_DRQ_B, 3 }, + { SMC37c669_DEVICE_DRQ_C, -1 }, + { -1, -1 } /* End of table */ + }; + +/* +** Local Function Prototype Declarations +*/ + +static unsigned int SMC37c669_is_device_enabled( + unsigned int func +); + +#if 0 +static unsigned int SMC37c669_get_device_config( + unsigned int func, + int *port, + int *irq, + int *drq +); +#endif + +static void SMC37c669_config_mode( + unsigned int enable +); + +static unsigned char SMC37c669_read_config( + unsigned char index +); + +static void SMC37c669_write_config( + unsigned char index, + unsigned char data +); + +static void SMC37c669_init_local_config( void ); + +static struct DEVICE_CONFIG *SMC37c669_get_config( + unsigned int func +); + +static int SMC37c669_xlate_irq( + int irq +); + +static int SMC37c669_xlate_drq( + int drq +); + +static __cacheline_aligned DEFINE_SPINLOCK(smc_lock); + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function detects the presence of an SMC37c669 Super I/O +** controller. +** +** FORMAL PARAMETERS: +** +** None +** +** RETURN VALUE: +** +** Returns a pointer to the device if found, otherwise, +** the NULL pointer is returned. +** +** SIDE EFFECTS: +** +** None +** +**-- +*/ +SMC37c669_CONFIG_REGS * __init SMC37c669_detect( int index ) +{ + int i; + SMC37c669_DEVICE_ID_REGISTER id; + + for ( i = 0; SMC37c669_Addresses[i] != 0; i++ ) { +/* +** Initialize the device pointer even though we don't yet know if +** the controller is at this address. The support functions access +** the controller through this device pointer so we need to set it +** even when we are looking ... +*/ + SMC37c669 = ( SMC37c669_CONFIG_REGS * )SMC37c669_Addresses[i]; +/* +** Enter configuration mode +*/ + SMC37c669_config_mode( TRUE ); +/* +** Read the device id +*/ + id.as_uchar = SMC37c669_read_config( SMC37c669_DEVICE_ID_INDEX ); +/* +** Exit configuration mode +*/ + SMC37c669_config_mode( FALSE ); +/* +** Does the device id match? If so, assume we have found an +** SMC37c669 controller at this address. +*/ + if ( id.by_field.device_id == SMC37c669_DEVICE_ID ) { +/* +** Initialize the IRQ and DRQ translation tables. +*/ + SMC37c669_irq_table = SMC37c669_irq_tables[ index ]; + SMC37c669_drq_table = SMC37c669_default_drq_table; +/* +** erfix +** +** If the platform can't use the IRQ and DRQ defaults set up in this +** file, it should call a platform-specific external routine at this +** point to reset the IRQ and DRQ translation table pointers to point +** at the appropriate tables for the platform. If the defaults are +** acceptable, then the external routine should do nothing. +*/ + +/* +** Put the chip back into configuration mode +*/ + SMC37c669_config_mode( TRUE ); +/* +** Initialize local storage for configuration information +*/ + SMC37c669_init_local_config( ); +/* +** Exit configuration mode +*/ + SMC37c669_config_mode( FALSE ); +/* +** SMC37c669 controller found, break out of search loop +*/ + break; + } + else { +/* +** Otherwise, we did not find an SMC37c669 controller at this +** address so set the device pointer to NULL. +*/ + SMC37c669 = NULL; + } + } + return SMC37c669; +} + + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function enables an SMC37c669 device function. 
+** +** FORMAL PARAMETERS: +** +** func: +** Which device function to enable +** +** RETURN VALUE: +** +** Returns TRUE is the device function was enabled, otherwise, FALSE +** +** SIDE EFFECTS: +** +** {@description or none@} +** +** DESIGN: +** +** Enabling a device function in the SMC37c669 controller involves +** setting all of its mappings (port, irq, drq ...). A local +** "shadow" copy of the device configuration is kept so we can +** just set each mapping to what the local copy says. +** +** This function ALWAYS updates the local shadow configuration of +** the device function being enabled, even if the device is always +** enabled. To avoid replication of code, functions such as +** configure_device set up the local copy and then call this +** function to the update the real device. +** +**-- +*/ +unsigned int __init SMC37c669_enable_device ( unsigned int func ) +{ + unsigned int ret_val = FALSE; +/* +** Put the device into configuration mode +*/ + SMC37c669_config_mode( TRUE ); + switch ( func ) { + case SERIAL_0: + { + SMC37c669_SERIAL_BASE_ADDRESS_REGISTER base_addr; + SMC37c669_SERIAL_IRQ_REGISTER irq; +/* +** Enable the serial 1 IRQ mapping +*/ + irq.as_uchar = + SMC37c669_read_config( SMC37c669_SERIAL_IRQ_INDEX ); + + irq.by_field.uart1_irq = + SMC37c669_RAW_DEVICE_IRQ( + SMC37c669_xlate_irq( local_config[ func ].irq ) + ); + + SMC37c669_write_config( SMC37c669_SERIAL_IRQ_INDEX, irq.as_uchar ); +/* +** Enable the serial 1 port base address mapping +*/ + base_addr.as_uchar = 0; + base_addr.by_field.addr9_3 = local_config[ func ].port1 >> 3; + + SMC37c669_write_config( + SMC37c669_SERIAL0_BASE_ADDRESS_INDEX, + base_addr.as_uchar + ); + ret_val = TRUE; + break; + } + case SERIAL_1: + { + SMC37c669_SERIAL_BASE_ADDRESS_REGISTER base_addr; + SMC37c669_SERIAL_IRQ_REGISTER irq; +/* +** Enable the serial 2 IRQ mapping +*/ + irq.as_uchar = + SMC37c669_read_config( SMC37c669_SERIAL_IRQ_INDEX ); + + irq.by_field.uart2_irq = + SMC37c669_RAW_DEVICE_IRQ( + SMC37c669_xlate_irq( local_config[ func ].irq ) + ); + + SMC37c669_write_config( SMC37c669_SERIAL_IRQ_INDEX, irq.as_uchar ); +/* +** Enable the serial 2 port base address mapping +*/ + base_addr.as_uchar = 0; + base_addr.by_field.addr9_3 = local_config[ func ].port1 >> 3; + + SMC37c669_write_config( + SMC37c669_SERIAL1_BASE_ADDRESS_INDEX, + base_addr.as_uchar + ); + ret_val = TRUE; + break; + } + case PARALLEL_0: + { + SMC37c669_PARALLEL_BASE_ADDRESS_REGISTER base_addr; + SMC37c669_PARALLEL_FDC_IRQ_REGISTER irq; + SMC37c669_PARALLEL_FDC_DRQ_REGISTER drq; +/* +** Enable the parallel port DMA channel mapping +*/ + drq.as_uchar = + SMC37c669_read_config( SMC37c669_PARALLEL_FDC_DRQ_INDEX ); + + drq.by_field.ppt_drq = + SMC37c669_RAW_DEVICE_DRQ( + SMC37c669_xlate_drq( local_config[ func ].drq ) + ); + + SMC37c669_write_config( + SMC37c669_PARALLEL_FDC_DRQ_INDEX, + drq.as_uchar + ); +/* +** Enable the parallel port IRQ mapping +*/ + irq.as_uchar = + SMC37c669_read_config( SMC37c669_PARALLEL_FDC_IRQ_INDEX ); + + irq.by_field.ppt_irq = + SMC37c669_RAW_DEVICE_IRQ( + SMC37c669_xlate_irq( local_config[ func ].irq ) + ); + + SMC37c669_write_config( + SMC37c669_PARALLEL_FDC_IRQ_INDEX, + irq.as_uchar + ); +/* +** Enable the parallel port base address mapping +*/ + base_addr.as_uchar = 0; + base_addr.by_field.addr9_2 = local_config[ func ].port1 >> 2; + + SMC37c669_write_config( + SMC37c669_PARALLEL0_BASE_ADDRESS_INDEX, + base_addr.as_uchar + ); + ret_val = TRUE; + break; + } + case FLOPPY_0: + { + SMC37c669_FDC_BASE_ADDRESS_REGISTER base_addr; + 
SMC37c669_PARALLEL_FDC_IRQ_REGISTER irq; + SMC37c669_PARALLEL_FDC_DRQ_REGISTER drq; +/* +** Enable the floppy controller DMA channel mapping +*/ + drq.as_uchar = + SMC37c669_read_config( SMC37c669_PARALLEL_FDC_DRQ_INDEX ); + + drq.by_field.fdc_drq = + SMC37c669_RAW_DEVICE_DRQ( + SMC37c669_xlate_drq( local_config[ func ].drq ) + ); + + SMC37c669_write_config( + SMC37c669_PARALLEL_FDC_DRQ_INDEX, + drq.as_uchar + ); +/* +** Enable the floppy controller IRQ mapping +*/ + irq.as_uchar = + SMC37c669_read_config( SMC37c669_PARALLEL_FDC_IRQ_INDEX ); + + irq.by_field.fdc_irq = + SMC37c669_RAW_DEVICE_IRQ( + SMC37c669_xlate_irq( local_config[ func ].irq ) + ); + + SMC37c669_write_config( + SMC37c669_PARALLEL_FDC_IRQ_INDEX, + irq.as_uchar + ); +/* +** Enable the floppy controller base address mapping +*/ + base_addr.as_uchar = 0; + base_addr.by_field.addr9_4 = local_config[ func ].port1 >> 4; + + SMC37c669_write_config( + SMC37c669_FDC_BASE_ADDRESS_INDEX, + base_addr.as_uchar + ); + ret_val = TRUE; + break; + } + case IDE_0: + { + SMC37c669_IDE_ADDRESS_REGISTER ide_addr; +/* +** Enable the IDE alternate status base address mapping +*/ + ide_addr.as_uchar = 0; + ide_addr.by_field.addr9_4 = local_config[ func ].port2 >> 4; + + SMC37c669_write_config( + SMC37c669_IDE_ALTERNATE_ADDRESS_INDEX, + ide_addr.as_uchar + ); +/* +** Enable the IDE controller base address mapping +*/ + ide_addr.as_uchar = 0; + ide_addr.by_field.addr9_4 = local_config[ func ].port1 >> 4; + + SMC37c669_write_config( + SMC37c669_IDE_BASE_ADDRESS_INDEX, + ide_addr.as_uchar + ); + ret_val = TRUE; + break; + } + } +/* +** Exit configuration mode and return +*/ + SMC37c669_config_mode( FALSE ); + + return ret_val; +} + + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function disables a device function within the +** SMC37c669 Super I/O controller. +** +** FORMAL PARAMETERS: +** +** func: +** Which function to disable +** +** RETURN VALUE: +** +** Return TRUE if the device function was disabled, otherwise, FALSE +** +** SIDE EFFECTS: +** +** {@description or none@} +** +** DESIGN: +** +** Disabling a function in the SMC37c669 device involves +** disabling all the function's mappings (port, irq, drq ...). +** A shadow copy of the device configuration is maintained +** in local storage so we won't worry aboving saving the +** current configuration information. 
+** +**-- +*/ +unsigned int __init SMC37c669_disable_device ( unsigned int func ) +{ + unsigned int ret_val = FALSE; + +/* +** Put the device into configuration mode +*/ + SMC37c669_config_mode( TRUE ); + switch ( func ) { + case SERIAL_0: + { + SMC37c669_SERIAL_BASE_ADDRESS_REGISTER base_addr; + SMC37c669_SERIAL_IRQ_REGISTER irq; +/* +** Disable the serial 1 IRQ mapping +*/ + irq.as_uchar = + SMC37c669_read_config( SMC37c669_SERIAL_IRQ_INDEX ); + + irq.by_field.uart1_irq = 0; + + SMC37c669_write_config( SMC37c669_SERIAL_IRQ_INDEX, irq.as_uchar ); +/* +** Disable the serial 1 port base address mapping +*/ + base_addr.as_uchar = 0; + SMC37c669_write_config( + SMC37c669_SERIAL0_BASE_ADDRESS_INDEX, + base_addr.as_uchar + ); + ret_val = TRUE; + break; + } + case SERIAL_1: + { + SMC37c669_SERIAL_BASE_ADDRESS_REGISTER base_addr; + SMC37c669_SERIAL_IRQ_REGISTER irq; +/* +** Disable the serial 2 IRQ mapping +*/ + irq.as_uchar = + SMC37c669_read_config( SMC37c669_SERIAL_IRQ_INDEX ); + + irq.by_field.uart2_irq = 0; + + SMC37c669_write_config( SMC37c669_SERIAL_IRQ_INDEX, irq.as_uchar ); +/* +** Disable the serial 2 port base address mapping +*/ + base_addr.as_uchar = 0; + + SMC37c669_write_config( + SMC37c669_SERIAL1_BASE_ADDRESS_INDEX, + base_addr.as_uchar + ); + ret_val = TRUE; + break; + } + case PARALLEL_0: + { + SMC37c669_PARALLEL_BASE_ADDRESS_REGISTER base_addr; + SMC37c669_PARALLEL_FDC_IRQ_REGISTER irq; + SMC37c669_PARALLEL_FDC_DRQ_REGISTER drq; +/* +** Disable the parallel port DMA channel mapping +*/ + drq.as_uchar = + SMC37c669_read_config( SMC37c669_PARALLEL_FDC_DRQ_INDEX ); + + drq.by_field.ppt_drq = 0; + + SMC37c669_write_config( + SMC37c669_PARALLEL_FDC_DRQ_INDEX, + drq.as_uchar + ); +/* +** Disable the parallel port IRQ mapping +*/ + irq.as_uchar = + SMC37c669_read_config( SMC37c669_PARALLEL_FDC_IRQ_INDEX ); + + irq.by_field.ppt_irq = 0; + + SMC37c669_write_config( + SMC37c669_PARALLEL_FDC_IRQ_INDEX, + irq.as_uchar + ); +/* +** Disable the parallel port base address mapping +*/ + base_addr.as_uchar = 0; + + SMC37c669_write_config( + SMC37c669_PARALLEL0_BASE_ADDRESS_INDEX, + base_addr.as_uchar + ); + ret_val = TRUE; + break; + } + case FLOPPY_0: + { + SMC37c669_FDC_BASE_ADDRESS_REGISTER base_addr; + SMC37c669_PARALLEL_FDC_IRQ_REGISTER irq; + SMC37c669_PARALLEL_FDC_DRQ_REGISTER drq; +/* +** Disable the floppy controller DMA channel mapping +*/ + drq.as_uchar = + SMC37c669_read_config( SMC37c669_PARALLEL_FDC_DRQ_INDEX ); + + drq.by_field.fdc_drq = 0; + + SMC37c669_write_config( + SMC37c669_PARALLEL_FDC_DRQ_INDEX, + drq.as_uchar + ); +/* +** Disable the floppy controller IRQ mapping +*/ + irq.as_uchar = + SMC37c669_read_config( SMC37c669_PARALLEL_FDC_IRQ_INDEX ); + + irq.by_field.fdc_irq = 0; + + SMC37c669_write_config( + SMC37c669_PARALLEL_FDC_IRQ_INDEX, + irq.as_uchar + ); +/* +** Disable the floppy controller base address mapping +*/ + base_addr.as_uchar = 0; + + SMC37c669_write_config( + SMC37c669_FDC_BASE_ADDRESS_INDEX, + base_addr.as_uchar + ); + ret_val = TRUE; + break; + } + case IDE_0: + { + SMC37c669_IDE_ADDRESS_REGISTER ide_addr; +/* +** Disable the IDE alternate status base address mapping +*/ + ide_addr.as_uchar = 0; + + SMC37c669_write_config( + SMC37c669_IDE_ALTERNATE_ADDRESS_INDEX, + ide_addr.as_uchar + ); +/* +** Disable the IDE controller base address mapping +*/ + ide_addr.as_uchar = 0; + + SMC37c669_write_config( + SMC37c669_IDE_BASE_ADDRESS_INDEX, + ide_addr.as_uchar + ); + ret_val = TRUE; + break; + } + } +/* +** Exit configuration mode and return +*/ + 
SMC37c669_config_mode( FALSE ); + + return ret_val; +} + + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function configures a device function within the +** SMC37c669 Super I/O controller. +** +** FORMAL PARAMETERS: +** +** func: +** Which device function +** +** port: +** I/O port for the function to use +** +** irq: +** IRQ for the device function to use +** +** drq: +** DMA channel for the device function to use +** +** RETURN VALUE: +** +** Returns TRUE if the device function was configured, +** otherwise, FALSE. +** +** SIDE EFFECTS: +** +** {@description or none@} +** +** DESIGN: +** +** If this function returns TRUE, the local shadow copy of +** the configuration is also updated. If the device function +** is currently disabled, only the local shadow copy is +** updated and the actual device function will be updated +** if/when it is enabled. +** +**-- +*/ +unsigned int __init SMC37c669_configure_device ( + unsigned int func, + int port, + int irq, + int drq ) +{ + struct DEVICE_CONFIG *cp; + +/* +** Check for a valid configuration +*/ + if ( ( cp = SMC37c669_get_config ( func ) ) != NULL ) { +/* +** Configuration is valid, update the local shadow copy +*/ + if ( ( drq & ~0xFF ) == 0 ) { + cp->drq = drq; + } + if ( ( irq & ~0xFF ) == 0 ) { + cp->irq = irq; + } + if ( ( port & ~0xFFFF ) == 0 ) { + cp->port1 = port; + } +/* +** If the device function is enabled, update the actual +** device configuration. +*/ + if ( SMC37c669_is_device_enabled( func ) ) { + SMC37c669_enable_device( func ); + } + return TRUE; + } + return FALSE; +} + + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function determines whether a device function +** within the SMC37c669 controller is enabled. +** +** FORMAL PARAMETERS: +** +** func: +** Which device function +** +** RETURN VALUE: +** +** Returns TRUE if the device function is enabled, otherwise, FALSE +** +** SIDE EFFECTS: +** +** {@description or none@} +** +** DESIGN: +** +** To check whether a device is enabled we will only look at +** the port base address mapping. According to the SMC37c669 +** specification, all of the port base address mappings are +** disabled if the addr<9:8> (bits <7:6> of the register) are +** zero. +** +**-- +*/ +static unsigned int __init SMC37c669_is_device_enabled ( unsigned int func ) +{ + unsigned char base_addr = 0; + unsigned int dev_ok = FALSE; + unsigned int ret_val = FALSE; +/* +** Enter configuration mode +*/ + SMC37c669_config_mode( TRUE ); + + switch ( func ) { + case SERIAL_0: + base_addr = + SMC37c669_read_config( SMC37c669_SERIAL0_BASE_ADDRESS_INDEX ); + dev_ok = TRUE; + break; + case SERIAL_1: + base_addr = + SMC37c669_read_config( SMC37c669_SERIAL1_BASE_ADDRESS_INDEX ); + dev_ok = TRUE; + break; + case PARALLEL_0: + base_addr = + SMC37c669_read_config( SMC37c669_PARALLEL0_BASE_ADDRESS_INDEX ); + dev_ok = TRUE; + break; + case FLOPPY_0: + base_addr = + SMC37c669_read_config( SMC37c669_FDC_BASE_ADDRESS_INDEX ); + dev_ok = TRUE; + break; + case IDE_0: + base_addr = + SMC37c669_read_config( SMC37c669_IDE_BASE_ADDRESS_INDEX ); + dev_ok = TRUE; + break; + } +/* +** If we have a valid device, check base_addr<7:6> to see if the +** device is enabled (mapped). +*/ + if ( ( dev_ok ) && ( ( base_addr & 0xC0 ) != 0 ) ) { +/* +** The mapping is not disabled, so assume that the function is +** enabled. 
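+** For instance, an FDC decode programmed at 0x3F0 has address bits
+** <9:8> = 11b, so the register reads back with bits <7:6> nonzero and
+** the function is reported as enabled.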
+*/ + ret_val = TRUE; + } +/* +** Exit configuration mode +*/ + SMC37c669_config_mode( FALSE ); + + return ret_val; +} + + +#if 0 +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function retrieves the configuration information of a +** device function within the SMC37c699 Super I/O controller. +** +** FORMAL PARAMETERS: +** +** func: +** Which device function +** +** port: +** I/O port returned +** +** irq: +** IRQ returned +** +** drq: +** DMA channel returned +** +** RETURN VALUE: +** +** Returns TRUE if the device configuration was successfully +** retrieved, otherwise, FALSE. +** +** SIDE EFFECTS: +** +** The data pointed to by the port, irq, and drq parameters +** my be modified even if the configuration is not successfully +** retrieved. +** +** DESIGN: +** +** The device configuration is fetched from the local shadow +** copy. Any unused parameters will be set to -1. Any +** parameter which is not desired can specify the NULL +** pointer. +** +**-- +*/ +static unsigned int __init SMC37c669_get_device_config ( + unsigned int func, + int *port, + int *irq, + int *drq ) +{ + struct DEVICE_CONFIG *cp; + unsigned int ret_val = FALSE; +/* +** Check for a valid device configuration +*/ + if ( ( cp = SMC37c669_get_config( func ) ) != NULL ) { + if ( drq != NULL ) { + *drq = cp->drq; + ret_val = TRUE; + } + if ( irq != NULL ) { + *irq = cp->irq; + ret_val = TRUE; + } + if ( port != NULL ) { + *port = cp->port1; + ret_val = TRUE; + } + } + return ret_val; +} +#endif + + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function displays the current state of the SMC37c699 +** Super I/O controller's device functions. +** +** FORMAL PARAMETERS: +** +** None +** +** RETURN VALUE: +** +** None +** +** SIDE EFFECTS: +** +** None +** +**-- +*/ +void __init SMC37c669_display_device_info ( void ) +{ + if ( SMC37c669_is_device_enabled( SERIAL_0 ) ) { + printk( " Serial 0: Enabled [ Port 0x%x, IRQ %d ]\n", + local_config[ SERIAL_0 ].port1, + local_config[ SERIAL_0 ].irq + ); + } + else { + printk( " Serial 0: Disabled\n" ); + } + + if ( SMC37c669_is_device_enabled( SERIAL_1 ) ) { + printk( " Serial 1: Enabled [ Port 0x%x, IRQ %d ]\n", + local_config[ SERIAL_1 ].port1, + local_config[ SERIAL_1 ].irq + ); + } + else { + printk( " Serial 1: Disabled\n" ); + } + + if ( SMC37c669_is_device_enabled( PARALLEL_0 ) ) { + printk( " Parallel: Enabled [ Port 0x%x, IRQ %d/%d ]\n", + local_config[ PARALLEL_0 ].port1, + local_config[ PARALLEL_0 ].irq, + local_config[ PARALLEL_0 ].drq + ); + } + else { + printk( " Parallel: Disabled\n" ); + } + + if ( SMC37c669_is_device_enabled( FLOPPY_0 ) ) { + printk( " Floppy Ctrl: Enabled [ Port 0x%x, IRQ %d/%d ]\n", + local_config[ FLOPPY_0 ].port1, + local_config[ FLOPPY_0 ].irq, + local_config[ FLOPPY_0 ].drq + ); + } + else { + printk( " Floppy Ctrl: Disabled\n" ); + } + + if ( SMC37c669_is_device_enabled( IDE_0 ) ) { + printk( " IDE 0: Enabled [ Port 0x%x, IRQ %d ]\n", + local_config[ IDE_0 ].port1, + local_config[ IDE_0 ].irq + ); + } + else { + printk( " IDE 0: Disabled\n" ); + } +} + + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function puts the SMC37c669 Super I/O controller into, +** and takes it out of, configuration mode. +** +** FORMAL PARAMETERS: +** +** enable: +** TRUE to enter configuration mode, FALSE to exit. +** +** RETURN VALUE: +** +** None +** +** SIDE EFFECTS: +** +** The SMC37c669 controller may be left in configuration mode. 
+** +**-- +*/ +static void __init SMC37c669_config_mode( + unsigned int enable ) +{ + if ( enable ) { +/* +** To enter configuration mode, two writes in succession to the index +** port are required. If a write to another address or port occurs +** between these two writes, the chip does not enter configuration +** mode. Therefore, a spinlock is placed around the two writes to +** guarantee that they complete uninterrupted. +*/ + spin_lock(&smc_lock); + wb( &SMC37c669->index_port, SMC37c669_CONFIG_ON_KEY ); + wb( &SMC37c669->index_port, SMC37c669_CONFIG_ON_KEY ); + spin_unlock(&smc_lock); + } + else { + wb( &SMC37c669->index_port, SMC37c669_CONFIG_OFF_KEY ); + } +} + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function reads an SMC37c669 Super I/O controller +** configuration register. This function assumes that the +** device is already in configuration mode. +** +** FORMAL PARAMETERS: +** +** index: +** Index value of configuration register to read +** +** RETURN VALUE: +** +** Data read from configuration register +** +** SIDE EFFECTS: +** +** None +** +**-- +*/ +static unsigned char __init SMC37c669_read_config( + unsigned char index ) +{ + wb(&SMC37c669->index_port, index); + return rb(&SMC37c669->data_port); +} + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function writes an SMC37c669 Super I/O controller +** configuration register. This function assumes that the +** device is already in configuration mode. +** +** FORMAL PARAMETERS: +** +** index: +** Index of configuration register to write +** +** data: +** Data to be written +** +** RETURN VALUE: +** +** None +** +** SIDE EFFECTS: +** +** None +** +**-- +*/ +static void __init SMC37c669_write_config( + unsigned char index, + unsigned char data ) +{ + wb( &SMC37c669->index_port, index ); + wb( &SMC37c669->data_port, data ); +} + + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function initializes the local device +** configuration storage. This function assumes +** that the device is already in configuration +** mode. +** +** FORMAL PARAMETERS: +** +** None +** +** RETURN VALUE: +** +** None +** +** SIDE EFFECTS: +** +** Local storage for device configuration information +** is initialized. 
+** +**-- +*/ +static void __init SMC37c669_init_local_config ( void ) +{ + SMC37c669_SERIAL_BASE_ADDRESS_REGISTER uart_base; + SMC37c669_SERIAL_IRQ_REGISTER uart_irqs; + SMC37c669_PARALLEL_BASE_ADDRESS_REGISTER ppt_base; + SMC37c669_PARALLEL_FDC_IRQ_REGISTER ppt_fdc_irqs; + SMC37c669_PARALLEL_FDC_DRQ_REGISTER ppt_fdc_drqs; + SMC37c669_FDC_BASE_ADDRESS_REGISTER fdc_base; + SMC37c669_IDE_ADDRESS_REGISTER ide_base; + SMC37c669_IDE_ADDRESS_REGISTER ide_alt; + +/* +** Get serial port 1 base address +*/ + uart_base.as_uchar = + SMC37c669_read_config( SMC37c669_SERIAL0_BASE_ADDRESS_INDEX ); +/* +** Get IRQs for serial ports 1 & 2 +*/ + uart_irqs.as_uchar = + SMC37c669_read_config( SMC37c669_SERIAL_IRQ_INDEX ); +/* +** Store local configuration information for serial port 1 +*/ + local_config[SERIAL_0].port1 = uart_base.by_field.addr9_3 << 3; + local_config[SERIAL_0].irq = + SMC37c669_xlate_irq( + SMC37c669_DEVICE_IRQ( uart_irqs.by_field.uart1_irq ) + ); +/* +** Get serial port 2 base address +*/ + uart_base.as_uchar = + SMC37c669_read_config( SMC37c669_SERIAL1_BASE_ADDRESS_INDEX ); +/* +** Store local configuration information for serial port 2 +*/ + local_config[SERIAL_1].port1 = uart_base.by_field.addr9_3 << 3; + local_config[SERIAL_1].irq = + SMC37c669_xlate_irq( + SMC37c669_DEVICE_IRQ( uart_irqs.by_field.uart2_irq ) + ); +/* +** Get parallel port base address +*/ + ppt_base.as_uchar = + SMC37c669_read_config( SMC37c669_PARALLEL0_BASE_ADDRESS_INDEX ); +/* +** Get IRQs for parallel port and floppy controller +*/ + ppt_fdc_irqs.as_uchar = + SMC37c669_read_config( SMC37c669_PARALLEL_FDC_IRQ_INDEX ); +/* +** Get DRQs for parallel port and floppy controller +*/ + ppt_fdc_drqs.as_uchar = + SMC37c669_read_config( SMC37c669_PARALLEL_FDC_DRQ_INDEX ); +/* +** Store local configuration information for parallel port +*/ + local_config[PARALLEL_0].port1 = ppt_base.by_field.addr9_2 << 2; + local_config[PARALLEL_0].irq = + SMC37c669_xlate_irq( + SMC37c669_DEVICE_IRQ( ppt_fdc_irqs.by_field.ppt_irq ) + ); + local_config[PARALLEL_0].drq = + SMC37c669_xlate_drq( + SMC37c669_DEVICE_DRQ( ppt_fdc_drqs.by_field.ppt_drq ) + ); +/* +** Get floppy controller base address +*/ + fdc_base.as_uchar = + SMC37c669_read_config( SMC37c669_FDC_BASE_ADDRESS_INDEX ); +/* +** Store local configuration information for floppy controller +*/ + local_config[FLOPPY_0].port1 = fdc_base.by_field.addr9_4 << 4; + local_config[FLOPPY_0].irq = + SMC37c669_xlate_irq( + SMC37c669_DEVICE_IRQ( ppt_fdc_irqs.by_field.fdc_irq ) + ); + local_config[FLOPPY_0].drq = + SMC37c669_xlate_drq( + SMC37c669_DEVICE_DRQ( ppt_fdc_drqs.by_field.fdc_drq ) + ); +/* +** Get IDE controller base address +*/ + ide_base.as_uchar = + SMC37c669_read_config( SMC37c669_IDE_BASE_ADDRESS_INDEX ); +/* +** Get IDE alternate status base address +*/ + ide_alt.as_uchar = + SMC37c669_read_config( SMC37c669_IDE_ALTERNATE_ADDRESS_INDEX ); +/* +** Store local configuration information for IDE controller +*/ + local_config[IDE_0].port1 = ide_base.by_field.addr9_4 << 4; + local_config[IDE_0].port2 = ide_alt.by_field.addr9_4 << 4; + local_config[IDE_0].irq = 14; +} + + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function returns a pointer to the local shadow +** configuration of the requested device function. +** +** FORMAL PARAMETERS: +** +** func: +** Which device function +** +** RETURN VALUE: +** +** Returns a pointer to the DEVICE_CONFIG structure for the +** requested function, otherwise, NULL. 
+** +** SIDE EFFECTS: +** +** {@description or none@} +** +**-- +*/ +static struct DEVICE_CONFIG * __init SMC37c669_get_config( unsigned int func ) +{ + struct DEVICE_CONFIG *cp = NULL; + + switch ( func ) { + case SERIAL_0: + cp = &local_config[ SERIAL_0 ]; + break; + case SERIAL_1: + cp = &local_config[ SERIAL_1 ]; + break; + case PARALLEL_0: + cp = &local_config[ PARALLEL_0 ]; + break; + case FLOPPY_0: + cp = &local_config[ FLOPPY_0 ]; + break; + case IDE_0: + cp = &local_config[ IDE_0 ]; + break; + } + return cp; +} + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function translates IRQs back and forth between ISA +** IRQs and SMC37c669 device IRQs. +** +** FORMAL PARAMETERS: +** +** irq: +** The IRQ to translate +** +** RETURN VALUE: +** +** Returns the translated IRQ, otherwise, returns -1. +** +** SIDE EFFECTS: +** +** {@description or none@} +** +**-- +*/ +static int __init SMC37c669_xlate_irq ( int irq ) +{ + int i, translated_irq = -1; + + if ( SMC37c669_IS_DEVICE_IRQ( irq ) ) { +/* +** We are translating a device IRQ to an ISA IRQ +*/ + for ( i = 0; ( SMC37c669_irq_table[i].device_irq != -1 ) || ( SMC37c669_irq_table[i].isa_irq != -1 ); i++ ) { + if ( irq == SMC37c669_irq_table[i].device_irq ) { + translated_irq = SMC37c669_irq_table[i].isa_irq; + break; + } + } + } + else { +/* +** We are translating an ISA IRQ to a device IRQ +*/ + for ( i = 0; ( SMC37c669_irq_table[i].isa_irq != -1 ) || ( SMC37c669_irq_table[i].device_irq != -1 ); i++ ) { + if ( irq == SMC37c669_irq_table[i].isa_irq ) { + translated_irq = SMC37c669_irq_table[i].device_irq; + break; + } + } + } + return translated_irq; +} + + +/* +**++ +** FUNCTIONAL DESCRIPTION: +** +** This function translates DMA channels back and forth between +** ISA DMA channels and SMC37c669 device DMA channels. +** +** FORMAL PARAMETERS: +** +** drq: +** The DMA channel to translate +** +** RETURN VALUE: +** +** Returns the translated DMA channel, otherwise, returns -1 +** +** SIDE EFFECTS: +** +** {@description or none@} +** +**-- +*/ +static int __init SMC37c669_xlate_drq ( int drq ) +{ + int i, translated_drq = -1; + + if ( SMC37c669_IS_DEVICE_DRQ( drq ) ) { +/* +** We are translating a device DMA channel to an ISA DMA channel +*/ + for ( i = 0; ( SMC37c669_drq_table[i].device_drq != -1 ) || ( SMC37c669_drq_table[i].isa_drq != -1 ); i++ ) { + if ( drq == SMC37c669_drq_table[i].device_drq ) { + translated_drq = SMC37c669_drq_table[i].isa_drq; + break; + } + } + } + else { +/* +** We are translating an ISA DMA channel to a device DMA channel +*/ + for ( i = 0; ( SMC37c669_drq_table[i].isa_drq != -1 ) || ( SMC37c669_drq_table[i].device_drq != -1 ); i++ ) { + if ( drq == SMC37c669_drq_table[i].isa_drq ) { + translated_drq = SMC37c669_drq_table[i].device_drq; + break; + } + } + } + return translated_drq; +} + +#if 0 +int __init smcc669_init ( void ) +{ + struct INODE *ip; + + allocinode( smc_ddb.name, 1, &ip ); + ip->dva = &smc_ddb; + ip->attr = ATTR$M_WRITE | ATTR$M_READ; + ip->len[0] = 0x30; + ip->misc = 0; + INODE_UNLOCK( ip ); + + return msg_success; +} + +int __init smcc669_open( struct FILE *fp, char *info, char *next, char *mode ) +{ + struct INODE *ip; +/* +** Allow multiple readers but only one writer. 
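SMC37c669_xlate_irq() and SMC37c669_xlate_drq() above both walk a table of {device value, ISA value} pairs and stop at an entry whose fields are both -1; the real tables (SMC37c669_irq_table / SMC37c669_drq_table) are defined earlier in this file and are not repeated here. A standalone sketch of the same sentinel-terminated lookup, with invented mappings, is:

/*
 * Standalone illustration of the sentinel-terminated table scan used by
 * the translation routines above.  The pairs below are invented for the
 * example and are not the actual SMC37c669 mappings.
 */
struct xlate_pair {
	int device_val;
	int isa_val;
};

static const struct xlate_pair example_xlate_table[] = {
	{ 1, 4 },
	{ 2, 3 },
	{ 3, 7 },
	{ -1, -1 },		/* sentinel: both fields -1 end the scan */
};

static int example_device_to_isa( int dev )
{
	int i;

	for ( i = 0; example_xlate_table[i].device_val != -1 ||
		     example_xlate_table[i].isa_val != -1; i++ ) {
		if ( example_xlate_table[i].device_val == dev )
			return example_xlate_table[i].isa_val;
	}
	return -1;		/* no translation found */
}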
ip->misc keeps track +** of the number of writers +*/ + ip = fp->ip; + INODE_LOCK( ip ); + if ( fp->mode & ATTR$M_WRITE ) { + if ( ip->misc ) { + INODE_UNLOCK( ip ); + return msg_failure; /* too many writers */ + } + ip->misc++; + } +/* +** Treat the information field as a byte offset +*/ + *fp->offset = xtoi( info ); + INODE_UNLOCK( ip ); + + return msg_success; +} + +int __init smcc669_close( struct FILE *fp ) +{ + struct INODE *ip; + + ip = fp->ip; + if ( fp->mode & ATTR$M_WRITE ) { + INODE_LOCK( ip ); + ip->misc--; + INODE_UNLOCK( ip ); + } + return msg_success; +} + +int __init smcc669_read( struct FILE *fp, int size, int number, unsigned char *buf ) +{ + int i; + int length; + int nbytes; + struct INODE *ip; + +/* +** Always access a byte at a time +*/ + ip = fp->ip; + length = size * number; + nbytes = 0; + + SMC37c669_config_mode( TRUE ); + for ( i = 0; i < length; i++ ) { + if ( !inrange( *fp->offset, 0, ip->len[0] ) ) + break; + *buf++ = SMC37c669_read_config( *fp->offset ); + *fp->offset += 1; + nbytes++; + } + SMC37c669_config_mode( FALSE ); + return nbytes; +} + +int __init smcc669_write( struct FILE *fp, int size, int number, unsigned char *buf ) +{ + int i; + int length; + int nbytes; + struct INODE *ip; +/* +** Always access a byte at a time +*/ + ip = fp->ip; + length = size * number; + nbytes = 0; + + SMC37c669_config_mode( TRUE ); + for ( i = 0; i < length; i++ ) { + if ( !inrange( *fp->offset, 0, ip->len[0] ) ) + break; + SMC37c669_write_config( *fp->offset, *buf ); + *fp->offset += 1; + buf++; + nbytes++; + } + SMC37c669_config_mode( FALSE ); + return nbytes; +} +#endif + +void __init +SMC37c669_dump_registers(void) +{ + int i; + for (i = 0; i <= 0x29; i++) + printk("-- CR%02x : %02x\n", i, SMC37c669_read_config(i)); +} +/*+ + * ============================================================================ + * = SMC_init - SMC37c669 Super I/O controller initialization = + * ============================================================================ + * + * OVERVIEW: + * + * This routine configures and enables device functions on the + * SMC37c669 Super I/O controller. + * + * FORM OF CALL: + * + * SMC_init( ); + * + * RETURNS: + * + * Nothing + * + * ARGUMENTS: + * + * None + * + * SIDE EFFECTS: + * + * None + * + */ +void __init SMC669_Init ( int index ) +{ + SMC37c669_CONFIG_REGS *SMC_base; + unsigned long flags; + + local_irq_save(flags); + if ( ( SMC_base = SMC37c669_detect( index ) ) != NULL ) { +#if SMC_DEBUG + SMC37c669_config_mode( TRUE ); + SMC37c669_dump_registers( ); + SMC37c669_config_mode( FALSE ); + SMC37c669_display_device_info( ); +#endif + SMC37c669_disable_device( SERIAL_0 ); + SMC37c669_configure_device( + SERIAL_0, + COM1_BASE, + COM1_IRQ, + -1 + ); + SMC37c669_enable_device( SERIAL_0 ); + + SMC37c669_disable_device( SERIAL_1 ); + SMC37c669_configure_device( + SERIAL_1, + COM2_BASE, + COM2_IRQ, + -1 + ); + SMC37c669_enable_device( SERIAL_1 ); + + SMC37c669_disable_device( PARALLEL_0 ); + SMC37c669_configure_device( + PARALLEL_0, + PARP_BASE, + PARP_IRQ, + PARP_DRQ + ); + SMC37c669_enable_device( PARALLEL_0 ); + + SMC37c669_disable_device( FLOPPY_0 ); + SMC37c669_configure_device( + FLOPPY_0, + FDC_BASE, + FDC_IRQ, + FDC_DRQ + ); + SMC37c669_enable_device( FLOPPY_0 ); + + /* Wake up sometimes forgotten floppy, especially on DP264. 
*/ + outb(0xc, 0x3f2); + + SMC37c669_disable_device( IDE_0 ); + +#if SMC_DEBUG + SMC37c669_config_mode( TRUE ); + SMC37c669_dump_registers( ); + SMC37c669_config_mode( FALSE ); + SMC37c669_display_device_info( ); +#endif + local_irq_restore(flags); + printk( "SMC37c669 Super I/O Controller found @ 0x%p\n", + SMC_base ); + } + else { + local_irq_restore(flags); +#if SMC_DEBUG + printk( "No SMC37c669 Super I/O Controller found\n" ); +#endif + } +} diff --git a/arch/alpha/kernel/smc37c93x.c b/arch/alpha/kernel/smc37c93x.c new file mode 100644 index 0000000000..71cd7aca38 --- /dev/null +++ b/arch/alpha/kernel/smc37c93x.c @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * SMC 37C93X initialization code + */ + +#include + +#include +#include +#include + +#include +#include + +#define SMC_DEBUG 0 + +#if SMC_DEBUG +# define DBG_DEVS(args) printk args +#else +# define DBG_DEVS(args) +#endif + +#define KB 1024 +#define MB (1024*KB) +#define GB (1024*MB) + +/* device "activate" register contents */ +#define DEVICE_ON 1 +#define DEVICE_OFF 0 + +/* configuration on/off keys */ +#define CONFIG_ON_KEY 0x55 +#define CONFIG_OFF_KEY 0xaa + +/* configuration space device definitions */ +#define FDC 0 +#define IDE1 1 +#define IDE2 2 +#define PARP 3 +#define SER1 4 +#define SER2 5 +#define RTCL 6 +#define KYBD 7 +#define AUXIO 8 + +/* Chip register offsets from base */ +#define CONFIG_CONTROL 0x02 +#define INDEX_ADDRESS 0x03 +#define LOGICAL_DEVICE_NUMBER 0x07 +#define DEVICE_ID 0x20 +#define DEVICE_REV 0x21 +#define POWER_CONTROL 0x22 +#define POWER_MGMT 0x23 +#define OSC 0x24 + +#define ACTIVATE 0x30 +#define ADDR_HI 0x60 +#define ADDR_LO 0x61 +#define INTERRUPT_SEL 0x70 +#define INTERRUPT_SEL_2 0x72 /* KYBD/MOUS only */ +#define DMA_CHANNEL_SEL 0x74 /* FDC/PARP only */ + +#define FDD_MODE_REGISTER 0x90 +#define FDD_OPTION_REGISTER 0x91 + +/* values that we read back that are expected ... */ +#define VALID_DEVICE_ID 2 + +/* default device addresses */ +#define KYBD_INTERRUPT 1 +#define MOUS_INTERRUPT 12 +#define COM2_BASE 0x2f8 +#define COM2_INTERRUPT 3 +#define COM1_BASE 0x3f8 +#define COM1_INTERRUPT 4 +#define PARP_BASE 0x3bc +#define PARP_INTERRUPT 7 + +static unsigned long __init SMCConfigState(unsigned long baseAddr) +{ + unsigned char devId; + + unsigned long configPort; + unsigned long indexPort; + unsigned long dataPort; + + int i; + + configPort = indexPort = baseAddr; + dataPort = configPort + 1; + +#define NUM_RETRIES 5 + + for (i = 0; i < NUM_RETRIES; i++) + { + outb(CONFIG_ON_KEY, configPort); + outb(CONFIG_ON_KEY, configPort); + outb(DEVICE_ID, indexPort); + devId = inb(dataPort); + if (devId == VALID_DEVICE_ID) { + outb(DEVICE_REV, indexPort); + /* unsigned char devRev = */ inb(dataPort); + break; + } + else + udelay(100); + } + return (i != NUM_RETRIES) ? 
baseAddr : 0L; +} + +static void __init SMCRunState(unsigned long baseAddr) +{ + outb(CONFIG_OFF_KEY, baseAddr); +} + +static unsigned long __init SMCDetectUltraIO(void) +{ + unsigned long baseAddr; + + baseAddr = 0x3F0; + if ( ( baseAddr = SMCConfigState( baseAddr ) ) == 0x3F0 ) { + return( baseAddr ); + } + baseAddr = 0x370; + if ( ( baseAddr = SMCConfigState( baseAddr ) ) == 0x370 ) { + return( baseAddr ); + } + return( ( unsigned long )0 ); +} + +static void __init SMCEnableDevice(unsigned long baseAddr, + unsigned long device, + unsigned long portaddr, + unsigned long interrupt) +{ + unsigned long indexPort; + unsigned long dataPort; + + indexPort = baseAddr; + dataPort = baseAddr + 1; + + outb(LOGICAL_DEVICE_NUMBER, indexPort); + outb(device, dataPort); + + outb(ADDR_LO, indexPort); + outb(( portaddr & 0xFF ), dataPort); + + outb(ADDR_HI, indexPort); + outb((portaddr >> 8) & 0xFF, dataPort); + + outb(INTERRUPT_SEL, indexPort); + outb(interrupt, dataPort); + + outb(ACTIVATE, indexPort); + outb(DEVICE_ON, dataPort); +} + +static void __init SMCEnableKYBD(unsigned long baseAddr) +{ + unsigned long indexPort; + unsigned long dataPort; + + indexPort = baseAddr; + dataPort = baseAddr + 1; + + outb(LOGICAL_DEVICE_NUMBER, indexPort); + outb(KYBD, dataPort); + + outb(INTERRUPT_SEL, indexPort); /* Primary interrupt select */ + outb(KYBD_INTERRUPT, dataPort); + + outb(INTERRUPT_SEL_2, indexPort); /* Secondary interrupt select */ + outb(MOUS_INTERRUPT, dataPort); + + outb(ACTIVATE, indexPort); + outb(DEVICE_ON, dataPort); +} + +static void __init SMCEnableFDC(unsigned long baseAddr) +{ + unsigned long indexPort; + unsigned long dataPort; + + unsigned char oldValue; + + indexPort = baseAddr; + dataPort = baseAddr + 1; + + outb(LOGICAL_DEVICE_NUMBER, indexPort); + outb(FDC, dataPort); + + outb(FDD_MODE_REGISTER, indexPort); + oldValue = inb(dataPort); + + oldValue |= 0x0E; /* Enable burst mode */ + outb(oldValue, dataPort); + + outb(INTERRUPT_SEL, indexPort); /* Primary interrupt select */ + outb(0x06, dataPort ); + + outb(DMA_CHANNEL_SEL, indexPort); /* DMA channel select */ + outb(0x02, dataPort); + + outb(ACTIVATE, indexPort); + outb(DEVICE_ON, dataPort); +} + +#if SMC_DEBUG +static void __init SMCReportDeviceStatus(unsigned long baseAddr) +{ + unsigned long indexPort; + unsigned long dataPort; + unsigned char currentControl; + + indexPort = baseAddr; + dataPort = baseAddr + 1; + + outb(POWER_CONTROL, indexPort); + currentControl = inb(dataPort); + + printk(currentControl & (1 << FDC) + ? "\t+FDC Enabled\n" : "\t-FDC Disabled\n"); + printk(currentControl & (1 << IDE1) + ? "\t+IDE1 Enabled\n" : "\t-IDE1 Disabled\n"); + printk(currentControl & (1 << IDE2) + ? "\t+IDE2 Enabled\n" : "\t-IDE2 Disabled\n"); + printk(currentControl & (1 << PARP) + ? "\t+PARP Enabled\n" : "\t-PARP Disabled\n"); + printk(currentControl & (1 << SER1) + ? "\t+SER1 Enabled\n" : "\t-SER1 Disabled\n"); + printk(currentControl & (1 << SER2) + ? 
"\t+SER2 Enabled\n" : "\t-SER2 Disabled\n"); + + printk( "\n" ); +} +#endif + +int __init SMC93x_Init(void) +{ + unsigned long SMCUltraBase; + unsigned long flags; + + local_irq_save(flags); + if ((SMCUltraBase = SMCDetectUltraIO()) != 0UL) { +#if SMC_DEBUG + SMCReportDeviceStatus(SMCUltraBase); +#endif + SMCEnableDevice(SMCUltraBase, SER1, COM1_BASE, COM1_INTERRUPT); + DBG_DEVS(("SMC FDC37C93X: SER1 done\n")); + SMCEnableDevice(SMCUltraBase, SER2, COM2_BASE, COM2_INTERRUPT); + DBG_DEVS(("SMC FDC37C93X: SER2 done\n")); + SMCEnableDevice(SMCUltraBase, PARP, PARP_BASE, PARP_INTERRUPT); + DBG_DEVS(("SMC FDC37C93X: PARP done\n")); + /* On PC164, IDE on the SMC is not enabled; + CMD646 (PCI) on MB */ + SMCEnableKYBD(SMCUltraBase); + DBG_DEVS(("SMC FDC37C93X: KYB done\n")); + SMCEnableFDC(SMCUltraBase); + DBG_DEVS(("SMC FDC37C93X: FDC done\n")); +#if SMC_DEBUG + SMCReportDeviceStatus(SMCUltraBase); +#endif + SMCRunState(SMCUltraBase); + local_irq_restore(flags); + printk("SMC FDC37C93X Ultra I/O Controller found @ 0x%lx\n", + SMCUltraBase); + return 1; + } + else { + local_irq_restore(flags); + DBG_DEVS(("No SMC FDC37C93X Ultra I/O Controller found\n")); + return 0; + } +} diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c new file mode 100644 index 0000000000..cb64e4797d --- /dev/null +++ b/arch/alpha/kernel/smp.c @@ -0,0 +1,768 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/smp.c + * + * 2001-07-09 Phil Ezolt (Phillip.Ezolt@compaq.com) + * Renamed modified smp_call_function to smp_call_function_on_cpu() + * Created an function that conforms to the old calling convention + * of smp_call_function(). + * + * This is helpful for DCPI. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include "proto.h" +#include "irq_impl.h" + + +#define DEBUG_SMP 0 +#if DEBUG_SMP +#define DBGS(args) printk args +#else +#define DBGS(args) +#endif + +/* A collection of per-processor data. */ +struct cpuinfo_alpha cpu_data[NR_CPUS]; +EXPORT_SYMBOL(cpu_data); + +/* A collection of single bit ipi messages. */ +static struct { + unsigned long bits ____cacheline_aligned; +} ipi_data[NR_CPUS] __cacheline_aligned; + +enum ipi_message_type { + IPI_RESCHEDULE, + IPI_CALL_FUNC, + IPI_CPU_STOP, +}; + +/* Set to a secondary's cpuid when it comes online. */ +static int smp_secondary_alive = 0; + +int smp_num_probed; /* Internal processor count */ +int smp_num_cpus = 1; /* Number that came online. */ +EXPORT_SYMBOL(smp_num_cpus); + +/* + * Called by both boot and secondaries to move global data into + * per-processor storage. + */ +static inline void __init +smp_store_cpu_info(int cpuid) +{ + cpu_data[cpuid].loops_per_jiffy = loops_per_jiffy; + cpu_data[cpuid].last_asn = ASN_FIRST_VERSION; + cpu_data[cpuid].need_new_asn = 0; + cpu_data[cpuid].asn_lock = 0; +} + +/* + * Ideally sets up per-cpu profiling hooks. Doesn't do much now... 
+ */ +static inline void __init +smp_setup_percpu_timer(int cpuid) +{ + cpu_data[cpuid].prof_counter = 1; + cpu_data[cpuid].prof_multiplier = 1; +} + +static void __init +wait_boot_cpu_to_stop(int cpuid) +{ + unsigned long stop = jiffies + 10*HZ; + + while (time_before(jiffies, stop)) { + if (!smp_secondary_alive) + return; + barrier(); + } + + printk("wait_boot_cpu_to_stop: FAILED on CPU %d, hanging now\n", cpuid); + for (;;) + barrier(); +} + +/* + * Where secondaries begin a life of C. + */ +void __init +smp_callin(void) +{ + int cpuid = hard_smp_processor_id(); + + if (cpu_online(cpuid)) { + printk("??, cpu 0x%x already present??\n", cpuid); + BUG(); + } + set_cpu_online(cpuid, true); + + /* Turn on machine checks. */ + wrmces(7); + + /* Set trap vectors. */ + trap_init(); + + /* Set interrupt vector. */ + wrent(entInt, 0); + + /* Get our local ticker going. */ + smp_setup_percpu_timer(cpuid); + init_clockevent(); + + /* Call platform-specific callin, if specified */ + if (alpha_mv.smp_callin) + alpha_mv.smp_callin(); + + /* All kernel threads share the same mm context. */ + mmgrab(&init_mm); + current->active_mm = &init_mm; + + /* inform the notifiers about the new cpu */ + notify_cpu_starting(cpuid); + + /* Must have completely accurate bogos. */ + local_irq_enable(); + + /* Wait boot CPU to stop with irq enabled before running + calibrate_delay. */ + wait_boot_cpu_to_stop(cpuid); + mb(); + calibrate_delay(); + + smp_store_cpu_info(cpuid); + /* Allow master to continue only after we written loops_per_jiffy. */ + wmb(); + smp_secondary_alive = 1; + + DBGS(("smp_callin: commencing CPU %d current %p active_mm %p\n", + cpuid, current, current->active_mm)); + + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); +} + +/* Wait until hwrpb->txrdy is clear for cpu. Return -1 on timeout. */ +static int +wait_for_txrdy (unsigned long cpumask) +{ + unsigned long timeout; + + if (!(hwrpb->txrdy & cpumask)) + return 0; + + timeout = jiffies + 10*HZ; + while (time_before(jiffies, timeout)) { + if (!(hwrpb->txrdy & cpumask)) + return 0; + udelay(10); + barrier(); + } + + return -1; +} + +/* + * Send a message to a secondary's console. "START" is one such + * interesting message. ;-) + */ +static void +send_secondary_console_msg(char *str, int cpuid) +{ + struct percpu_struct *cpu; + register char *cp1, *cp2; + unsigned long cpumask; + size_t len; + + cpu = (struct percpu_struct *) + ((char*)hwrpb + + hwrpb->processor_offset + + cpuid * hwrpb->processor_size); + + cpumask = (1UL << cpuid); + if (wait_for_txrdy(cpumask)) + goto timeout; + + cp2 = str; + len = strlen(cp2); + *(unsigned int *)&cpu->ipc_buffer[0] = len; + cp1 = (char *) &cpu->ipc_buffer[1]; + memcpy(cp1, cp2, len); + + /* atomic test and set */ + wmb(); + set_bit(cpuid, &hwrpb->rxrdy); + + if (wait_for_txrdy(cpumask)) + goto timeout; + return; + + timeout: + printk("Processor %x not ready\n", cpuid); +} + +/* + * A secondary console wants to send a message. Receive it. 
+ */ +static void +recv_secondary_console_msg(void) +{ + int mycpu, i, cnt; + unsigned long txrdy = hwrpb->txrdy; + char *cp1, *cp2, buf[80]; + struct percpu_struct *cpu; + + DBGS(("recv_secondary_console_msg: TXRDY 0x%lx.\n", txrdy)); + + mycpu = hard_smp_processor_id(); + + for (i = 0; i < NR_CPUS; i++) { + if (!(txrdy & (1UL << i))) + continue; + + DBGS(("recv_secondary_console_msg: " + "TXRDY contains CPU %d.\n", i)); + + cpu = (struct percpu_struct *) + ((char*)hwrpb + + hwrpb->processor_offset + + i * hwrpb->processor_size); + + DBGS(("recv_secondary_console_msg: on %d from %d" + " HALT_REASON 0x%lx FLAGS 0x%lx\n", + mycpu, i, cpu->halt_reason, cpu->flags)); + + cnt = cpu->ipc_buffer[0] >> 32; + if (cnt <= 0 || cnt >= 80) + strcpy(buf, "<<< BOGUS MSG >>>"); + else { + cp1 = (char *) &cpu->ipc_buffer[1]; + cp2 = buf; + memcpy(cp2, cp1, cnt); + cp2[cnt] = '\0'; + + while ((cp2 = strchr(cp2, '\r')) != 0) { + *cp2 = ' '; + if (cp2[1] == '\n') + cp2[1] = ' '; + } + } + + DBGS((KERN_INFO "recv_secondary_console_msg: on %d " + "message is '%s'\n", mycpu, buf)); + } + + hwrpb->txrdy = 0; +} + +/* + * Convince the console to have a secondary cpu begin execution. + */ +static int +secondary_cpu_start(int cpuid, struct task_struct *idle) +{ + struct percpu_struct *cpu; + struct pcb_struct *hwpcb, *ipcb; + unsigned long timeout; + + cpu = (struct percpu_struct *) + ((char*)hwrpb + + hwrpb->processor_offset + + cpuid * hwrpb->processor_size); + hwpcb = (struct pcb_struct *) cpu->hwpcb; + ipcb = &task_thread_info(idle)->pcb; + + /* Initialize the CPU's HWPCB to something just good enough for + us to get started. Immediately after starting, we'll swpctx + to the target idle task's pcb. Reuse the stack in the mean + time. Precalculate the target PCBB. */ + hwpcb->ksp = (unsigned long)ipcb + sizeof(union thread_union) - 16; + hwpcb->usp = 0; + hwpcb->ptbr = ipcb->ptbr; + hwpcb->pcc = 0; + hwpcb->asn = 0; + hwpcb->unique = virt_to_phys(ipcb); + hwpcb->flags = ipcb->flags; + hwpcb->res1 = hwpcb->res2 = 0; + +#if 0 + DBGS(("KSP 0x%lx PTBR 0x%lx VPTBR 0x%lx UNIQUE 0x%lx\n", + hwpcb->ksp, hwpcb->ptbr, hwrpb->vptb, hwpcb->unique)); +#endif + DBGS(("Starting secondary cpu %d: state 0x%lx pal_flags 0x%lx\n", + cpuid, idle->state, ipcb->flags)); + + /* Setup HWRPB fields that SRM uses to activate secondary CPU */ + hwrpb->CPU_restart = __smp_callin; + hwrpb->CPU_restart_data = (unsigned long) __smp_callin; + + /* Recalculate and update the HWRPB checksum */ + hwrpb_update_checksum(hwrpb); + + /* + * Send a "start" command to the specified processor. + */ + + /* SRM III 3.4.1.3 */ + cpu->flags |= 0x22; /* turn on Context Valid and Restart Capable */ + cpu->flags &= ~1; /* turn off Bootstrap In Progress */ + wmb(); + + send_secondary_console_msg("START\r\n", cpuid); + + /* Wait 10 seconds for an ACK from the console. */ + timeout = jiffies + 10*HZ; + while (time_before(jiffies, timeout)) { + if (cpu->flags & 1) + goto started; + udelay(10); + barrier(); + } + printk(KERN_ERR "SMP: Processor %d failed to start.\n", cpuid); + return -1; + + started: + DBGS(("secondary_cpu_start: SUCCESS for CPU %d!!!\n", cpuid)); + return 0; +} + +/* + * Bring one cpu online. + */ +static int +smp_boot_one_cpu(int cpuid, struct task_struct *idle) +{ + unsigned long timeout; + + /* Signal the secondary to wait a moment. */ + smp_secondary_alive = -1; + + /* Whirrr, whirrr, whirrrrrrrrr... */ + if (secondary_cpu_start(cpuid, idle)) + return -1; + + /* Notify the secondary CPU it can run calibrate_delay. 
*/ + mb(); + smp_secondary_alive = 0; + + /* We've been acked by the console; wait one second for + the task to start up for real. */ + timeout = jiffies + 1*HZ; + while (time_before(jiffies, timeout)) { + if (smp_secondary_alive == 1) + goto alive; + udelay(10); + barrier(); + } + + /* We failed to boot the CPU. */ + + printk(KERN_ERR "SMP: Processor %d is stuck.\n", cpuid); + return -1; + + alive: + /* Another "Red Snapper". */ + return 0; +} + +/* + * Called from setup_arch. Detect an SMP system and which processors + * are present. + */ +void __init +setup_smp(void) +{ + struct percpu_struct *cpubase, *cpu; + unsigned long i; + + if (boot_cpuid != 0) { + printk(KERN_WARNING "SMP: Booting off cpu %d instead of 0?\n", + boot_cpuid); + } + + if (hwrpb->nr_processors > 1) { + int boot_cpu_palrev; + + DBGS(("setup_smp: nr_processors %ld\n", + hwrpb->nr_processors)); + + cpubase = (struct percpu_struct *) + ((char*)hwrpb + hwrpb->processor_offset); + boot_cpu_palrev = cpubase->pal_revision; + + for (i = 0; i < hwrpb->nr_processors; i++) { + cpu = (struct percpu_struct *) + ((char *)cpubase + i*hwrpb->processor_size); + if ((cpu->flags & 0x1cc) == 0x1cc) { + smp_num_probed++; + set_cpu_possible(i, true); + set_cpu_present(i, true); + cpu->pal_revision = boot_cpu_palrev; + } + + DBGS(("setup_smp: CPU %d: flags 0x%lx type 0x%lx\n", + i, cpu->flags, cpu->type)); + DBGS(("setup_smp: CPU %d: PAL rev 0x%lx\n", + i, cpu->pal_revision)); + } + } else { + smp_num_probed = 1; + } + + printk(KERN_INFO "SMP: %d CPUs probed -- cpu_present_mask = %lx\n", + smp_num_probed, cpumask_bits(cpu_present_mask)[0]); +} + +/* + * Called by smp_init prepare the secondaries + */ +void __init +smp_prepare_cpus(unsigned int max_cpus) +{ + /* Take care of some initial bookkeeping. */ + memset(ipi_data, 0, sizeof(ipi_data)); + + current_thread_info()->cpu = boot_cpuid; + + smp_store_cpu_info(boot_cpuid); + smp_setup_percpu_timer(boot_cpuid); + + /* Nothing to do on a UP box, or when told not to. */ + if (smp_num_probed == 1 || max_cpus == 0) { + init_cpu_possible(cpumask_of(boot_cpuid)); + init_cpu_present(cpumask_of(boot_cpuid)); + printk(KERN_INFO "SMP mode deactivated.\n"); + return; + } + + printk(KERN_INFO "SMP starting up secondaries.\n"); + + smp_num_cpus = smp_num_probed; +} + +void +smp_prepare_boot_cpu(void) +{ +} + +int +__cpu_up(unsigned int cpu, struct task_struct *tidle) +{ + smp_boot_one_cpu(cpu, tidle); + + return cpu_online(cpu) ? 0 : -ENOSYS; +} + +void __init +smp_cpus_done(unsigned int max_cpus) +{ + int cpu; + unsigned long bogosum = 0; + + for(cpu = 0; cpu < NR_CPUS; cpu++) + if (cpu_online(cpu)) + bogosum += cpu_data[cpu].loops_per_jiffy; + + printk(KERN_INFO "SMP: Total of %d processors activated " + "(%lu.%02lu BogoMIPS).\n", + num_online_cpus(), + (bogosum + 2500) / (500000/HZ), + ((bogosum + 2500) / (5000/HZ)) % 100); +} + +int +setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + +static void +send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation) +{ + int i; + + mb(); + for_each_cpu(i, to_whom) + set_bit(operation, &ipi_data[i].bits); + + mb(); + for_each_cpu(i, to_whom) + wripir(i); +} + +void +handle_ipi(struct pt_regs *regs) +{ + int this_cpu = smp_processor_id(); + unsigned long *pending_ipis = &ipi_data[this_cpu].bits; + unsigned long ops; + +#if 0 + DBGS(("handle_ipi: on CPU %d ops 0x%lx PC 0x%lx\n", + this_cpu, *pending_ipis, regs->pc)); +#endif + + mb(); /* Order interrupt and bit testing. 
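send_ipi_message() above packs each request as a single bit in the target CPU's ipi_data word; the loop that follows pulls all pending bits out at once with xchg() and then peels them off lowest-bit-first with ops & -ops and __ffs(). A standalone sketch of that decode step, using plain C stand-ins for the kernel helpers:

/*
 * Illustration of the decode used below: isolate the lowest set bit,
 * clear it from the pending set, and turn it into a message number.
 * __ffs() is open-coded here only to keep the sketch self-contained.
 */
static void decode_pending_ops_sketch(unsigned long ops)
{
	while (ops) {
		unsigned long lowest = ops & -ops;	/* lowest set bit        */
		unsigned long which = 0;

		ops &= ~lowest;				/* clear it from the set */
		while (!(lowest & 1)) {			/* open-coded __ffs()    */
			lowest >>= 1;
			which++;
		}
		/* 'which' now names one enum ipi_message_type to be handled */
	}
}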
*/ + while ((ops = xchg(pending_ipis, 0)) != 0) { + mb(); /* Order bit clearing and data access. */ + do { + unsigned long which; + + which = ops & -ops; + ops &= ~which; + which = __ffs(which); + + switch (which) { + case IPI_RESCHEDULE: + scheduler_ipi(); + break; + + case IPI_CALL_FUNC: + generic_smp_call_function_interrupt(); + break; + + case IPI_CPU_STOP: + halt(); + + default: + printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", + this_cpu, which); + break; + } + } while (ops); + + mb(); /* Order data access and bit testing. */ + } + + cpu_data[this_cpu].ipi_count++; + + if (hwrpb->txrdy) + recv_secondary_console_msg(); +} + +void +smp_send_reschedule(int cpu) +{ +#ifdef DEBUG_IPI_MSG + if (cpu == hard_smp_processor_id()) + printk(KERN_WARNING + "smp_send_reschedule: Sending IPI to self.\n"); +#endif + send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE); +} + +void +smp_send_stop(void) +{ + cpumask_t to_whom; + cpumask_copy(&to_whom, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &to_whom); +#ifdef DEBUG_IPI_MSG + if (hard_smp_processor_id() != boot_cpu_id) + printk(KERN_WARNING "smp_send_stop: Not on boot cpu.\n"); +#endif + send_ipi_message(&to_whom, IPI_CPU_STOP); +} + +void arch_send_call_function_ipi_mask(const struct cpumask *mask) +{ + send_ipi_message(mask, IPI_CALL_FUNC); +} + +void arch_send_call_function_single_ipi(int cpu) +{ + send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC); +} + +static void +ipi_imb(void *ignored) +{ + imb(); +} + +void +smp_imb(void) +{ + /* Must wait other processors to flush their icache before continue. */ + on_each_cpu(ipi_imb, NULL, 1); +} +EXPORT_SYMBOL(smp_imb); + +static void +ipi_flush_tlb_all(void *ignored) +{ + tbia(); +} + +void +flush_tlb_all(void) +{ + /* Although we don't have any data to pass, we do want to + synchronize with the other processors. 
*/ + on_each_cpu(ipi_flush_tlb_all, NULL, 1); +} + +#define asn_locked() (cpu_data[smp_processor_id()].asn_lock) + +static void +ipi_flush_tlb_mm(void *x) +{ + struct mm_struct *mm = (struct mm_struct *) x; + if (mm == current->active_mm && !asn_locked()) + flush_tlb_current(mm); + else + flush_tlb_other(mm); +} + +void +flush_tlb_mm(struct mm_struct *mm) +{ + preempt_disable(); + + if (mm == current->active_mm) { + flush_tlb_current(mm); + if (atomic_read(&mm->mm_users) <= 1) { + int cpu, this_cpu = smp_processor_id(); + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (!cpu_online(cpu) || cpu == this_cpu) + continue; + if (mm->context[cpu]) + mm->context[cpu] = 0; + } + preempt_enable(); + return; + } + } + + smp_call_function(ipi_flush_tlb_mm, mm, 1); + + preempt_enable(); +} +EXPORT_SYMBOL(flush_tlb_mm); + +struct flush_tlb_page_struct { + struct vm_area_struct *vma; + struct mm_struct *mm; + unsigned long addr; +}; + +static void +ipi_flush_tlb_page(void *x) +{ + struct flush_tlb_page_struct *data = (struct flush_tlb_page_struct *)x; + struct mm_struct * mm = data->mm; + + if (mm == current->active_mm && !asn_locked()) + flush_tlb_current_page(mm, data->vma, data->addr); + else + flush_tlb_other(mm); +} + +void +flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) +{ + struct flush_tlb_page_struct data; + struct mm_struct *mm = vma->vm_mm; + + preempt_disable(); + + if (mm == current->active_mm) { + flush_tlb_current_page(mm, vma, addr); + if (atomic_read(&mm->mm_users) <= 1) { + int cpu, this_cpu = smp_processor_id(); + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (!cpu_online(cpu) || cpu == this_cpu) + continue; + if (mm->context[cpu]) + mm->context[cpu] = 0; + } + preempt_enable(); + return; + } + } + + data.vma = vma; + data.mm = mm; + data.addr = addr; + + smp_call_function(ipi_flush_tlb_page, &data, 1); + + preempt_enable(); +} +EXPORT_SYMBOL(flush_tlb_page); + +void +flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) +{ + /* On the Alpha we always flush the whole user tlb. */ + flush_tlb_mm(vma->vm_mm); +} +EXPORT_SYMBOL(flush_tlb_range); + +static void +ipi_flush_icache_page(void *x) +{ + struct mm_struct *mm = (struct mm_struct *) x; + if (mm == current->active_mm && !asn_locked()) + __load_new_mm_context(mm); + else + flush_tlb_other(mm); +} + +void +flush_icache_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long addr, int len) +{ + struct mm_struct *mm = vma->vm_mm; + + if ((vma->vm_flags & VM_EXEC) == 0) + return; + + preempt_disable(); + + if (mm == current->active_mm) { + __load_new_mm_context(mm); + if (atomic_read(&mm->mm_users) <= 1) { + int cpu, this_cpu = smp_processor_id(); + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (!cpu_online(cpu) || cpu == this_cpu) + continue; + if (mm->context[cpu]) + mm->context[cpu] = 0; + } + preempt_enable(); + return; + } + } + + smp_call_function(ipi_flush_icache_page, mm, 1); + + preempt_enable(); +} diff --git a/arch/alpha/kernel/srm_env.c b/arch/alpha/kernel/srm_env.c new file mode 100644 index 0000000000..528d2be581 --- /dev/null +++ b/arch/alpha/kernel/srm_env.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * srm_env.c - Access to SRM environment + * variables through linux' procfs + * + * (C) 2001,2002,2006 by Jan-Benedict Glaw + * + * This driver is a modified version of Erik Mouw's example proc + * interface, so: thank you, Erik! He can be reached via email at + * . It is based on an idea + * provided by DEC^WCompaq^WIntel's "Jumpstart" CD. 
They + * included a patch like this as well. Thanks for idea! + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define BASE_DIR "srm_environment" /* Subdir in /proc/ */ +#define NAMED_DIR "named_variables" /* Subdir for known variables */ +#define NUMBERED_DIR "numbered_variables" /* Subdir for all variables */ +#define VERSION "0.0.6" /* Module version */ +#define NAME "srm_env" /* Module name */ + +MODULE_AUTHOR("Jan-Benedict Glaw "); +MODULE_DESCRIPTION("Accessing Alpha SRM environment through procfs interface"); +MODULE_LICENSE("GPL"); + +typedef struct _srm_env { + char *name; + unsigned long id; +} srm_env_t; + +static struct proc_dir_entry *base_dir; +static struct proc_dir_entry *named_dir; +static struct proc_dir_entry *numbered_dir; + +static srm_env_t srm_named_entries[] = { + { "auto_action", ENV_AUTO_ACTION }, + { "boot_dev", ENV_BOOT_DEV }, + { "bootdef_dev", ENV_BOOTDEF_DEV }, + { "booted_dev", ENV_BOOTED_DEV }, + { "boot_file", ENV_BOOT_FILE }, + { "booted_file", ENV_BOOTED_FILE }, + { "boot_osflags", ENV_BOOT_OSFLAGS }, + { "booted_osflags", ENV_BOOTED_OSFLAGS }, + { "boot_reset", ENV_BOOT_RESET }, + { "dump_dev", ENV_DUMP_DEV }, + { "enable_audit", ENV_ENABLE_AUDIT }, + { "license", ENV_LICENSE }, + { "char_set", ENV_CHAR_SET }, + { "language", ENV_LANGUAGE }, + { "tty_dev", ENV_TTY_DEV }, + { NULL, 0 }, +}; + +static int srm_env_proc_show(struct seq_file *m, void *v) +{ + unsigned long ret; + unsigned long id = (unsigned long)m->private; + char *page; + + page = (char *)__get_free_page(GFP_USER); + if (!page) + return -ENOMEM; + + ret = callback_getenv(id, page, PAGE_SIZE); + + if ((ret >> 61) == 0) { + seq_write(m, page, ret); + ret = 0; + } else + ret = -EFAULT; + free_page((unsigned long)page); + return ret; +} + +static int srm_env_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, srm_env_proc_show, PDE_DATA(inode)); +} + +static ssize_t srm_env_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) +{ + int res; + unsigned long id = (unsigned long)PDE_DATA(file_inode(file)); + char *buf = (char *) __get_free_page(GFP_USER); + unsigned long ret1, ret2; + + if (!buf) + return -ENOMEM; + + res = -EINVAL; + if (count >= PAGE_SIZE) + goto out; + + res = -EFAULT; + if (copy_from_user(buf, buffer, count)) + goto out; + buf[count] = '\0'; + + ret1 = callback_setenv(id, buf, count); + if ((ret1 >> 61) == 0) { + do + ret2 = callback_save_env(); + while((ret2 >> 61) == 1); + res = (int) ret1; + } + + out: + free_page((unsigned long)buf); + return res; +} + +static const struct proc_ops srm_env_proc_ops = { + .proc_open = srm_env_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = srm_env_proc_write, +}; + +static int __init +srm_env_init(void) +{ + srm_env_t *entry; + unsigned long var_num; + + /* + * Check system + */ + if (!alpha_using_srm) { + printk(KERN_INFO "%s: This Alpha system doesn't " + "know about SRM (or you've booted " + "SRM->MILO->Linux, which gets " + "misdetected)...\n", __func__); + return -ENODEV; + } + + /* + * Create base directory + */ + base_dir = proc_mkdir(BASE_DIR, NULL); + if (!base_dir) { + printk(KERN_ERR "Couldn't create base dir /proc/%s\n", + BASE_DIR); + return -ENOMEM; + } + + /* + * Create per-name subdirectory + */ + named_dir = proc_mkdir(NAMED_DIR, base_dir); + if (!named_dir) { + printk(KERN_ERR "Couldn't create dir /proc/%s/%s\n", + BASE_DIR, NAMED_DIR); + goto 
cleanup; + } + + /* + * Create per-number subdirectory + */ + numbered_dir = proc_mkdir(NUMBERED_DIR, base_dir); + if (!numbered_dir) { + printk(KERN_ERR "Couldn't create dir /proc/%s/%s\n", + BASE_DIR, NUMBERED_DIR); + goto cleanup; + + } + + /* + * Create all named nodes + */ + entry = srm_named_entries; + while (entry->name && entry->id) { + if (!proc_create_data(entry->name, 0644, named_dir, + &srm_env_proc_ops, (void *)entry->id)) + goto cleanup; + entry++; + } + + /* + * Create all numbered nodes + */ + for (var_num = 0; var_num <= 255; var_num++) { + char name[4]; + sprintf(name, "%ld", var_num); + if (!proc_create_data(name, 0644, numbered_dir, + &srm_env_proc_ops, (void *)var_num)) + goto cleanup; + } + + printk(KERN_INFO "%s: version %s loaded successfully\n", NAME, + VERSION); + + return 0; + +cleanup: + remove_proc_subtree(BASE_DIR, NULL); + return -ENOMEM; +} + +static void __exit +srm_env_exit(void) +{ + remove_proc_subtree(BASE_DIR, NULL); + printk(KERN_INFO "%s: unloaded successfully\n", NAME); +} + +module_init(srm_env_init); +module_exit(srm_env_exit); diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c new file mode 100644 index 0000000000..6dc952b0df --- /dev/null +++ b/arch/alpha/kernel/srmcons.c @@ -0,0 +1,289 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/srmcons.c + * + * Callback based driver for SRM Console console device. + * (TTY driver and console driver) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +static DEFINE_SPINLOCK(srmcons_callback_lock); +static int srm_is_registered_console = 0; + +/* + * The TTY driver + */ +#define MAX_SRM_CONSOLE_DEVICES 1 /* only support 1 console device */ + +struct srmcons_private { + struct tty_port port; + struct timer_list timer; +} srmcons_singleton; + +typedef union _srmcons_result { + struct { + unsigned long c :61; + unsigned long status :3; + } bits; + long as_long; +} srmcons_result; + +/* called with callback_lock held */ +static int +srmcons_do_receive_chars(struct tty_port *port) +{ + srmcons_result result; + int count = 0, loops = 0; + + do { + result.as_long = callback_getc(0); + if (result.bits.status < 2) { + tty_insert_flip_char(port, (char)result.bits.c, 0); + count++; + } + } while((result.bits.status & 1) && (++loops < 10)); + + if (count) + tty_flip_buffer_push(port); + + return count; +} + +static void +srmcons_receive_chars(struct timer_list *t) +{ + struct srmcons_private *srmconsp = from_timer(srmconsp, t, timer); + struct tty_port *port = &srmconsp->port; + unsigned long flags; + int incr = 10; + + local_irq_save(flags); + if (spin_trylock(&srmcons_callback_lock)) { + if (!srmcons_do_receive_chars(port)) + incr = 100; + spin_unlock(&srmcons_callback_lock); + } + + spin_lock(&port->lock); + if (port->tty) + mod_timer(&srmconsp->timer, jiffies + incr); + spin_unlock(&port->lock); + + local_irq_restore(flags); +} + +/* called with callback_lock held */ +static int +srmcons_do_write(struct tty_port *port, const char *buf, int count) +{ + static char str_cr[1] = "\r"; + long c, remaining = count; + srmcons_result result; + char *cur; + int need_cr; + + for (cur = (char *)buf; remaining > 0; ) { + need_cr = 0; + /* + * Break it up into reasonable size chunks to allow a chance + * for input to get in + */ + for (c = 0; c < min_t(long, 128L, remaining) && !need_cr; c++) + if (cur[c] == '\n') + need_cr = 1; + + while (c > 0) { + result.as_long = callback_puts(0, cur, c); + 
c -= result.bits.c; + remaining -= result.bits.c; + cur += result.bits.c; + + /* + * Check for pending input iff a tty port was provided + */ + if (port) + srmcons_do_receive_chars(port); + } + + while (need_cr) { + result.as_long = callback_puts(0, str_cr, 1); + if (result.bits.c > 0) + need_cr = 0; + } + } + return count; +} + +static int +srmcons_write(struct tty_struct *tty, + const unsigned char *buf, int count) +{ + unsigned long flags; + + spin_lock_irqsave(&srmcons_callback_lock, flags); + srmcons_do_write(tty->port, (const char *) buf, count); + spin_unlock_irqrestore(&srmcons_callback_lock, flags); + + return count; +} + +static unsigned int +srmcons_write_room(struct tty_struct *tty) +{ + return 512; +} + +static int +srmcons_open(struct tty_struct *tty, struct file *filp) +{ + struct srmcons_private *srmconsp = &srmcons_singleton; + struct tty_port *port = &srmconsp->port; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + + if (!port->tty) { + tty->driver_data = srmconsp; + tty->port = port; + port->tty = tty; /* XXX proper refcounting */ + mod_timer(&srmconsp->timer, jiffies + 10); + } + + spin_unlock_irqrestore(&port->lock, flags); + + return 0; +} + +static void +srmcons_close(struct tty_struct *tty, struct file *filp) +{ + struct srmcons_private *srmconsp = tty->driver_data; + struct tty_port *port = &srmconsp->port; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + + if (tty->count == 1) { + port->tty = NULL; + del_timer(&srmconsp->timer); + } + + spin_unlock_irqrestore(&port->lock, flags); +} + + +static struct tty_driver *srmcons_driver; + +static const struct tty_operations srmcons_ops = { + .open = srmcons_open, + .close = srmcons_close, + .write = srmcons_write, + .write_room = srmcons_write_room, +}; + +static int __init +srmcons_init(void) +{ + timer_setup(&srmcons_singleton.timer, srmcons_receive_chars, 0); + if (srm_is_registered_console) { + struct tty_driver *driver; + int err; + + driver = tty_alloc_driver(MAX_SRM_CONSOLE_DEVICES, 0); + if (IS_ERR(driver)) + return PTR_ERR(driver); + + tty_port_init(&srmcons_singleton.port); + + driver->driver_name = "srm"; + driver->name = "srm"; + driver->major = 0; /* dynamic */ + driver->minor_start = 0; + driver->type = TTY_DRIVER_TYPE_SYSTEM; + driver->subtype = SYSTEM_TYPE_SYSCONS; + driver->init_termios = tty_std_termios; + tty_set_operations(driver, &srmcons_ops); + tty_port_link_device(&srmcons_singleton.port, driver, 0); + err = tty_register_driver(driver); + if (err) { + tty_driver_kref_put(driver); + tty_port_destroy(&srmcons_singleton.port); + return err; + } + srmcons_driver = driver; + } + + return -ENODEV; +} +device_initcall(srmcons_init); + + +/* + * The console driver + */ +static void +srm_console_write(struct console *co, const char *s, unsigned count) +{ + unsigned long flags; + + spin_lock_irqsave(&srmcons_callback_lock, flags); + srmcons_do_write(NULL, s, count); + spin_unlock_irqrestore(&srmcons_callback_lock, flags); +} + +static struct tty_driver * +srm_console_device(struct console *co, int *index) +{ + *index = co->index; + return srmcons_driver; +} + +static int +srm_console_setup(struct console *co, char *options) +{ + return 0; +} + +static struct console srmcons = { + .name = "srm", + .write = srm_console_write, + .device = srm_console_device, + .setup = srm_console_setup, + .flags = CON_PRINTBUFFER | CON_BOOT, + .index = -1, +}; + +void __init +register_srm_console(void) +{ + if (!srm_is_registered_console) { + callback_open_console(); + 
register_console(&srmcons); + srm_is_registered_console = 1; + } +} + +void __init +unregister_srm_console(void) +{ + if (srm_is_registered_console) { + callback_close_console(); + unregister_console(&srmcons); + srm_is_registered_console = 0; + } +} diff --git a/arch/alpha/kernel/sys_alcor.c b/arch/alpha/kernel/sys_alcor.c new file mode 100644 index 0000000000..e063b3857b --- /dev/null +++ b/arch/alpha/kernel/sys_alcor.c @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/sys_alcor.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * + * Code supporting the ALCOR and XLT (XL-300/366/433). + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +/* Note mask bit is true for ENABLED irqs. */ +static unsigned long cached_irq_mask; + +static inline void +alcor_update_irq_hw(unsigned long mask) +{ + *(vuip)GRU_INT_MASK = mask; + mb(); +} + +static inline void +alcor_enable_irq(struct irq_data *d) +{ + alcor_update_irq_hw(cached_irq_mask |= 1UL << (d->irq - 16)); +} + +static void +alcor_disable_irq(struct irq_data *d) +{ + alcor_update_irq_hw(cached_irq_mask &= ~(1UL << (d->irq - 16))); +} + +static void +alcor_mask_and_ack_irq(struct irq_data *d) +{ + alcor_disable_irq(d); + + /* On ALCOR/XLT, need to dismiss interrupt via GRU. */ + *(vuip)GRU_INT_CLEAR = 1 << (d->irq - 16); mb(); + *(vuip)GRU_INT_CLEAR = 0; mb(); +} + +static void +alcor_isa_mask_and_ack_irq(struct irq_data *d) +{ + i8259a_mask_and_ack_irq(d); + + /* On ALCOR/XLT, need to dismiss interrupt via GRU. */ + *(vuip)GRU_INT_CLEAR = 0x80000000; mb(); + *(vuip)GRU_INT_CLEAR = 0; mb(); +} + +static struct irq_chip alcor_irq_type = { + .name = "ALCOR", + .irq_unmask = alcor_enable_irq, + .irq_mask = alcor_disable_irq, + .irq_mask_ack = alcor_mask_and_ack_irq, +}; + +static void +alcor_device_interrupt(unsigned long vector) +{ + unsigned long pld; + unsigned int i; + + /* Read the interrupt summary register of the GRU */ + pld = (*(vuip)GRU_INT_REQ) & GRU_INT_REQ_BITS; + + /* + * Now for every possible bit set, work through them and call + * the appropriate interrupt handler. + */ + while (pld) { + i = ffz(~pld); + pld &= pld - 1; /* clear least bit set */ + if (i == 31) { + isa_device_interrupt(vector); + } else { + handle_irq(16 + i); + } + } +} + +static void __init +alcor_init_irq(void) +{ + long i; + + if (alpha_using_srm) + alpha_mv.device_interrupt = srm_device_interrupt; + + *(vuip)GRU_INT_MASK = 0; mb(); /* all disabled */ + *(vuip)GRU_INT_EDGE = 0; mb(); /* all are level */ + *(vuip)GRU_INT_HILO = 0x80000000U; mb(); /* ISA only HI */ + *(vuip)GRU_INT_CLEAR = 0; mb(); /* all clear */ + + for (i = 16; i < 48; ++i) { + /* On Alcor, at least, lines 20..30 are not connected + and can generate spurious interrupts if we turn them + on while IRQ probing. */ + if (i >= 16+20 && i <= 16+30) + continue; + irq_set_chip_and_handler(i, &alcor_irq_type, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + i8259a_irq_type.irq_ack = alcor_isa_mask_and_ack_irq; + + init_i8259a_irqs(); + common_init_isa_dma(); + + if (request_irq(16 + 31, no_action, 0, "isa-cascade", NULL)) + pr_err("Failed to register isa-cascade interrupt\n"); +} + + +/* + * PCI Fixup configuration. 
+ * + * Summary @ GRU_INT_REQ: + * Bit Meaning + * 0 Interrupt Line A from slot 2 + * 1 Interrupt Line B from slot 2 + * 2 Interrupt Line C from slot 2 + * 3 Interrupt Line D from slot 2 + * 4 Interrupt Line A from slot 1 + * 5 Interrupt line B from slot 1 + * 6 Interrupt Line C from slot 1 + * 7 Interrupt Line D from slot 1 + * 8 Interrupt Line A from slot 0 + * 9 Interrupt Line B from slot 0 + *10 Interrupt Line C from slot 0 + *11 Interrupt Line D from slot 0 + *12 Interrupt Line A from slot 4 + *13 Interrupt Line B from slot 4 + *14 Interrupt Line C from slot 4 + *15 Interrupt Line D from slot 4 + *16 Interrupt Line D from slot 3 + *17 Interrupt Line D from slot 3 + *18 Interrupt Line D from slot 3 + *19 Interrupt Line D from slot 3 + *20-30 Reserved + *31 EISA interrupt + * + * The device to slot mapping looks like: + * + * Slot Device + * 6 built-in TULIP (XLT only) + * 7 PCI on board slot 0 + * 8 PCI on board slot 3 + * 9 PCI on board slot 4 + * 10 PCEB (PCI-EISA bridge) + * 11 PCI on board slot 2 + * 12 PCI on board slot 1 + * + * + * This two layered interrupt approach means that we allocate IRQ 16 and + * above for PCI interrupts. The IRQ relates to which bit the interrupt + * comes in on. This makes interrupt processing much easier. + */ + +static int +alcor_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[7][5] = { + /*INT INTA INTB INTC INTD */ + /* note: IDSEL 17 is XLT only */ + {16+13, 16+13, 16+13, 16+13, 16+13}, /* IdSel 17, TULIP */ + { 16+8, 16+8, 16+9, 16+10, 16+11}, /* IdSel 18, slot 0 */ + {16+16, 16+16, 16+17, 16+18, 16+19}, /* IdSel 19, slot 3 */ + {16+12, 16+12, 16+13, 16+14, 16+15}, /* IdSel 20, slot 4 */ + { -1, -1, -1, -1, -1}, /* IdSel 21, PCEB */ + { 16+0, 16+0, 16+1, 16+2, 16+3}, /* IdSel 22, slot 2 */ + { 16+4, 16+4, 16+5, 16+6, 16+7}, /* IdSel 23, slot 1 */ + }; + const long min_idsel = 6, max_idsel = 12, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} + +static void +alcor_kill_arch(int mode) +{ + cia_kill_arch(mode); + +#ifndef ALPHA_RESTORE_SRM_SETUP + switch(mode) { + case LINUX_REBOOT_CMD_RESTART: + /* Who said DEC engineer's have no sense of humor? ;-) */ + if (alpha_using_srm) { + *(vuip) GRU_RESET = 0x0000dead; + mb(); + } + break; + case LINUX_REBOOT_CMD_HALT: + break; + case LINUX_REBOOT_CMD_POWER_OFF: + break; + } + + halt(); +#endif +} + +static void __init +alcor_init_pci(void) +{ + struct pci_dev *dev; + + cia_init_pci(); + + /* + * Now we can look to see if we are really running on an XLT-type + * motherboard, by looking for a 21040 TULIP in slot 6, which is + * built into XLT and BRET/MAVERICK, but not available on ALCOR. 
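alcor_map_irq() above, like the other *_map_irq() routines added later in this patch, maps a PCI (IDSEL, pin) pair to an IRQ through a per-board table: rows are indexed by IDSEL minus min_idsel, columns by interrupt pin, and -1 entries mean no IRQ is assigned. The actual lookup is the COMMON_TABLE_LOOKUP macro from pci_impl.h, which is not reproduced here; the function below is only an open-coded sketch of that style of lookup, with illustrative bounds checks.

/*
 * Sketch only: an open-coded lookup in the style used by the
 * *_map_irq() routines.  The real code uses the COMMON_TABLE_LOOKUP
 * macro from pci_impl.h; the indexing here is illustrative.
 */
static int example_map_irq(char irq_tab[][5], long min_idsel,
			   long max_idsel, long irqs_per_slot,
			   long slot, long pin)
{
	if (slot < min_idsel || slot > max_idsel || pin >= irqs_per_slot)
		return -1;			/* outside the table        */
	return irq_tab[slot - min_idsel][pin];	/* -1 entries mean "no IRQ" */
}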
+ */ + dev = pci_get_device(PCI_VENDOR_ID_DEC, + PCI_DEVICE_ID_DEC_TULIP, + NULL); + if (dev && dev->devfn == PCI_DEVFN(6,0)) { + alpha_mv.sys.cia.gru_int_req_bits = XLT_GRU_INT_REQ_BITS; + printk(KERN_INFO "%s: Detected AS500 or XLT motherboard.\n", + __func__); + } + pci_dev_put(dev); +} + + +/* + * The System Vectors + */ + +struct alpha_machine_vector alcor_mv __initmv = { + .vector_name = "Alcor", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_CIA_IO, + .machine_check = cia_machine_check, + .max_isa_dma_address = ALPHA_ALCOR_MAX_ISA_DMA_ADDRESS, + .min_io_address = EISA_DEFAULT_IO_BASE, + .min_mem_address = CIA_DEFAULT_MEM_BASE, + + .nr_irqs = 48, + .device_interrupt = alcor_device_interrupt, + + .init_arch = cia_init_arch, + .init_irq = alcor_init_irq, + .init_rtc = common_init_rtc, + .init_pci = alcor_init_pci, + .kill_arch = alcor_kill_arch, + .pci_map_irq = alcor_map_irq, + .pci_swizzle = common_swizzle, + + .sys = { .cia = { + .gru_int_req_bits = ALCOR_GRU_INT_REQ_BITS + }} +}; +ALIAS_MV(alcor) + +struct alpha_machine_vector xlt_mv __initmv = { + .vector_name = "XLT", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_CIA_IO, + .machine_check = cia_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = EISA_DEFAULT_IO_BASE, + .min_mem_address = CIA_DEFAULT_MEM_BASE, + + .nr_irqs = 48, + .device_interrupt = alcor_device_interrupt, + + .init_arch = cia_init_arch, + .init_irq = alcor_init_irq, + .init_rtc = common_init_rtc, + .init_pci = alcor_init_pci, + .kill_arch = alcor_kill_arch, + .pci_map_irq = alcor_map_irq, + .pci_swizzle = common_swizzle, + + .sys = { .cia = { + .gru_int_req_bits = XLT_GRU_INT_REQ_BITS + }} +}; + +/* No alpha_mv alias for XLT, since we compile it in unconditionally + with ALCOR; setup_arch knows how to cope. */ diff --git a/arch/alpha/kernel/sys_cabriolet.c b/arch/alpha/kernel/sys_cabriolet.c new file mode 100644 index 0000000000..47459b73cd --- /dev/null +++ b/arch/alpha/kernel/sys_cabriolet.c @@ -0,0 +1,444 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/sys_cabriolet.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999, 2000 Richard Henderson + * + * Code supporting the Cabriolet (AlphaPC64), EB66+, and EB164, + * PC164 and LX164. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" +#include "pc873xx.h" + +/* Note mask bit is true for DISABLED irqs. 
*/ +static unsigned long cached_irq_mask = ~0UL; + +static inline void +cabriolet_update_irq_hw(unsigned int irq, unsigned long mask) +{ + int ofs = (irq - 16) / 8; + outb(mask >> (16 + ofs * 8), 0x804 + ofs); +} + +static inline void +cabriolet_enable_irq(struct irq_data *d) +{ + cabriolet_update_irq_hw(d->irq, cached_irq_mask &= ~(1UL << d->irq)); +} + +static void +cabriolet_disable_irq(struct irq_data *d) +{ + cabriolet_update_irq_hw(d->irq, cached_irq_mask |= 1UL << d->irq); +} + +static struct irq_chip cabriolet_irq_type = { + .name = "CABRIOLET", + .irq_unmask = cabriolet_enable_irq, + .irq_mask = cabriolet_disable_irq, + .irq_mask_ack = cabriolet_disable_irq, +}; + +static void +cabriolet_device_interrupt(unsigned long v) +{ + unsigned long pld; + unsigned int i; + + /* Read the interrupt summary registers */ + pld = inb(0x804) | (inb(0x805) << 8) | (inb(0x806) << 16); + + /* + * Now for every possible bit set, work through them and call + * the appropriate interrupt handler. + */ + while (pld) { + i = ffz(~pld); + pld &= pld - 1; /* clear least bit set */ + if (i == 4) { + isa_device_interrupt(v); + } else { + handle_irq(16 + i); + } + } +} + +static void __init +common_init_irq(void (*srm_dev_int)(unsigned long v)) +{ + init_i8259a_irqs(); + + if (alpha_using_srm) { + alpha_mv.device_interrupt = srm_dev_int; + init_srm_irqs(35, 0); + } + else { + long i; + + outb(0xff, 0x804); + outb(0xff, 0x805); + outb(0xff, 0x806); + + for (i = 16; i < 35; ++i) { + irq_set_chip_and_handler(i, &cabriolet_irq_type, + handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + } + + common_init_isa_dma(); + if (request_irq(16 + 4, no_action, 0, "isa-cascade", NULL)) + pr_err("Failed to register isa-cascade interrupt\n"); +} + +#ifndef CONFIG_ALPHA_PC164 +static void __init +cabriolet_init_irq(void) +{ + common_init_irq(srm_device_interrupt); +} +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_PC164) +/* In theory, the PC164 has the same interrupt hardware as the other + Cabriolet based systems. However, something got screwed up late + in the development cycle which broke the interrupt masking hardware. + Repeat, it is not possible to mask and ack interrupts. At all. + + In an attempt to work around this, while processing interrupts, + we do not allow the IPL to drop below what it is currently. This + prevents the possibility of recursion. + + ??? Another option might be to force all PCI devices to use edge + triggered rather than level triggered interrupts. That might be + too invasive though. */ + +static void +pc164_srm_device_interrupt(unsigned long v) +{ + __min_ipl = getipl(); + srm_device_interrupt(v); + __min_ipl = 0; +} + +static void +pc164_device_interrupt(unsigned long v) +{ + __min_ipl = getipl(); + cabriolet_device_interrupt(v); + __min_ipl = 0; +} + +static void __init +pc164_init_irq(void) +{ + common_init_irq(pc164_srm_device_interrupt); +} +#endif + +/* + * The EB66+ is very similar to the EB66 except that it does not have + * the on-board NCR and Tulip chips. In the code below, I have used + * slot number to refer to the id select line and *not* the slot + * number used in the EB66+ documentation. However, in the table, + * I've given the slot number, the id select line and the Jxx number + * that's printed on the board. The interrupt pins from the PCI slots + * are wired into 3 interrupt summary registers at 0x804, 0x805 and + * 0x806 ISA. + * + * In the table, -1 means don't assign an IRQ number. 
This is usually + * because it is the Saturn IO (SIO) PCI/ISA Bridge Chip. + */ + +static inline int +eb66p_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[5][5] = { + /*INT INTA INTB INTC INTD */ + {16+0, 16+0, 16+5, 16+9, 16+13}, /* IdSel 6, slot 0, J25 */ + {16+1, 16+1, 16+6, 16+10, 16+14}, /* IdSel 7, slot 1, J26 */ + { -1, -1, -1, -1, -1}, /* IdSel 8, SIO */ + {16+2, 16+2, 16+7, 16+11, 16+15}, /* IdSel 9, slot 2, J27 */ + {16+3, 16+3, 16+8, 16+12, 16+6} /* IdSel 10, slot 3, J28 */ + }; + const long min_idsel = 6, max_idsel = 10, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} + + +/* + * The AlphaPC64 is very similar to the EB66+ except that its slots + * are numbered differently. In the code below, I have used slot + * number to refer to the id select line and *not* the slot number + * used in the AlphaPC64 documentation. However, in the table, I've + * given the slot number, the id select line and the Jxx number that's + * printed on the board. The interrupt pins from the PCI slots are + * wired into 3 interrupt summary registers at 0x804, 0x805 and 0x806 + * ISA. + * + * In the table, -1 means don't assign an IRQ number. This is usually + * because it is the Saturn IO (SIO) PCI/ISA Bridge Chip. + */ + +static inline int +cabriolet_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[5][5] = { + /*INT INTA INTB INTC INTD */ + { 16+2, 16+2, 16+7, 16+11, 16+15}, /* IdSel 5, slot 2, J21 */ + { 16+0, 16+0, 16+5, 16+9, 16+13}, /* IdSel 6, slot 0, J19 */ + { 16+1, 16+1, 16+6, 16+10, 16+14}, /* IdSel 7, slot 1, J20 */ + { -1, -1, -1, -1, -1}, /* IdSel 8, SIO */ + { 16+3, 16+3, 16+8, 16+12, 16+16} /* IdSel 9, slot 3, J22 */ + }; + const long min_idsel = 5, max_idsel = 9, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} + +static inline void __init +cabriolet_enable_ide(void) +{ + if (pc873xx_probe() == -1) { + printk(KERN_ERR "Probing for PC873xx Super IO chip failed.\n"); + } else { + printk(KERN_INFO "Found %s Super IO chip at 0x%x\n", + pc873xx_get_model(), pc873xx_get_base()); + + pc873xx_enable_ide(); + } +} + +static inline void __init +cabriolet_init_pci(void) +{ + common_init_pci(); + cabriolet_enable_ide(); +} + +static inline void __init +cia_cab_init_pci(void) +{ + cia_init_pci(); + cabriolet_enable_ide(); +} + +/* + * The PC164 and LX164 have 19 PCI interrupts, four from each of the four + * PCI slots, the SIO, PCI/IDE, and USB. + * + * Each of the interrupts can be individually masked. This is + * accomplished by setting the appropriate bit in the mask register. + * A bit is set by writing a "1" to the desired position in the mask + * register and cleared by writing a "0". There are 3 mask registers + * located at ISA address 804h, 805h and 806h. + * + * An I/O read at ISA address 804h, 805h, 806h will return the + * state of the 11 PCI interrupts and not the state of the MASKED + * interrupts. + * + * Note: A write to I/O 804h, 805h, and 806h the mask register will be + * updated. 
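+ *
+ * (In other words: a read returns the raw interrupt request lines, while a
+ * write updates only the mask, and the mask itself cannot be read back.)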
+ * + * + * ISA DATA<7:0> + * ISA +--------------------------------------------------------------+ + * ADDRESS | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | + * +==============================================================+ + * 0x804 | INTB0 | USB | IDE | SIO | INTA3 |INTA2 | INTA1 | INTA0 | + * +--------------------------------------------------------------+ + * 0x805 | INTD0 | INTC3 | INTC2 | INTC1 | INTC0 |INTB3 | INTB2 | INTB1 | + * +--------------------------------------------------------------+ + * 0x806 | Rsrv | Rsrv | Rsrv | Rsrv | Rsrv |INTD3 | INTD2 | INTD1 | + * +--------------------------------------------------------------+ + * * Rsrv = reserved bits + * Note: The mask register is write-only. + * + * IdSel + * 5 32 bit PCI option slot 2 + * 6 64 bit PCI option slot 0 + * 7 64 bit PCI option slot 1 + * 8 Saturn I/O + * 9 32 bit PCI option slot 3 + * 10 USB + * 11 IDE + * + */ + +static inline int +alphapc164_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[7][5] = { + /*INT INTA INTB INTC INTD */ + { 16+2, 16+2, 16+9, 16+13, 16+17}, /* IdSel 5, slot 2, J20 */ + { 16+0, 16+0, 16+7, 16+11, 16+15}, /* IdSel 6, slot 0, J29 */ + { 16+1, 16+1, 16+8, 16+12, 16+16}, /* IdSel 7, slot 1, J26 */ + { -1, -1, -1, -1, -1}, /* IdSel 8, SIO */ + { 16+3, 16+3, 16+10, 16+14, 16+18}, /* IdSel 9, slot 3, J19 */ + { 16+6, 16+6, 16+6, 16+6, 16+6}, /* IdSel 10, USB */ + { 16+5, 16+5, 16+5, 16+5, 16+5} /* IdSel 11, IDE */ + }; + const long min_idsel = 5, max_idsel = 11, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} + +static inline void __init +alphapc164_init_pci(void) +{ + cia_init_pci(); + SMC93x_Init(); +} + + +/* + * The System Vector + */ + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_CABRIOLET) +struct alpha_machine_vector cabriolet_mv __initmv = { + .vector_name = "Cabriolet", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_APECS_IO, + .machine_check = apecs_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = APECS_AND_LCA_DEFAULT_MEM_BASE, + + .nr_irqs = 35, + .device_interrupt = cabriolet_device_interrupt, + + .init_arch = apecs_init_arch, + .init_irq = cabriolet_init_irq, + .init_rtc = common_init_rtc, + .init_pci = cabriolet_init_pci, + .pci_map_irq = cabriolet_map_irq, + .pci_swizzle = common_swizzle, +}; +#ifndef CONFIG_ALPHA_EB64P +ALIAS_MV(cabriolet) +#endif +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_EB164) +struct alpha_machine_vector eb164_mv __initmv = { + .vector_name = "EB164", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_CIA_IO, + .machine_check = cia_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = CIA_DEFAULT_MEM_BASE, + + .nr_irqs = 35, + .device_interrupt = cabriolet_device_interrupt, + + .init_arch = cia_init_arch, + .init_irq = cabriolet_init_irq, + .init_rtc = common_init_rtc, + .init_pci = cia_cab_init_pci, + .kill_arch = cia_kill_arch, + .pci_map_irq = cabriolet_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(eb164) +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_EB66P) +struct alpha_machine_vector eb66p_mv __initmv = { + .vector_name = "EB66+", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_LCA_IO, + .machine_check = lca_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = APECS_AND_LCA_DEFAULT_MEM_BASE, + + .nr_irqs = 35, + .device_interrupt = cabriolet_device_interrupt, + + .init_arch = 
lca_init_arch, + .init_irq = cabriolet_init_irq, + .init_rtc = common_init_rtc, + .init_pci = cabriolet_init_pci, + .pci_map_irq = eb66p_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(eb66p) +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_LX164) +struct alpha_machine_vector lx164_mv __initmv = { + .vector_name = "LX164", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_PYXIS_IO, + .machine_check = cia_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = PYXIS_DAC_OFFSET, + + .nr_irqs = 35, + .device_interrupt = cabriolet_device_interrupt, + + .init_arch = pyxis_init_arch, + .init_irq = cabriolet_init_irq, + .init_rtc = common_init_rtc, + .init_pci = alphapc164_init_pci, + .kill_arch = cia_kill_arch, + .pci_map_irq = alphapc164_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(lx164) +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_PC164) +struct alpha_machine_vector pc164_mv __initmv = { + .vector_name = "PC164", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_CIA_IO, + .machine_check = cia_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = CIA_DEFAULT_MEM_BASE, + + .nr_irqs = 35, + .device_interrupt = pc164_device_interrupt, + + .init_arch = cia_init_arch, + .init_irq = pc164_init_irq, + .init_rtc = common_init_rtc, + .init_pci = alphapc164_init_pci, + .kill_arch = cia_kill_arch, + .pci_map_irq = alphapc164_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(pc164) +#endif diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c new file mode 100644 index 0000000000..9fb445d7dc --- /dev/null +++ b/arch/alpha/kernel/sys_dp264.c @@ -0,0 +1,665 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/sys_dp264.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996, 1999 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * + * Modified by Christopher C. Chimelis, 2001 to + * add support for the addition of Shark to the + * Tsunami family. + * + * Code supporting the DP264 (EV6+TSUNAMI). + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +/* Note mask bit is true for ENABLED irqs. 
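+ *
+ * Unlike the Cabriolet-style mask above, this is a positive enable mask:
+ * tsunami_update_irq_hw() below fans it out to the per-CPU DIM registers,
+ * ANDed with cpu_irq_affinity[cpu], with the ISA summary bit (1UL << 55)
+ * forced on for the boot CPU only.  Roughly (a sketch, not original code,
+ * with dim[n] standing for cchip->dimN.csr), each CPU n in the SMP case
+ * ends up with:
+ *
+ *	dim[n] = (mask & ~isa_enable & cpu_irq_affinity[n])
+ *		 | (n == boot_cpuid ? isa_enable : 0);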
*/ +static unsigned long cached_irq_mask; +/* dp264 boards handle at max four CPUs */ +static unsigned long cpu_irq_affinity[4] = { 0UL, 0UL, 0UL, 0UL }; + +DEFINE_SPINLOCK(dp264_irq_lock); + +static void +tsunami_update_irq_hw(unsigned long mask) +{ + register tsunami_cchip *cchip = TSUNAMI_cchip; + unsigned long isa_enable = 1UL << 55; + register int bcpu = boot_cpuid; + +#ifdef CONFIG_SMP + volatile unsigned long *dim0, *dim1, *dim2, *dim3; + unsigned long mask0, mask1, mask2, mask3, dummy; + + mask &= ~isa_enable; + mask0 = mask & cpu_irq_affinity[0]; + mask1 = mask & cpu_irq_affinity[1]; + mask2 = mask & cpu_irq_affinity[2]; + mask3 = mask & cpu_irq_affinity[3]; + + if (bcpu == 0) mask0 |= isa_enable; + else if (bcpu == 1) mask1 |= isa_enable; + else if (bcpu == 2) mask2 |= isa_enable; + else mask3 |= isa_enable; + + dim0 = &cchip->dim0.csr; + dim1 = &cchip->dim1.csr; + dim2 = &cchip->dim2.csr; + dim3 = &cchip->dim3.csr; + if (!cpu_possible(0)) dim0 = &dummy; + if (!cpu_possible(1)) dim1 = &dummy; + if (!cpu_possible(2)) dim2 = &dummy; + if (!cpu_possible(3)) dim3 = &dummy; + + *dim0 = mask0; + *dim1 = mask1; + *dim2 = mask2; + *dim3 = mask3; + mb(); + *dim0; + *dim1; + *dim2; + *dim3; +#else + volatile unsigned long *dimB; + if (bcpu == 0) dimB = &cchip->dim0.csr; + else if (bcpu == 1) dimB = &cchip->dim1.csr; + else if (bcpu == 2) dimB = &cchip->dim2.csr; + else dimB = &cchip->dim3.csr; + + *dimB = mask | isa_enable; + mb(); + *dimB; +#endif +} + +static void +dp264_enable_irq(struct irq_data *d) +{ + spin_lock(&dp264_irq_lock); + cached_irq_mask |= 1UL << d->irq; + tsunami_update_irq_hw(cached_irq_mask); + spin_unlock(&dp264_irq_lock); +} + +static void +dp264_disable_irq(struct irq_data *d) +{ + spin_lock(&dp264_irq_lock); + cached_irq_mask &= ~(1UL << d->irq); + tsunami_update_irq_hw(cached_irq_mask); + spin_unlock(&dp264_irq_lock); +} + +static void +clipper_enable_irq(struct irq_data *d) +{ + spin_lock(&dp264_irq_lock); + cached_irq_mask |= 1UL << (d->irq - 16); + tsunami_update_irq_hw(cached_irq_mask); + spin_unlock(&dp264_irq_lock); +} + +static void +clipper_disable_irq(struct irq_data *d) +{ + spin_lock(&dp264_irq_lock); + cached_irq_mask &= ~(1UL << (d->irq - 16)); + tsunami_update_irq_hw(cached_irq_mask); + spin_unlock(&dp264_irq_lock); +} + +static void +cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity) +{ + int cpu; + + for (cpu = 0; cpu < 4; cpu++) { + unsigned long aff = cpu_irq_affinity[cpu]; + if (cpumask_test_cpu(cpu, &affinity)) + aff |= 1UL << irq; + else + aff &= ~(1UL << irq); + cpu_irq_affinity[cpu] = aff; + } +} + +static int +dp264_set_affinity(struct irq_data *d, const struct cpumask *affinity, + bool force) +{ + spin_lock(&dp264_irq_lock); + cpu_set_irq_affinity(d->irq, *affinity); + tsunami_update_irq_hw(cached_irq_mask); + spin_unlock(&dp264_irq_lock); + + return 0; +} + +static int +clipper_set_affinity(struct irq_data *d, const struct cpumask *affinity, + bool force) +{ + spin_lock(&dp264_irq_lock); + cpu_set_irq_affinity(d->irq - 16, *affinity); + tsunami_update_irq_hw(cached_irq_mask); + spin_unlock(&dp264_irq_lock); + + return 0; +} + +static struct irq_chip dp264_irq_type = { + .name = "DP264", + .irq_unmask = dp264_enable_irq, + .irq_mask = dp264_disable_irq, + .irq_mask_ack = dp264_disable_irq, + .irq_set_affinity = dp264_set_affinity, +}; + +static struct irq_chip clipper_irq_type = { + .name = "CLIPPER", + .irq_unmask = clipper_enable_irq, + .irq_mask = clipper_disable_irq, + .irq_mask_ack = clipper_disable_irq, + .irq_set_affinity = 
clipper_set_affinity, +}; + +static void +dp264_device_interrupt(unsigned long vector) +{ + unsigned long pld; + unsigned int i; + + /* Read the interrupt summary register of TSUNAMI */ + pld = TSUNAMI_cchip->dir0.csr; + + /* + * Now for every possible bit set, work through them and call + * the appropriate interrupt handler. + */ + while (pld) { + i = ffz(~pld); + pld &= pld - 1; /* clear least bit set */ + if (i == 55) + isa_device_interrupt(vector); + else + handle_irq(16 + i); + } +} + +static void +dp264_srm_device_interrupt(unsigned long vector) +{ + int irq; + + irq = (vector - 0x800) >> 4; + + /* + * The SRM console reports PCI interrupts with a vector calculated by: + * + * 0x900 + (0x10 * DRIR-bit) + * + * So bit 16 shows up as IRQ 32, etc. + * + * On DP264/BRICK/MONET, we adjust it down by 16 because at least + * that many of the low order bits of the DRIR are not used, and + * so we don't count them. + */ + if (irq >= 32) + irq -= 16; + + handle_irq(irq); +} + +static void +clipper_srm_device_interrupt(unsigned long vector) +{ + int irq; + + irq = (vector - 0x800) >> 4; + +/* + * The SRM console reports PCI interrupts with a vector calculated by: + * + * 0x900 + (0x10 * DRIR-bit) + * + * So bit 16 shows up as IRQ 32, etc. + * + * CLIPPER uses bits 8-47 for PCI interrupts, so we do not need + * to scale down the vector reported, we just use it. + * + * Eg IRQ 24 is DRIR bit 8, etc, etc + */ + handle_irq(irq); +} + +static void __init +init_tsunami_irqs(struct irq_chip * ops, int imin, int imax) +{ + long i; + for (i = imin; i <= imax; ++i) { + irq_set_chip_and_handler(i, ops, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } +} + +static void __init +dp264_init_irq(void) +{ + outb(0, DMA1_RESET_REG); + outb(0, DMA2_RESET_REG); + outb(DMA_MODE_CASCADE, DMA2_MODE_REG); + outb(0, DMA2_MASK_REG); + + if (alpha_using_srm) + alpha_mv.device_interrupt = dp264_srm_device_interrupt; + + tsunami_update_irq_hw(0); + + init_i8259a_irqs(); + init_tsunami_irqs(&dp264_irq_type, 16, 47); +} + +static void __init +clipper_init_irq(void) +{ + outb(0, DMA1_RESET_REG); + outb(0, DMA2_RESET_REG); + outb(DMA_MODE_CASCADE, DMA2_MODE_REG); + outb(0, DMA2_MASK_REG); + + if (alpha_using_srm) + alpha_mv.device_interrupt = clipper_srm_device_interrupt; + + tsunami_update_irq_hw(0); + + init_i8259a_irqs(); + init_tsunami_irqs(&clipper_irq_type, 24, 63); +} + + +/* + * PCI Fixup configuration. 
+ * + * Summary @ TSUNAMI_CSR_DIM0: + * Bit Meaning + * 0-17 Unused + *18 Interrupt SCSI B (Adaptec 7895 builtin) + *19 Interrupt SCSI A (Adaptec 7895 builtin) + *20 Interrupt Line D from slot 2 PCI0 + *21 Interrupt Line C from slot 2 PCI0 + *22 Interrupt Line B from slot 2 PCI0 + *23 Interrupt Line A from slot 2 PCI0 + *24 Interrupt Line D from slot 1 PCI0 + *25 Interrupt Line C from slot 1 PCI0 + *26 Interrupt Line B from slot 1 PCI0 + *27 Interrupt Line A from slot 1 PCI0 + *28 Interrupt Line D from slot 0 PCI0 + *29 Interrupt Line C from slot 0 PCI0 + *30 Interrupt Line B from slot 0 PCI0 + *31 Interrupt Line A from slot 0 PCI0 + * + *32 Interrupt Line D from slot 3 PCI1 + *33 Interrupt Line C from slot 3 PCI1 + *34 Interrupt Line B from slot 3 PCI1 + *35 Interrupt Line A from slot 3 PCI1 + *36 Interrupt Line D from slot 2 PCI1 + *37 Interrupt Line C from slot 2 PCI1 + *38 Interrupt Line B from slot 2 PCI1 + *39 Interrupt Line A from slot 2 PCI1 + *40 Interrupt Line D from slot 1 PCI1 + *41 Interrupt Line C from slot 1 PCI1 + *42 Interrupt Line B from slot 1 PCI1 + *43 Interrupt Line A from slot 1 PCI1 + *44 Interrupt Line D from slot 0 PCI1 + *45 Interrupt Line C from slot 0 PCI1 + *46 Interrupt Line B from slot 0 PCI1 + *47 Interrupt Line A from slot 0 PCI1 + *48-52 Unused + *53 PCI0 NMI (from Cypress) + *54 PCI0 SMI INT (from Cypress) + *55 PCI0 ISA Interrupt (from Cypress) + *56-60 Unused + *61 PCI1 Bus Error + *62 PCI0 Bus Error + *63 Reserved + * + * IdSel + * 5 Cypress Bridge I/O + * 6 SCSI Adaptec builtin + * 7 64 bit PCI option slot 0 (all busses) + * 8 64 bit PCI option slot 1 (all busses) + * 9 64 bit PCI option slot 2 (all busses) + * 10 64 bit PCI option slot 3 (not bus 0) + */ + +static int +isa_irq_fixup(const struct pci_dev *dev, int irq) +{ + u8 irq8; + + if (irq > 0) + return irq; + + /* This interrupt is routed via ISA bridge, so we'll + just have to trust whatever value the console might + have assigned. 
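+   Only the low four bits of that value are kept below, since an
+   interrupt routed through the ISA bridge can only be IRQ 0-15.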
*/ + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq8); + + return irq8 & 0xf; +} + +static int +dp264_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[6][5] = { + /*INT INTA INTB INTC INTD */ + { -1, -1, -1, -1, -1}, /* IdSel 5 ISA Bridge */ + { 16+ 3, 16+ 3, 16+ 2, 16+ 2, 16+ 2}, /* IdSel 6 SCSI builtin*/ + { 16+15, 16+15, 16+14, 16+13, 16+12}, /* IdSel 7 slot 0 */ + { 16+11, 16+11, 16+10, 16+ 9, 16+ 8}, /* IdSel 8 slot 1 */ + { 16+ 7, 16+ 7, 16+ 6, 16+ 5, 16+ 4}, /* IdSel 9 slot 2 */ + { 16+ 3, 16+ 3, 16+ 2, 16+ 1, 16+ 0} /* IdSel 10 slot 3 */ + }; + const long min_idsel = 5, max_idsel = 10, irqs_per_slot = 5; + struct pci_controller *hose = dev->sysdata; + int irq = COMMON_TABLE_LOOKUP; + + if (irq > 0) + irq += 16 * hose->index; + + return isa_irq_fixup(dev, irq); +} + +static int +monet_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[13][5] = { + /*INT INTA INTB INTC INTD */ + { 45, 45, 45, 45, 45}, /* IdSel 3 21143 PCI1 */ + { -1, -1, -1, -1, -1}, /* IdSel 4 unused */ + { -1, -1, -1, -1, -1}, /* IdSel 5 unused */ + { 47, 47, 47, 47, 47}, /* IdSel 6 SCSI PCI1 */ + { -1, -1, -1, -1, -1}, /* IdSel 7 ISA Bridge */ + { -1, -1, -1, -1, -1}, /* IdSel 8 P2P PCI1 */ +#if 1 + { 28, 28, 29, 30, 31}, /* IdSel 14 slot 4 PCI2*/ + { 24, 24, 25, 26, 27}, /* IdSel 15 slot 5 PCI2*/ +#else + { -1, -1, -1, -1, -1}, /* IdSel 9 unused */ + { -1, -1, -1, -1, -1}, /* IdSel 10 unused */ +#endif + { 40, 40, 41, 42, 43}, /* IdSel 11 slot 1 PCI0*/ + { 36, 36, 37, 38, 39}, /* IdSel 12 slot 2 PCI0*/ + { 32, 32, 33, 34, 35}, /* IdSel 13 slot 3 PCI0*/ + { 28, 28, 29, 30, 31}, /* IdSel 14 slot 4 PCI2*/ + { 24, 24, 25, 26, 27} /* IdSel 15 slot 5 PCI2*/ + }; + const long min_idsel = 3, max_idsel = 15, irqs_per_slot = 5; + + return isa_irq_fixup(dev, COMMON_TABLE_LOOKUP); +} + +static u8 +monet_swizzle(struct pci_dev *dev, u8 *pinp) +{ + struct pci_controller *hose = dev->sysdata; + int slot, pin = *pinp; + + if (!dev->bus->parent) { + slot = PCI_SLOT(dev->devfn); + } + /* Check for the built-in bridge on hose 1. */ + else if (hose->index == 1 && PCI_SLOT(dev->bus->self->devfn) == 8) { + slot = PCI_SLOT(dev->devfn); + } else { + /* Must be a card-based bridge. */ + do { + /* Check for built-in bridge on hose 1. */ + if (hose->index == 1 && + PCI_SLOT(dev->bus->self->devfn) == 8) { + slot = PCI_SLOT(dev->devfn); + break; + } + pin = pci_swizzle_interrupt_pin(dev, pin); + + /* Move up the chain of bridges. */ + dev = dev->bus->self; + /* Slot of the next bridge. 
*/ + slot = PCI_SLOT(dev->devfn); + } while (dev->bus->self); + } + *pinp = pin; + return slot; +} + +static int +webbrick_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[13][5] = { + /*INT INTA INTB INTC INTD */ + { -1, -1, -1, -1, -1}, /* IdSel 7 ISA Bridge */ + { -1, -1, -1, -1, -1}, /* IdSel 8 unused */ + { 29, 29, 29, 29, 29}, /* IdSel 9 21143 #1 */ + { -1, -1, -1, -1, -1}, /* IdSel 10 unused */ + { 30, 30, 30, 30, 30}, /* IdSel 11 21143 #2 */ + { -1, -1, -1, -1, -1}, /* IdSel 12 unused */ + { -1, -1, -1, -1, -1}, /* IdSel 13 unused */ + { 35, 35, 34, 33, 32}, /* IdSel 14 slot 0 */ + { 39, 39, 38, 37, 36}, /* IdSel 15 slot 1 */ + { 43, 43, 42, 41, 40}, /* IdSel 16 slot 2 */ + { 47, 47, 46, 45, 44}, /* IdSel 17 slot 3 */ + }; + const long min_idsel = 7, max_idsel = 17, irqs_per_slot = 5; + + return isa_irq_fixup(dev, COMMON_TABLE_LOOKUP); +} + +static int +clipper_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[7][5] = { + /*INT INTA INTB INTC INTD */ + { 16+ 8, 16+ 8, 16+ 9, 16+10, 16+11}, /* IdSel 1 slot 1 */ + { 16+12, 16+12, 16+13, 16+14, 16+15}, /* IdSel 2 slot 2 */ + { 16+16, 16+16, 16+17, 16+18, 16+19}, /* IdSel 3 slot 3 */ + { 16+20, 16+20, 16+21, 16+22, 16+23}, /* IdSel 4 slot 4 */ + { 16+24, 16+24, 16+25, 16+26, 16+27}, /* IdSel 5 slot 5 */ + { 16+28, 16+28, 16+29, 16+30, 16+31}, /* IdSel 6 slot 6 */ + { -1, -1, -1, -1, -1} /* IdSel 7 ISA Bridge */ + }; + const long min_idsel = 1, max_idsel = 7, irqs_per_slot = 5; + struct pci_controller *hose = dev->sysdata; + int irq = COMMON_TABLE_LOOKUP; + + if (irq > 0) + irq += 16 * hose->index; + + return isa_irq_fixup(dev, irq); +} + +static void __init +dp264_init_pci(void) +{ + common_init_pci(); + SMC669_Init(0); + locate_and_init_vga(NULL); +} + +static void __init +monet_init_pci(void) +{ + common_init_pci(); + SMC669_Init(1); + es1888_init(); + locate_and_init_vga(NULL); +} + +static void __init +clipper_init_pci(void) +{ + common_init_pci(); + locate_and_init_vga(NULL); +} + +static void __init +webbrick_init_arch(void) +{ + tsunami_init_arch(); + + /* Tsunami caches 4 PTEs at a time; DS10 has only 1 hose. 
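+   Aligning scatter-gather allocations on 4-entry boundaries
+   presumably keeps each cached group of PTEs within a single
+   mapping; hence the align_entry = 4 below.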
*/ + hose_head->sg_isa->align_entry = 4; + hose_head->sg_pci->align_entry = 4; +} + + +/* + * The System Vectors + */ + +struct alpha_machine_vector dp264_mv __initmv = { + .vector_name = "DP264", + DO_EV6_MMU, + DO_DEFAULT_RTC, + DO_TSUNAMI_IO, + .machine_check = tsunami_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = TSUNAMI_DAC_OFFSET, + + .nr_irqs = 64, + .device_interrupt = dp264_device_interrupt, + + .init_arch = tsunami_init_arch, + .init_irq = dp264_init_irq, + .init_rtc = common_init_rtc, + .init_pci = dp264_init_pci, + .kill_arch = tsunami_kill_arch, + .pci_map_irq = dp264_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(dp264) + +struct alpha_machine_vector monet_mv __initmv = { + .vector_name = "Monet", + DO_EV6_MMU, + DO_DEFAULT_RTC, + DO_TSUNAMI_IO, + .machine_check = tsunami_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = TSUNAMI_DAC_OFFSET, + + .nr_irqs = 64, + .device_interrupt = dp264_device_interrupt, + + .init_arch = tsunami_init_arch, + .init_irq = dp264_init_irq, + .init_rtc = common_init_rtc, + .init_pci = monet_init_pci, + .kill_arch = tsunami_kill_arch, + .pci_map_irq = monet_map_irq, + .pci_swizzle = monet_swizzle, +}; + +struct alpha_machine_vector webbrick_mv __initmv = { + .vector_name = "Webbrick", + DO_EV6_MMU, + DO_DEFAULT_RTC, + DO_TSUNAMI_IO, + .machine_check = tsunami_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = TSUNAMI_DAC_OFFSET, + + .nr_irqs = 64, + .device_interrupt = dp264_device_interrupt, + + .init_arch = webbrick_init_arch, + .init_irq = dp264_init_irq, + .init_rtc = common_init_rtc, + .init_pci = common_init_pci, + .kill_arch = tsunami_kill_arch, + .pci_map_irq = webbrick_map_irq, + .pci_swizzle = common_swizzle, +}; + +struct alpha_machine_vector clipper_mv __initmv = { + .vector_name = "Clipper", + DO_EV6_MMU, + DO_DEFAULT_RTC, + DO_TSUNAMI_IO, + .machine_check = tsunami_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = TSUNAMI_DAC_OFFSET, + + .nr_irqs = 64, + .device_interrupt = dp264_device_interrupt, + + .init_arch = tsunami_init_arch, + .init_irq = clipper_init_irq, + .init_rtc = common_init_rtc, + .init_pci = clipper_init_pci, + .kill_arch = tsunami_kill_arch, + .pci_map_irq = clipper_map_irq, + .pci_swizzle = common_swizzle, +}; + +/* Sharks strongly resemble Clipper, at least as far + * as interrupt routing, etc, so we're using the + * same functions as Clipper does + */ + +struct alpha_machine_vector shark_mv __initmv = { + .vector_name = "Shark", + DO_EV6_MMU, + DO_DEFAULT_RTC, + DO_TSUNAMI_IO, + .machine_check = tsunami_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = TSUNAMI_DAC_OFFSET, + + .nr_irqs = 64, + .device_interrupt = dp264_device_interrupt, + + .init_arch = tsunami_init_arch, + .init_irq = clipper_init_irq, + .init_rtc = common_init_rtc, + .init_pci = common_init_pci, + .kill_arch = tsunami_kill_arch, + .pci_map_irq = clipper_map_irq, + .pci_swizzle = common_swizzle, +}; + +/* No alpha_mv alias for webbrick/monet/clipper, since we compile them + in 
unconditionally with DP264; setup_arch knows how to cope. */ diff --git a/arch/alpha/kernel/sys_eb64p.c b/arch/alpha/kernel/sys_eb64p.c new file mode 100644 index 0000000000..3c43fd3475 --- /dev/null +++ b/arch/alpha/kernel/sys_eb64p.c @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/sys_eb64p.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * + * Code supporting the EB64+ and EB66. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +/* Note mask bit is true for DISABLED irqs. */ +static unsigned int cached_irq_mask = -1; + +static inline void +eb64p_update_irq_hw(unsigned int irq, unsigned long mask) +{ + outb(mask >> (irq >= 24 ? 24 : 16), (irq >= 24 ? 0x27 : 0x26)); +} + +static inline void +eb64p_enable_irq(struct irq_data *d) +{ + eb64p_update_irq_hw(d->irq, cached_irq_mask &= ~(1 << d->irq)); +} + +static void +eb64p_disable_irq(struct irq_data *d) +{ + eb64p_update_irq_hw(d->irq, cached_irq_mask |= 1 << d->irq); +} + +static struct irq_chip eb64p_irq_type = { + .name = "EB64P", + .irq_unmask = eb64p_enable_irq, + .irq_mask = eb64p_disable_irq, + .irq_mask_ack = eb64p_disable_irq, +}; + +static void +eb64p_device_interrupt(unsigned long vector) +{ + unsigned long pld; + unsigned int i; + + /* Read the interrupt summary registers */ + pld = inb(0x26) | (inb(0x27) << 8); + + /* + * Now, for every possible bit set, work through + * them and call the appropriate interrupt handler. + */ + while (pld) { + i = ffz(~pld); + pld &= pld - 1; /* clear least bit set */ + + if (i == 5) { + isa_device_interrupt(vector); + } else { + handle_irq(16 + i); + } + } +} + +static void __init +eb64p_init_irq(void) +{ + long i; + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_CABRIOLET) + /* + * CABRIO SRM may not set variation correctly, so here we test + * the high word of the interrupt summary register for the RAZ + * bits, and hope that a true EB64+ would read all ones... + */ + if (inw(0x806) != 0xffff) { + extern struct alpha_machine_vector cabriolet_mv; + + printk("Detected Cabriolet: correcting HWRPB.\n"); + + hwrpb->sys_variation |= 2L << 10; + hwrpb_update_checksum(hwrpb); + + alpha_mv = cabriolet_mv; + alpha_mv.init_irq(); + return; + } +#endif /* GENERIC */ + + outb(0xff, 0x26); + outb(0xff, 0x27); + + init_i8259a_irqs(); + + for (i = 16; i < 32; ++i) { + irq_set_chip_and_handler(i, &eb64p_irq_type, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + + common_init_isa_dma(); + if (request_irq(16 + 5, no_action, 0, "isa-cascade", NULL)) + pr_err("Failed to register isa-cascade interrupt\n"); +} + +/* + * PCI Fixup configuration. 
+ * + * There are two 8 bit external summary registers as follows: + * + * Summary @ 0x26: + * Bit Meaning + * 0 Interrupt Line A from slot 0 + * 1 Interrupt Line A from slot 1 + * 2 Interrupt Line B from slot 0 + * 3 Interrupt Line B from slot 1 + * 4 Interrupt Line C from slot 0 + * 5 Interrupt line from the two ISA PICs + * 6 Tulip + * 7 NCR SCSI + * + * Summary @ 0x27 + * Bit Meaning + * 0 Interrupt Line C from slot 1 + * 1 Interrupt Line D from slot 0 + * 2 Interrupt Line D from slot 1 + * 3 RAZ + * 4 RAZ + * 5 RAZ + * 6 RAZ + * 7 RAZ + * + * The device to slot mapping looks like: + * + * Slot Device + * 5 NCR SCSI controller + * 6 PCI on board slot 0 + * 7 PCI on board slot 1 + * 8 Intel SIO PCI-ISA bridge chip + * 9 Tulip - DECchip 21040 Ethernet controller + * + * + * This two layered interrupt approach means that we allocate IRQ 16 and + * above for PCI interrupts. The IRQ relates to which bit the interrupt + * comes in on. This makes interrupt processing much easier. + */ + +static int +eb64p_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[5][5] = { + /*INT INTA INTB INTC INTD */ + {16+7, 16+7, 16+7, 16+7, 16+7}, /* IdSel 5, slot ?, ?? */ + {16+0, 16+0, 16+2, 16+4, 16+9}, /* IdSel 6, slot ?, ?? */ + {16+1, 16+1, 16+3, 16+8, 16+10}, /* IdSel 7, slot ?, ?? */ + { -1, -1, -1, -1, -1}, /* IdSel 8, SIO */ + {16+6, 16+6, 16+6, 16+6, 16+6}, /* IdSel 9, TULIP */ + }; + const long min_idsel = 5, max_idsel = 9, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} + + +/* + * The System Vector + */ + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_EB64P) +struct alpha_machine_vector eb64p_mv __initmv = { + .vector_name = "EB64+", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_APECS_IO, + .machine_check = apecs_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = APECS_AND_LCA_DEFAULT_MEM_BASE, + + .nr_irqs = 32, + .device_interrupt = eb64p_device_interrupt, + + .init_arch = apecs_init_arch, + .init_irq = eb64p_init_irq, + .init_rtc = common_init_rtc, + .init_pci = common_init_pci, + .kill_arch = NULL, + .pci_map_irq = eb64p_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(eb64p) +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_EB66) +struct alpha_machine_vector eb66_mv __initmv = { + .vector_name = "EB66", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_LCA_IO, + .machine_check = lca_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = APECS_AND_LCA_DEFAULT_MEM_BASE, + + .nr_irqs = 32, + .device_interrupt = eb64p_device_interrupt, + + .init_arch = lca_init_arch, + .init_irq = eb64p_init_irq, + .init_rtc = common_init_rtc, + .init_pci = common_init_pci, + .pci_map_irq = eb64p_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(eb66) +#endif diff --git a/arch/alpha/kernel/sys_eiger.c b/arch/alpha/kernel/sys_eiger.c new file mode 100644 index 0000000000..aea8a54da4 --- /dev/null +++ b/arch/alpha/kernel/sys_eiger.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/sys_eiger.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996, 1999 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * Copyright (C) 1999 Iain Grant + * + * Code supporting the EIGER (EV6+TSUNAMI). 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +/* Note that this interrupt code is identical to TAKARA. */ + +/* Note mask bit is true for DISABLED irqs. */ +static unsigned long cached_irq_mask[2] = { -1, -1 }; + +static inline void +eiger_update_irq_hw(unsigned long irq, unsigned long mask) +{ + int regaddr; + + mask = (irq >= 64 ? mask << 16 : mask >> ((irq - 16) & 0x30)); + regaddr = 0x510 + (((irq - 16) >> 2) & 0x0c); + outl(mask & 0xffff0000UL, regaddr); +} + +static inline void +eiger_enable_irq(struct irq_data *d) +{ + unsigned int irq = d->irq; + unsigned long mask; + mask = (cached_irq_mask[irq >= 64] &= ~(1UL << (irq & 63))); + eiger_update_irq_hw(irq, mask); +} + +static void +eiger_disable_irq(struct irq_data *d) +{ + unsigned int irq = d->irq; + unsigned long mask; + mask = (cached_irq_mask[irq >= 64] |= 1UL << (irq & 63)); + eiger_update_irq_hw(irq, mask); +} + +static struct irq_chip eiger_irq_type = { + .name = "EIGER", + .irq_unmask = eiger_enable_irq, + .irq_mask = eiger_disable_irq, + .irq_mask_ack = eiger_disable_irq, +}; + +static void +eiger_device_interrupt(unsigned long vector) +{ + unsigned intstatus; + + /* + * The PALcode will have passed us vectors 0x800 or 0x810, + * which are fairly arbitrary values and serve only to tell + * us whether an interrupt has come in on IRQ0 or IRQ1. If + * it's IRQ1 it's a PCI interrupt; if it's IRQ0, it's + * probably ISA, but PCI interrupts can come through IRQ0 + * as well if the interrupt controller isn't in accelerated + * mode. + * + * OTOH, the accelerator thing doesn't seem to be working + * overly well, so what we'll do instead is try directly + * examining the Master Interrupt Register to see if it's a + * PCI interrupt, and if _not_ then we'll pass it on to the + * ISA handler. + */ + + intstatus = inw(0x500) & 15; + if (intstatus) { + /* + * This is a PCI interrupt. Check each bit and + * despatch an interrupt if it's set. + */ + + if (intstatus & 8) handle_irq(16+3); + if (intstatus & 4) handle_irq(16+2); + if (intstatus & 2) handle_irq(16+1); + if (intstatus & 1) handle_irq(16+0); + } else { + isa_device_interrupt(vector); + } +} + +static void +eiger_srm_device_interrupt(unsigned long vector) +{ + int irq = (vector - 0x800) >> 4; + handle_irq(irq); +} + +static void __init +eiger_init_irq(void) +{ + long i; + + outb(0, DMA1_RESET_REG); + outb(0, DMA2_RESET_REG); + outb(DMA_MODE_CASCADE, DMA2_MODE_REG); + outb(0, DMA2_MASK_REG); + + if (alpha_using_srm) + alpha_mv.device_interrupt = eiger_srm_device_interrupt; + + for (i = 16; i < 128; i += 16) + eiger_update_irq_hw(i, -1); + + init_i8259a_irqs(); + + for (i = 16; i < 128; ++i) { + irq_set_chip_and_handler(i, &eiger_irq_type, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } +} + +static int +eiger_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + u8 irq_orig; + + /* The SRM console has already calculated out the IRQ value's for + option cards. As this works lets just read in the value already + set and change it to a useable value by Linux. + + All the IRQ values generated by the console are greater than 90, + so we subtract 80 because it is (90 - allocated ISA IRQ's). 
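+
+   For example, a console-assigned value of 0x98 read back below via
+   PCI_INTERRUPT_LINE becomes Linux IRQ 0x98 - 0x80 = 0x18 (24).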
*/ + + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq_orig); + + return irq_orig - 0x80; +} + +static u8 +eiger_swizzle(struct pci_dev *dev, u8 *pinp) +{ + struct pci_controller *hose = dev->sysdata; + int slot, pin = *pinp; + int bridge_count = 0; + + /* Find the number of backplane bridges. */ + int backplane = inw(0x502) & 0x0f; + + switch (backplane) + { + case 0x00: bridge_count = 0; break; /* No bridges */ + case 0x01: bridge_count = 1; break; /* 1 */ + case 0x03: bridge_count = 2; break; /* 2 */ + case 0x07: bridge_count = 3; break; /* 3 */ + case 0x0f: bridge_count = 4; break; /* 4 */ + } + + slot = PCI_SLOT(dev->devfn); + while (dev->bus->self) { + /* Check for built-in bridges on hose 0. */ + if (hose->index == 0 + && (PCI_SLOT(dev->bus->self->devfn) + > 20 - bridge_count)) { + slot = PCI_SLOT(dev->devfn); + break; + } + /* Must be a card-based bridge. */ + pin = pci_swizzle_interrupt_pin(dev, pin); + + /* Move up the chain of bridges. */ + dev = dev->bus->self; + } + *pinp = pin; + return slot; +} + +/* + * The System Vectors + */ + +struct alpha_machine_vector eiger_mv __initmv = { + .vector_name = "Eiger", + DO_EV6_MMU, + DO_DEFAULT_RTC, + DO_TSUNAMI_IO, + .machine_check = tsunami_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = TSUNAMI_DAC_OFFSET, + + .nr_irqs = 128, + .device_interrupt = eiger_device_interrupt, + + .init_arch = tsunami_init_arch, + .init_irq = eiger_init_irq, + .init_rtc = common_init_rtc, + .init_pci = common_init_pci, + .kill_arch = tsunami_kill_arch, + .pci_map_irq = eiger_map_irq, + .pci_swizzle = eiger_swizzle, +}; +ALIAS_MV(eiger) diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c new file mode 100644 index 0000000000..5c9c884281 --- /dev/null +++ b/arch/alpha/kernel/sys_jensen.c @@ -0,0 +1,237 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/sys_jensen.c + * + * Copyright (C) 1995 Linus Torvalds + * Copyright (C) 1998, 1999 Richard Henderson + * + * Code supporting the Jensen. + */ +#define __EXTERN_INLINE +#include +#include +#undef __EXTERN_INLINE + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +/* + * Jensen is special: the vector is 0x8X0 for EISA interrupt X, and + * 0x9X0 for the local motherboard interrupts. + * + * Note especially that those local interrupts CANNOT be masked, + * which causes much of the pain below... + * + * 0x660 - NMI + * + * 0x800 - IRQ0 interval timer (not used, as we use the RTC timer) + * 0x810 - IRQ1 line printer (duh..) + * 0x860 - IRQ6 floppy disk + * + * 0x900 - COM1 + * 0x920 - COM2 + * 0x980 - keyboard + * 0x990 - mouse + * + * PCI-based systems are more sane: they don't have the local + * interrupts at all, and have only normal PCI interrupts from + * devices. Happily it's easy enough to do a sane mapping from the + * Jensen. + * + * Note that this means that we may have to do a hardware + * "local_op" to a different interrupt than we report to the rest of the + * world. + */ + +static void +jensen_local_enable(struct irq_data *d) +{ + /* the parport is really hw IRQ 1, silly Jensen. */ + if (d->irq == 7) + i8259a_enable_irq(d); +} + +static void +jensen_local_disable(struct irq_data *d) +{ + /* the parport is really hw IRQ 1, silly Jensen. 
*/ + if (d->irq == 7) + i8259a_disable_irq(d); +} + +static void +jensen_local_mask_ack(struct irq_data *d) +{ + /* the parport is really hw IRQ 1, silly Jensen. */ + if (d->irq == 7) + i8259a_mask_and_ack_irq(d); +} + +static struct irq_chip jensen_local_irq_type = { + .name = "LOCAL", + .irq_unmask = jensen_local_enable, + .irq_mask = jensen_local_disable, + .irq_mask_ack = jensen_local_mask_ack, +}; + +static void +jensen_device_interrupt(unsigned long vector) +{ + int irq; + + switch (vector) { + case 0x660: + printk("Whee.. NMI received. Probable hardware error\n"); + printk("61=%02x, 461=%02x\n", inb(0x61), inb(0x461)); + return; + + /* local device interrupts: */ + case 0x900: irq = 4; break; /* com1 -> irq 4 */ + case 0x920: irq = 3; break; /* com2 -> irq 3 */ + case 0x980: irq = 1; break; /* kbd -> irq 1 */ + case 0x990: irq = 9; break; /* mouse -> irq 9 */ + + default: + if (vector > 0x900) { + printk("Unknown local interrupt %lx\n", vector); + return; + } + + irq = (vector - 0x800) >> 4; + if (irq == 1) + irq = 7; + break; + } + + /* If there is no handler yet... */ + if (!irq_has_action(irq)) { + /* If it is a local interrupt that cannot be masked... */ + if (vector >= 0x900) + { + /* Clear keyboard/mouse state */ + inb(0x64); + inb(0x60); + /* Reset serial ports */ + inb(0x3fa); + inb(0x2fa); + outb(0x0c, 0x3fc); + outb(0x0c, 0x2fc); + /* Clear NMI */ + outb(0,0x61); + outb(0,0x461); + } + } + +#if 0 + /* A useful bit of code to find out if an interrupt is going wild. */ + { + static unsigned int last_msg = 0, last_cc = 0; + static int last_irq = -1, count = 0; + unsigned int cc; + + __asm __volatile("rpcc %0" : "=r"(cc)); + ++count; +#define JENSEN_CYCLES_PER_SEC (150000000) + if (cc - last_msg > ((JENSEN_CYCLES_PER_SEC) * 3) || + irq != last_irq) { + printk(KERN_CRIT " irq %d count %d cc %u @ %lx\n", + irq, count, cc-last_cc, get_irq_regs()->pc); + count = 0; + last_msg = cc; + last_irq = irq; + } + last_cc = cc; + } +#endif + + handle_irq(irq); +} + +static void __init +jensen_init_irq(void) +{ + init_i8259a_irqs(); + + irq_set_chip_and_handler(1, &jensen_local_irq_type, handle_level_irq); + irq_set_chip_and_handler(4, &jensen_local_irq_type, handle_level_irq); + irq_set_chip_and_handler(3, &jensen_local_irq_type, handle_level_irq); + irq_set_chip_and_handler(7, &jensen_local_irq_type, handle_level_irq); + irq_set_chip_and_handler(9, &jensen_local_irq_type, handle_level_irq); + + common_init_isa_dma(); +} + +static void __init +jensen_init_arch(void) +{ + struct pci_controller *hose; +#ifdef CONFIG_PCI + static struct pci_dev fake_isa_bridge = { .dma_mask = 0xffffffffUL, }; + + isa_bridge = &fake_isa_bridge; +#endif + + /* Create a hose so that we can report i/o base addresses to + userland. 
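+   Jensen is EISA-only, so the hose set up below just describes the
+   EISA sparse/dense windows and a full 32-bit direct-map DMA range;
+   sg_isa and sg_pci stay NULL.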
*/ + + pci_isa_hose = hose = alloc_pci_controller(); + hose->io_space = &ioport_resource; + hose->mem_space = &iomem_resource; + hose->index = 0; + + hose->sparse_mem_base = EISA_MEM - IDENT_ADDR; + hose->dense_mem_base = 0; + hose->sparse_io_base = EISA_IO - IDENT_ADDR; + hose->dense_io_base = 0; + + hose->sg_isa = hose->sg_pci = NULL; + __direct_map_base = 0; + __direct_map_size = 0xffffffff; +} + +static void +jensen_machine_check(unsigned long vector, unsigned long la) +{ + printk(KERN_CRIT "Machine check\n"); +} + +/* + * The System Vector + */ + +struct alpha_machine_vector jensen_mv __initmv = { + .vector_name = "Jensen", + DO_EV4_MMU, + IO_LITE(JENSEN,jensen), + .machine_check = jensen_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .rtc_port = 0x170, + + .nr_irqs = 16, + .device_interrupt = jensen_device_interrupt, + + .init_arch = jensen_init_arch, + .init_irq = jensen_init_irq, + .init_rtc = common_init_rtc, + .init_pci = NULL, + .kill_arch = NULL, +}; +ALIAS_MV(jensen) diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c new file mode 100644 index 0000000000..1f99b03eff --- /dev/null +++ b/arch/alpha/kernel/sys_marvel.c @@ -0,0 +1,465 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/sys_marvel.c + * + * Marvel / IO7 support + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "proto.h" +#include "err_impl.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + +#if NR_IRQS < MARVEL_NR_IRQS +# error NR_IRQS < MARVEL_NR_IRQS !!! +#endif + + +/* + * Interrupt handling. + */ +static void +io7_device_interrupt(unsigned long vector) +{ + unsigned int pid; + unsigned int irq; + + /* + * Vector is 0x800 + (interrupt) + * + * where (interrupt) is: + * + * ...16|15 14|13 4|3 0 + * -----+-----+--------+--- + * PE | 0 | irq | 0 + * + * where (irq) is + * + * 0x0800 - 0x0ff0 - 0x0800 + (LSI id << 4) + * 0x1000 - 0x2ff0 - 0x1000 + (MSI_DAT<8:0> << 4) + */ + pid = vector >> 16; + irq = ((vector & 0xffff) - 0x800) >> 4; + + irq += 16; /* offset for legacy */ + irq &= MARVEL_IRQ_VEC_IRQ_MASK; /* not too many bits */ + irq |= pid << MARVEL_IRQ_VEC_PE_SHIFT; /* merge the pid */ + + handle_irq(irq); +} + +static volatile unsigned long * +io7_get_irq_ctl(unsigned int irq, struct io7 **pio7) +{ + volatile unsigned long *ctl; + unsigned int pid; + struct io7 *io7; + + pid = irq >> MARVEL_IRQ_VEC_PE_SHIFT; + + if (!(io7 = marvel_find_io7(pid))) { + printk(KERN_ERR + "%s for nonexistent io7 -- vec %x, pid %d\n", + __func__, irq, pid); + return NULL; + } + + irq &= MARVEL_IRQ_VEC_IRQ_MASK; /* isolate the vector */ + irq -= 16; /* subtract legacy bias */ + + if (irq >= 0x180) { + printk(KERN_ERR + "%s for invalid irq -- pid %d adjusted irq %x\n", + __func__, pid, irq); + return NULL; + } + + ctl = &io7->csrs->PO7_LSI_CTL[irq & 0xff].csr; /* assume LSI */ + if (irq >= 0x80) /* MSI */ + ctl = &io7->csrs->PO7_MSI_CTL[((irq - 0x80) >> 5) & 0x0f].csr; + + if (pio7) *pio7 = io7; + return ctl; +} + +static void +io7_enable_irq(struct irq_data *d) +{ + volatile unsigned long *ctl; + unsigned int irq = d->irq; + struct io7 *io7; + + ctl = io7_get_irq_ctl(irq, &io7); + if (!ctl || !io7) { + printk(KERN_ERR "%s: get_ctl failed for irq %x\n", + __func__, irq); + return; + } + + raw_spin_lock(&io7->irq_lock); + *ctl |= 1UL << 24; + mb(); + *ctl; + raw_spin_unlock(&io7->irq_lock); +} + +static void 
+io7_disable_irq(struct irq_data *d) +{ + volatile unsigned long *ctl; + unsigned int irq = d->irq; + struct io7 *io7; + + ctl = io7_get_irq_ctl(irq, &io7); + if (!ctl || !io7) { + printk(KERN_ERR "%s: get_ctl failed for irq %x\n", + __func__, irq); + return; + } + + raw_spin_lock(&io7->irq_lock); + *ctl &= ~(1UL << 24); + mb(); + *ctl; + raw_spin_unlock(&io7->irq_lock); +} + +static void +marvel_irq_noop(struct irq_data *d) +{ + return; +} + +static struct irq_chip marvel_legacy_irq_type = { + .name = "LEGACY", + .irq_mask = marvel_irq_noop, + .irq_unmask = marvel_irq_noop, +}; + +static struct irq_chip io7_lsi_irq_type = { + .name = "LSI", + .irq_unmask = io7_enable_irq, + .irq_mask = io7_disable_irq, + .irq_mask_ack = io7_disable_irq, +}; + +static struct irq_chip io7_msi_irq_type = { + .name = "MSI", + .irq_unmask = io7_enable_irq, + .irq_mask = io7_disable_irq, + .irq_ack = marvel_irq_noop, +}; + +static void +io7_redirect_irq(struct io7 *io7, + volatile unsigned long *csr, + unsigned int where) +{ + unsigned long val; + + val = *csr; + val &= ~(0x1ffUL << 24); /* clear the target pid */ + val |= ((unsigned long)where << 24); /* set the new target pid */ + + *csr = val; + mb(); + *csr; +} + +static void +io7_redirect_one_lsi(struct io7 *io7, unsigned int which, unsigned int where) +{ + unsigned long val; + + /* + * LSI_CTL has target PID @ 14 + */ + val = io7->csrs->PO7_LSI_CTL[which].csr; + val &= ~(0x1ffUL << 14); /* clear the target pid */ + val |= ((unsigned long)where << 14); /* set the new target pid */ + + io7->csrs->PO7_LSI_CTL[which].csr = val; + mb(); + io7->csrs->PO7_LSI_CTL[which].csr; +} + +static void +io7_redirect_one_msi(struct io7 *io7, unsigned int which, unsigned int where) +{ + unsigned long val; + + /* + * MSI_CTL has target PID @ 14 + */ + val = io7->csrs->PO7_MSI_CTL[which].csr; + val &= ~(0x1ffUL << 14); /* clear the target pid */ + val |= ((unsigned long)where << 14); /* set the new target pid */ + + io7->csrs->PO7_MSI_CTL[which].csr = val; + mb(); + io7->csrs->PO7_MSI_CTL[which].csr; +} + +static void __init +init_one_io7_lsi(struct io7 *io7, unsigned int which, unsigned int where) +{ + /* + * LSI_CTL has target PID @ 14 + */ + io7->csrs->PO7_LSI_CTL[which].csr = ((unsigned long)where << 14); + mb(); + io7->csrs->PO7_LSI_CTL[which].csr; +} + +static void __init +init_one_io7_msi(struct io7 *io7, unsigned int which, unsigned int where) +{ + /* + * MSI_CTL has target PID @ 14 + */ + io7->csrs->PO7_MSI_CTL[which].csr = ((unsigned long)where << 14); + mb(); + io7->csrs->PO7_MSI_CTL[which].csr; +} + +static void __init +init_io7_irqs(struct io7 *io7, + struct irq_chip *lsi_ops, + struct irq_chip *msi_ops) +{ + long base = (io7->pe << MARVEL_IRQ_VEC_PE_SHIFT) + 16; + long i; + + printk("Initializing interrupts for IO7 at PE %u - base %lx\n", + io7->pe, base); + + /* + * Where should interrupts from this IO7 go? + * + * They really should be sent to the local CPU to avoid having to + * traverse the mesh, but if it's not an SMP kernel, they have to + * go to the boot CPU. Send them all to the boot CPU for now, + * as each secondary starts, it can redirect it's local device + * interrupts. 
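+	 * (marvel_smp_callin() further down does exactly that redirection
+	 * once a secondary CPU comes up and finds a local IO7.)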
+ */ + printk(" Interrupts reported to CPU at PE %u\n", boot_cpuid); + + raw_spin_lock(&io7->irq_lock); + + /* set up the error irqs */ + io7_redirect_irq(io7, &io7->csrs->HLT_CTL.csr, boot_cpuid); + io7_redirect_irq(io7, &io7->csrs->HPI_CTL.csr, boot_cpuid); + io7_redirect_irq(io7, &io7->csrs->CRD_CTL.csr, boot_cpuid); + io7_redirect_irq(io7, &io7->csrs->STV_CTL.csr, boot_cpuid); + io7_redirect_irq(io7, &io7->csrs->HEI_CTL.csr, boot_cpuid); + + /* Set up the lsi irqs. */ + for (i = 0; i < 128; ++i) { + irq_set_chip_and_handler(base + i, lsi_ops, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + + /* Disable the implemented irqs in hardware. */ + for (i = 0; i < 0x60; ++i) + init_one_io7_lsi(io7, i, boot_cpuid); + + init_one_io7_lsi(io7, 0x74, boot_cpuid); + init_one_io7_lsi(io7, 0x75, boot_cpuid); + + + /* Set up the msi irqs. */ + for (i = 128; i < (128 + 512); ++i) { + irq_set_chip_and_handler(base + i, msi_ops, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + + for (i = 0; i < 16; ++i) + init_one_io7_msi(io7, i, boot_cpuid); + + raw_spin_unlock(&io7->irq_lock); +} + +static void __init +marvel_init_irq(void) +{ + int i; + struct io7 *io7 = NULL; + + /* Reserve the legacy irqs. */ + for (i = 0; i < 16; ++i) { + irq_set_chip_and_handler(i, &marvel_legacy_irq_type, + handle_level_irq); + } + + /* Init the io7 irqs. */ + for (io7 = NULL; (io7 = marvel_next_io7(io7)) != NULL; ) + init_io7_irqs(io7, &io7_lsi_irq_type, &io7_msi_irq_type); +} + +static int +marvel_map_irq(const struct pci_dev *cdev, u8 slot, u8 pin) +{ + struct pci_dev *dev = (struct pci_dev *)cdev; + struct pci_controller *hose = dev->sysdata; + struct io7_port *io7_port = hose->sysdata; + struct io7 *io7 = io7_port->io7; + int msi_loc, msi_data_off; + u16 msg_ctl; + u16 msg_dat; + u8 intline; + int irq; + + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &intline); + irq = intline; + + msi_loc = dev->msi_cap; + msg_ctl = 0; + if (msi_loc) + pci_read_config_word(dev, msi_loc + PCI_MSI_FLAGS, &msg_ctl); + + if (msg_ctl & PCI_MSI_FLAGS_ENABLE) { + msi_data_off = PCI_MSI_DATA_32; + if (msg_ctl & PCI_MSI_FLAGS_64BIT) + msi_data_off = PCI_MSI_DATA_64; + pci_read_config_word(dev, msi_loc + msi_data_off, &msg_dat); + + irq = msg_dat & 0x1ff; /* we use msg_data<8:0> */ + irq += 0x80; /* offset for lsi */ + +#if 1 + printk("PCI:%d:%d:%d (hose %d) is using MSI\n", + dev->bus->number, + PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), + hose->index); + printk(" %d message(s) from 0x%04x\n", + 1 << ((msg_ctl & PCI_MSI_FLAGS_QSIZE) >> 4), + msg_dat); + printk(" reporting on %d IRQ(s) from %d (0x%x)\n", + 1 << ((msg_ctl & PCI_MSI_FLAGS_QSIZE) >> 4), + (irq + 16) | (io7->pe << MARVEL_IRQ_VEC_PE_SHIFT), + (irq + 16) | (io7->pe << MARVEL_IRQ_VEC_PE_SHIFT)); +#endif + +#if 0 + pci_write_config_word(dev, msi_loc + PCI_MSI_FLAGS, + msg_ctl & ~PCI_MSI_FLAGS_ENABLE); + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &intline); + irq = intline; + + printk(" forcing LSI interrupt on irq %d [0x%x]\n", irq, irq); +#endif + } + + irq += 16; /* offset for legacy */ + irq |= io7->pe << MARVEL_IRQ_VEC_PE_SHIFT; /* merge the pid */ + + return irq; +} + +static void __init +marvel_init_pci(void) +{ + struct io7 *io7; + + marvel_register_error_handlers(); + + /* Indicate that we trust the console to configure things properly */ + pci_set_flags(PCI_PROBE_ONLY); + common_init_pci(); + locate_and_init_vga(NULL); + + /* Clear any io7 errors. 
*/ + for (io7 = NULL; (io7 = marvel_next_io7(io7)) != NULL; ) + io7_clear_errors(io7); +} + +static void __init +marvel_init_rtc(void) +{ + init_rtc_irq(NULL); +} + +static void +marvel_smp_callin(void) +{ + int cpuid = hard_smp_processor_id(); + struct io7 *io7 = marvel_find_io7(cpuid); + unsigned int i; + + if (!io7) + return; + + /* + * There is a local IO7 - redirect all of its interrupts here. + */ + printk("Redirecting IO7 interrupts to local CPU at PE %u\n", cpuid); + + /* Redirect the error IRQS here. */ + io7_redirect_irq(io7, &io7->csrs->HLT_CTL.csr, cpuid); + io7_redirect_irq(io7, &io7->csrs->HPI_CTL.csr, cpuid); + io7_redirect_irq(io7, &io7->csrs->CRD_CTL.csr, cpuid); + io7_redirect_irq(io7, &io7->csrs->STV_CTL.csr, cpuid); + io7_redirect_irq(io7, &io7->csrs->HEI_CTL.csr, cpuid); + + /* Redirect the implemented LSIs here. */ + for (i = 0; i < 0x60; ++i) + io7_redirect_one_lsi(io7, i, cpuid); + + io7_redirect_one_lsi(io7, 0x74, cpuid); + io7_redirect_one_lsi(io7, 0x75, cpuid); + + /* Redirect the MSIs here. */ + for (i = 0; i < 16; ++i) + io7_redirect_one_msi(io7, i, cpuid); +} + +/* + * System Vectors + */ +struct alpha_machine_vector marvel_ev7_mv __initmv = { + .vector_name = "MARVEL/EV7", + DO_EV7_MMU, + .rtc_port = 0x70, + .rtc_boot_cpu_only = 1, + DO_MARVEL_IO, + .machine_check = marvel_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = IO7_DAC_OFFSET, + + .nr_irqs = MARVEL_NR_IRQS, + .device_interrupt = io7_device_interrupt, + + .agp_info = marvel_agp_info, + + .smp_callin = marvel_smp_callin, + .init_arch = marvel_init_arch, + .init_irq = marvel_init_irq, + .init_rtc = marvel_init_rtc, + .init_pci = marvel_init_pci, + .kill_arch = marvel_kill_arch, + .pci_map_irq = marvel_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(marvel_ev7) diff --git a/arch/alpha/kernel/sys_miata.c b/arch/alpha/kernel/sys_miata.c new file mode 100644 index 0000000000..e1bee8f84c --- /dev/null +++ b/arch/alpha/kernel/sys_miata.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/sys_miata.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999, 2000 Richard Henderson + * + * Code supporting the MIATA (EV56+PYXIS). + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +static void +miata_srm_device_interrupt(unsigned long vector) +{ + int irq; + + irq = (vector - 0x800) >> 4; + + /* + * I really hate to do this, but the MIATA SRM console ignores the + * low 8 bits in the interrupt summary register, and reports the + * vector 0x80 *lower* than I expected from the bit numbering in + * the documentation. + * This was done because the low 8 summary bits really aren't used + * for reporting any interrupts (the PCI-ISA bridge, bit 7, isn't + * used for this purpose, as PIC interrupts are delivered as the + * vectors 0x800-0x8f0). + * But I really don't want to change the fixup code for allocation + * of IRQs, nor the alpha_irq_mask maintenance stuff, both of which + * look nice and clean now. + * So, here's this grotty hack... 
:-( + */ + if (irq >= 16) + irq = irq + 8; + + handle_irq(irq); +} + +static void __init +miata_init_irq(void) +{ + if (alpha_using_srm) + alpha_mv.device_interrupt = miata_srm_device_interrupt; + +#if 0 + /* These break on MiataGL so we'll try not to do it at all. */ + *(vulp)PYXIS_INT_HILO = 0x000000B2UL; mb(); /* ISA/NMI HI */ + *(vulp)PYXIS_RT_COUNT = 0UL; mb(); /* clear count */ +#endif + + init_i8259a_irqs(); + + /* Not interested in the bogus interrupts (3,10), Fan Fault (0), + NMI (1), or EIDE (9). + + We also disable the risers (4,5), since we don't know how to + route the interrupts behind the bridge. */ + init_pyxis_irqs(0x63b0000); + + common_init_isa_dma(); + if (request_irq(16 + 2, no_action, 0, "halt-switch", NULL)) + pr_err("Failed to register halt-switch interrupt\n"); + if (request_irq(16 + 6, no_action, 0, "timer-cascade", NULL)) + pr_err("Failed to register timer-cascade interrupt\n"); +} + + +/* + * PCI Fixup configuration. + * + * Summary @ PYXIS_INT_REQ: + * Bit Meaning + * 0 Fan Fault + * 1 NMI + * 2 Halt/Reset switch + * 3 none + * 4 CID0 (Riser ID) + * 5 CID1 (Riser ID) + * 6 Interval timer + * 7 PCI-ISA Bridge + * 8 Ethernet + * 9 EIDE (deprecated, ISA 14/15 used) + *10 none + *11 USB + *12 Interrupt Line A from slot 4 + *13 Interrupt Line B from slot 4 + *14 Interrupt Line C from slot 4 + *15 Interrupt Line D from slot 4 + *16 Interrupt Line A from slot 5 + *17 Interrupt line B from slot 5 + *18 Interrupt Line C from slot 5 + *19 Interrupt Line D from slot 5 + *20 Interrupt Line A from slot 1 + *21 Interrupt Line B from slot 1 + *22 Interrupt Line C from slot 1 + *23 Interrupt Line D from slot 1 + *24 Interrupt Line A from slot 2 + *25 Interrupt Line B from slot 2 + *26 Interrupt Line C from slot 2 + *27 Interrupt Line D from slot 2 + *27 Interrupt Line A from slot 3 + *29 Interrupt Line B from slot 3 + *30 Interrupt Line C from slot 3 + *31 Interrupt Line D from slot 3 + * + * The device to slot mapping looks like: + * + * Slot Device + * 3 DC21142 Ethernet + * 4 EIDE CMD646 + * 5 none + * 6 USB + * 7 PCI-ISA bridge + * 8 PCI-PCI Bridge (SBU Riser) + * 9 none + * 10 none + * 11 PCI on board slot 4 (SBU Riser) + * 12 PCI on board slot 5 (SBU Riser) + * + * These are behind the bridge, so I'm not sure what to do... + * + * 13 PCI on board slot 1 (SBU Riser) + * 14 PCI on board slot 2 (SBU Riser) + * 15 PCI on board slot 3 (SBU Riser) + * + * + * This two layered interrupt approach means that we allocate IRQ 16 and + * above for PCI interrupts. The IRQ relates to which bit the interrupt + * comes in on. This makes interrupt processing much easier. 
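+ *
+ * For example, slot 4's INTA is PYXIS_INT_REQ bit 12, so miata_map_irq()
+ * below returns 16+12 = IRQ 28 for it.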
+ */ + +static int +miata_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[18][5] = { + /*INT INTA INTB INTC INTD */ + {16+ 8, 16+ 8, 16+ 8, 16+ 8, 16+ 8}, /* IdSel 14, DC21142 */ + { -1, -1, -1, -1, -1}, /* IdSel 15, EIDE */ + { -1, -1, -1, -1, -1}, /* IdSel 16, none */ + { -1, -1, -1, -1, -1}, /* IdSel 17, none */ + { -1, -1, -1, -1, -1}, /* IdSel 18, PCI-ISA */ + { -1, -1, -1, -1, -1}, /* IdSel 19, PCI-PCI */ + { -1, -1, -1, -1, -1}, /* IdSel 20, none */ + { -1, -1, -1, -1, -1}, /* IdSel 21, none */ + {16+12, 16+12, 16+13, 16+14, 16+15}, /* IdSel 22, slot 4 */ + {16+16, 16+16, 16+17, 16+18, 16+19}, /* IdSel 23, slot 5 */ + /* the next 7 are actually on PCI bus 1, across the bridge */ + {16+11, 16+11, 16+11, 16+11, 16+11}, /* IdSel 24, QLISP/GL*/ + { -1, -1, -1, -1, -1}, /* IdSel 25, none */ + { -1, -1, -1, -1, -1}, /* IdSel 26, none */ + { -1, -1, -1, -1, -1}, /* IdSel 27, none */ + {16+20, 16+20, 16+21, 16+22, 16+23}, /* IdSel 28, slot 1 */ + {16+24, 16+24, 16+25, 16+26, 16+27}, /* IdSel 29, slot 2 */ + {16+28, 16+28, 16+29, 16+30, 16+31}, /* IdSel 30, slot 3 */ + /* This bridge is on the main bus of the later orig MIATA */ + { -1, -1, -1, -1, -1}, /* IdSel 31, PCI-PCI */ + }; + const long min_idsel = 3, max_idsel = 20, irqs_per_slot = 5; + + /* the USB function of the 82c693 has it's interrupt connected to + the 2nd 8259 controller. So we have to check for it first. */ + + if((slot == 7) && (PCI_FUNC(dev->devfn) == 3)) { + u8 irq=0; + struct pci_dev *pdev = pci_get_slot(dev->bus, dev->devfn & ~7); + if(pdev == NULL || pci_read_config_byte(pdev, 0x40,&irq) != PCIBIOS_SUCCESSFUL) { + pci_dev_put(pdev); + return -1; + } + else { + pci_dev_put(pdev); + return irq; + } + } + + return COMMON_TABLE_LOOKUP; +} + +static u8 +miata_swizzle(struct pci_dev *dev, u8 *pinp) +{ + int slot, pin = *pinp; + + if (dev->bus->number == 0) { + slot = PCI_SLOT(dev->devfn); + } + /* Check for the built-in bridge. */ + else if ((PCI_SLOT(dev->bus->self->devfn) == 8) || + (PCI_SLOT(dev->bus->self->devfn) == 20)) { + slot = PCI_SLOT(dev->devfn) + 9; + } + else + { + /* Must be a card-based bridge. */ + do { + if ((PCI_SLOT(dev->bus->self->devfn) == 8) || + (PCI_SLOT(dev->bus->self->devfn) == 20)) { + slot = PCI_SLOT(dev->devfn) + 9; + break; + } + pin = pci_swizzle_interrupt_pin(dev, pin); + + /* Move up the chain of bridges. */ + dev = dev->bus->self; + /* Slot of the next bridge. */ + slot = PCI_SLOT(dev->devfn); + } while (dev->bus->self); + } + *pinp = pin; + return slot; +} + +static void __init +miata_init_pci(void) +{ + cia_init_pci(); + SMC669_Init(0); /* it might be a GL (fails harmlessly if not) */ + es1888_init(); +} + +static void +miata_kill_arch(int mode) +{ + cia_kill_arch(mode); + +#ifndef ALPHA_RESTORE_SRM_SETUP + switch(mode) { + case LINUX_REBOOT_CMD_RESTART: + /* Who said DEC engineers have no sense of humor? 
;-) */ + if (alpha_using_srm) { + *(vuip) PYXIS_RESET = 0x0000dead; + mb(); + } + break; + case LINUX_REBOOT_CMD_HALT: + break; + case LINUX_REBOOT_CMD_POWER_OFF: + break; + } + + halt(); +#endif +} + + +/* + * The System Vector + */ + +struct alpha_machine_vector miata_mv __initmv = { + .vector_name = "Miata", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_PYXIS_IO, + .machine_check = cia_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = PYXIS_DAC_OFFSET, + + .nr_irqs = 48, + .device_interrupt = pyxis_device_interrupt, + + .init_arch = pyxis_init_arch, + .init_irq = miata_init_irq, + .init_rtc = common_init_rtc, + .init_pci = miata_init_pci, + .kill_arch = miata_kill_arch, + .pci_map_irq = miata_map_irq, + .pci_swizzle = miata_swizzle, +}; +ALIAS_MV(miata) diff --git a/arch/alpha/kernel/sys_mikasa.c b/arch/alpha/kernel/sys_mikasa.c new file mode 100644 index 0000000000..7690dfd57c --- /dev/null +++ b/arch/alpha/kernel/sys_mikasa.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/sys_mikasa.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * + * Code supporting the MIKASA (AlphaServer 1000). + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +/* Note mask bit is true for ENABLED irqs. */ +static int cached_irq_mask; + +static inline void +mikasa_update_irq_hw(int mask) +{ + outw(mask, 0x536); +} + +static inline void +mikasa_enable_irq(struct irq_data *d) +{ + mikasa_update_irq_hw(cached_irq_mask |= 1 << (d->irq - 16)); +} + +static void +mikasa_disable_irq(struct irq_data *d) +{ + mikasa_update_irq_hw(cached_irq_mask &= ~(1 << (d->irq - 16))); +} + +static struct irq_chip mikasa_irq_type = { + .name = "MIKASA", + .irq_unmask = mikasa_enable_irq, + .irq_mask = mikasa_disable_irq, + .irq_mask_ack = mikasa_disable_irq, +}; + +static void +mikasa_device_interrupt(unsigned long vector) +{ + unsigned long pld; + unsigned int i; + + /* Read the interrupt summary registers */ + pld = (((~inw(0x534) & 0x0000ffffUL) << 16) + | (((unsigned long) inb(0xa0)) << 8) + | inb(0x20)); + + /* + * Now for every possible bit set, work through them and call + * the appropriate interrupt handler. + */ + while (pld) { + i = ffz(~pld); + pld &= pld - 1; /* clear least bit set */ + if (i < 16) { + isa_device_interrupt(vector); + } else { + handle_irq(i); + } + } +} + +static void __init +mikasa_init_irq(void) +{ + long i; + + if (alpha_using_srm) + alpha_mv.device_interrupt = srm_device_interrupt; + + mikasa_update_irq_hw(0); + + for (i = 16; i < 32; ++i) { + irq_set_chip_and_handler(i, &mikasa_irq_type, + handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + + init_i8259a_irqs(); + common_init_isa_dma(); +} + + +/* + * PCI Fixup configuration. 
+ * + * Summary @ 0x536: + * Bit Meaning + * 0 Interrupt Line A from slot 0 + * 1 Interrupt Line B from slot 0 + * 2 Interrupt Line C from slot 0 + * 3 Interrupt Line D from slot 0 + * 4 Interrupt Line A from slot 1 + * 5 Interrupt line B from slot 1 + * 6 Interrupt Line C from slot 1 + * 7 Interrupt Line D from slot 1 + * 8 Interrupt Line A from slot 2 + * 9 Interrupt Line B from slot 2 + *10 Interrupt Line C from slot 2 + *11 Interrupt Line D from slot 2 + *12 NCR 810 SCSI + *13 Power Supply Fail + *14 Temperature Warn + *15 Reserved + * + * The device to slot mapping looks like: + * + * Slot Device + * 6 NCR SCSI controller + * 7 Intel PCI-EISA bridge chip + * 11 PCI on board slot 0 + * 12 PCI on board slot 1 + * 13 PCI on board slot 2 + * + * + * This two layered interrupt approach means that we allocate IRQ 16 and + * above for PCI interrupts. The IRQ relates to which bit the interrupt + * comes in on. This makes interrupt processing much easier. + */ + +static int +mikasa_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[8][5] = { + /*INT INTA INTB INTC INTD */ + {16+12, 16+12, 16+12, 16+12, 16+12}, /* IdSel 17, SCSI */ + { -1, -1, -1, -1, -1}, /* IdSel 18, PCEB */ + { -1, -1, -1, -1, -1}, /* IdSel 19, ???? */ + { -1, -1, -1, -1, -1}, /* IdSel 20, ???? */ + { -1, -1, -1, -1, -1}, /* IdSel 21, ???? */ + { 16+0, 16+0, 16+1, 16+2, 16+3}, /* IdSel 22, slot 0 */ + { 16+4, 16+4, 16+5, 16+6, 16+7}, /* IdSel 23, slot 1 */ + { 16+8, 16+8, 16+9, 16+10, 16+11}, /* IdSel 24, slot 2 */ + }; + const long min_idsel = 6, max_idsel = 13, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} + + +#if defined(CONFIG_ALPHA_GENERIC) || !defined(CONFIG_ALPHA_PRIMO) +static void +mikasa_apecs_machine_check(unsigned long vector, unsigned long la_ptr) +{ +#define MCHK_NO_DEVSEL 0x205U +#define MCHK_NO_TABT 0x204U + + struct el_common *mchk_header; + unsigned int code; + + mchk_header = (struct el_common *)la_ptr; + + /* Clear the error before any reporting. 
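+ (The sequence that follows is: memory barriers, drain outstanding aborts
+ with draina(), clear the APECS error state with apecs_pci_clr_err(), then
+ tell PALcode the machine check has been handled via wrmces(0x7).)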
*/ + mb(); + mb(); /* magic */ + draina(); + apecs_pci_clr_err(); + wrmces(0x7); + mb(); + + code = mchk_header->code; + process_mcheck_info(vector, la_ptr, "MIKASA APECS", + (mcheck_expected(0) + && (code == MCHK_NO_DEVSEL + || code == MCHK_NO_TABT))); +} +#endif + + +/* + * The System Vector + */ + +#if defined(CONFIG_ALPHA_GENERIC) || !defined(CONFIG_ALPHA_PRIMO) +struct alpha_machine_vector mikasa_mv __initmv = { + .vector_name = "Mikasa", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_APECS_IO, + .machine_check = mikasa_apecs_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = APECS_AND_LCA_DEFAULT_MEM_BASE, + + .nr_irqs = 32, + .device_interrupt = mikasa_device_interrupt, + + .init_arch = apecs_init_arch, + .init_irq = mikasa_init_irq, + .init_rtc = common_init_rtc, + .init_pci = common_init_pci, + .pci_map_irq = mikasa_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(mikasa) +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_PRIMO) +struct alpha_machine_vector mikasa_primo_mv __initmv = { + .vector_name = "Mikasa-Primo", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_CIA_IO, + .machine_check = cia_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = CIA_DEFAULT_MEM_BASE, + + .nr_irqs = 32, + .device_interrupt = mikasa_device_interrupt, + + .init_arch = cia_init_arch, + .init_irq = mikasa_init_irq, + .init_rtc = common_init_rtc, + .init_pci = cia_init_pci, + .kill_arch = cia_kill_arch, + .pci_map_irq = mikasa_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(mikasa_primo) +#endif diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c new file mode 100644 index 0000000000..96fd6ff3fe --- /dev/null +++ b/arch/alpha/kernel/sys_nautilus.c @@ -0,0 +1,297 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/sys_nautilus.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1998 Richard Henderson + * Copyright (C) 1999 Alpha Processor, Inc., + * (David Daniel, Stig Telfer, Soohoon Lee) + * + * Code supporting NAUTILUS systems. + * + * + * NAUTILUS has the following I/O features: + * + * a) Driven by AMD 751 aka IRONGATE (northbridge): + * 4 PCI slots + * 1 AGP slot + * + * b) Driven by ALI M1543C (southbridge) + * 2 ISA slots + * 2 IDE connectors + * 1 dual drive capable FDD controller + * 2 serial ports + * 1 ECP/EPP/SP parallel port + * 2 USB ports + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "proto.h" +#include "err_impl.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +static void __init +nautilus_init_irq(void) +{ + if (alpha_using_srm) { + alpha_mv.device_interrupt = srm_device_interrupt; + } + + init_i8259a_irqs(); + common_init_isa_dma(); +} + +static int +nautilus_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + /* Preserve the IRQ set up by the console. */ + + u8 irq; + /* UP1500: AGP INTA is actually routed to IRQ 5, not IRQ 10 as + console reports. Check the device id of AGP bridge to distinguish + UP1500 from UP1000/1100. Note: 'pin' is 2 due to bridge swizzle. 
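+ (Device ID 0x700f is presumably the AGP bridge of the AMD 761 chipset
+ used on the UP1500; the UP1000/1100 northbridge is the AMD 751.)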
*/ + if (slot == 1 && pin == 2 && + dev->bus->self && dev->bus->self->device == 0x700f) + return 5; + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq); + return irq; +} + +void +nautilus_kill_arch(int mode) +{ + struct pci_bus *bus = pci_isa_hose->bus; + u32 pmuport; + int off; + + switch (mode) { + case LINUX_REBOOT_CMD_RESTART: + if (! alpha_using_srm) { + u8 t8; + pci_bus_read_config_byte(bus, 0x38, 0x43, &t8); + pci_bus_write_config_byte(bus, 0x38, 0x43, t8 | 0x80); + outb(1, 0x92); + outb(0, 0x92); + /* NOTREACHED */ + } + break; + + case LINUX_REBOOT_CMD_POWER_OFF: + /* Assume M1543C */ + off = 0x2000; /* SLP_TYPE = 0, SLP_EN = 1 */ + pci_bus_read_config_dword(bus, 0x88, 0x10, &pmuport); + if (!pmuport) { + /* M1535D/D+ */ + off = 0x3400; /* SLP_TYPE = 5, SLP_EN = 1 */ + pci_bus_read_config_dword(bus, 0x88, 0xe0, &pmuport); + } + pmuport &= 0xfffe; + outw(0xffff, pmuport); /* Clear pending events. */ + outw(off, pmuport + 4); + /* NOTREACHED */ + break; + } +} + +/* Perform analysis of a machine check that arrived from the system (NMI) */ + +static void +naut_sys_machine_check(unsigned long vector, unsigned long la_ptr, + struct pt_regs *regs) +{ + printk("PC %lx RA %lx\n", regs->pc, regs->r26); + irongate_pci_clr_err(); +} + +/* Machine checks can come from two sources - those on the CPU and those + in the system. They are analysed separately but all starts here. */ + +void +nautilus_machine_check(unsigned long vector, unsigned long la_ptr) +{ + char *mchk_class; + + /* Now for some analysis. Machine checks fall into two classes -- + those picked up by the system, and those picked up by the CPU. + Add to that the two levels of severity - correctable or not. */ + + if (vector == SCB_Q_SYSMCHK + && ((IRONGATE0->dramms & 0x300) == 0x300)) { + unsigned long nmi_ctl; + + /* Clear ALI NMI */ + nmi_ctl = inb(0x61); + nmi_ctl |= 0x0c; + outb(nmi_ctl, 0x61); + nmi_ctl &= ~0x0c; + outb(nmi_ctl, 0x61); + + /* Write again clears error bits. */ + IRONGATE0->stat_cmd = IRONGATE0->stat_cmd & ~0x100; + mb(); + IRONGATE0->stat_cmd; + + /* Write again clears error bits. */ + IRONGATE0->dramms = IRONGATE0->dramms; + mb(); + IRONGATE0->dramms; + + draina(); + wrmces(0x7); + mb(); + return; + } + + if (vector == SCB_Q_SYSERR) + mchk_class = "Correctable"; + else if (vector == SCB_Q_SYSMCHK) + mchk_class = "Fatal"; + else { + ev6_machine_check(vector, la_ptr); + return; + } + + printk(KERN_CRIT "NAUTILUS Machine check 0x%lx " + "[%s System Machine Check (NMI)]\n", + vector, mchk_class); + + naut_sys_machine_check(vector, la_ptr, get_irq_regs()); + + /* Tell the PALcode to clear the machine check */ + draina(); + wrmces(0x7); + mb(); +} + +extern void pcibios_claim_one_bus(struct pci_bus *); + +static struct resource irongate_mem = { + .name = "Irongate PCI MEM", + .flags = IORESOURCE_MEM, +}; +static struct resource busn_resource = { + .name = "PCI busn", + .start = 0, + .end = 255, + .flags = IORESOURCE_BUS, +}; + +void __init +nautilus_init_pci(void) +{ + struct pci_controller *hose = hose_head; + struct pci_host_bridge *bridge; + struct pci_bus *bus; + unsigned long bus_align, bus_size, pci_mem; + unsigned long memtop = max_low_pfn << PAGE_SHIFT; + + bridge = pci_alloc_host_bridge(0); + if (!bridge) + return; + + /* Use default IO. */ + pci_add_resource(&bridge->windows, &ioport_resource); + /* Irongate PCI memory aperture, calculate required size before + setting it up. 
*/ + pci_add_resource(&bridge->windows, &irongate_mem); + + pci_add_resource(&bridge->windows, &busn_resource); + bridge->dev.parent = NULL; + bridge->sysdata = hose; + bridge->busnr = 0; + bridge->ops = alpha_mv.pci_ops; + bridge->swizzle_irq = alpha_mv.pci_swizzle; + bridge->map_irq = alpha_mv.pci_map_irq; + bridge->size_windows = 1; + + /* Scan our single hose. */ + if (pci_scan_root_bus_bridge(bridge)) { + pci_free_host_bridge(bridge); + return; + } + bus = hose->bus = bridge->bus; + pcibios_claim_one_bus(bus); + + pci_bus_size_bridges(bus); + + /* Now we've got the size and alignment of PCI memory resources + stored in irongate_mem. Set up the PCI memory range: limit is + hardwired to 0xffffffff, base must be aligned to 16Mb. */ + bus_align = irongate_mem.start; + bus_size = irongate_mem.end + 1 - bus_align; + if (bus_align < 0x1000000UL) + bus_align = 0x1000000UL; + + pci_mem = (0x100000000UL - bus_size) & -bus_align; + irongate_mem.start = pci_mem; + irongate_mem.end = 0xffffffffUL; + + /* Register our newly calculated PCI memory window in the resource + tree. */ + if (request_resource(&iomem_resource, &irongate_mem) < 0) + printk(KERN_ERR "Failed to request MEM on hose 0\n"); + + printk(KERN_INFO "Irongate pci_mem %pR\n", &irongate_mem); + + if (pci_mem < memtop) + memtop = pci_mem; + if (memtop > alpha_mv.min_mem_address) { + free_reserved_area(__va(alpha_mv.min_mem_address), + __va(memtop), -1, NULL); + printk(KERN_INFO "nautilus_init_pci: %ldk freed\n", + (memtop - alpha_mv.min_mem_address) >> 10); + } + if ((IRONGATE0->dev_vendor >> 16) > 0x7006) /* Albacore? */ + IRONGATE0->pci_mem = pci_mem; + + pci_bus_assign_resources(bus); + pci_bus_add_devices(bus); +} + +/* + * The System Vectors + */ + +struct alpha_machine_vector nautilus_mv __initmv = { + .vector_name = "Nautilus", + DO_EV6_MMU, + DO_DEFAULT_RTC, + DO_IRONGATE_IO, + .machine_check = nautilus_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = IRONGATE_DEFAULT_MEM_BASE, + + .nr_irqs = 16, + .device_interrupt = isa_device_interrupt, + + .init_arch = irongate_init_arch, + .init_irq = nautilus_init_irq, + .init_rtc = common_init_rtc, + .init_pci = nautilus_init_pci, + .kill_arch = nautilus_kill_arch, + .pci_map_irq = nautilus_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(nautilus) diff --git a/arch/alpha/kernel/sys_noritake.c b/arch/alpha/kernel/sys_noritake.c new file mode 100644 index 0000000000..47f3ce4f71 --- /dev/null +++ b/arch/alpha/kernel/sys_noritake.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/sys_noritake.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * + * Code supporting the NORITAKE (AlphaServer 1000A), + * CORELLE (AlphaServer 800), and ALCOR Primo (AlphaStation 600A). + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + +/* Note mask bit is true for ENABLED irqs. 
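+ IRQs 16-31 are masked through the register at 0x54a and IRQs 32-47
+ through the one at 0x54c; see noritake_update_irq_hw() below.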
*/ +static int cached_irq_mask; + +static inline void +noritake_update_irq_hw(int irq, int mask) +{ + int port = 0x54a; + if (irq >= 32) { + mask >>= 16; + port = 0x54c; + } + outw(mask, port); +} + +static void +noritake_enable_irq(struct irq_data *d) +{ + noritake_update_irq_hw(d->irq, cached_irq_mask |= 1 << (d->irq - 16)); +} + +static void +noritake_disable_irq(struct irq_data *d) +{ + noritake_update_irq_hw(d->irq, cached_irq_mask &= ~(1 << (d->irq - 16))); +} + +static struct irq_chip noritake_irq_type = { + .name = "NORITAKE", + .irq_unmask = noritake_enable_irq, + .irq_mask = noritake_disable_irq, + .irq_mask_ack = noritake_disable_irq, +}; + +static void +noritake_device_interrupt(unsigned long vector) +{ + unsigned long pld; + unsigned int i; + + /* Read the interrupt summary registers of NORITAKE */ + pld = (((unsigned long) inw(0x54c) << 32) + | ((unsigned long) inw(0x54a) << 16) + | ((unsigned long) inb(0xa0) << 8) + | inb(0x20)); + + /* + * Now for every possible bit set, work through them and call + * the appropriate interrupt handler. + */ + while (pld) { + i = ffz(~pld); + pld &= pld - 1; /* clear least bit set */ + if (i < 16) { + isa_device_interrupt(vector); + } else { + handle_irq(i); + } + } +} + +static void +noritake_srm_device_interrupt(unsigned long vector) +{ + int irq; + + irq = (vector - 0x800) >> 4; + + /* + * I really hate to do this, too, but the NORITAKE SRM console also + * reports PCI vectors *lower* than I expected from the bit numbers + * in the documentation. + * But I really don't want to change the fixup code for allocation + * of IRQs, nor the alpha_irq_mask maintenance stuff, both of which + * look nice and clean now. + * So, here's this additional grotty hack... :-( + */ + if (irq >= 16) + irq = irq + 1; + + handle_irq(irq); +} + +static void __init +noritake_init_irq(void) +{ + long i; + + if (alpha_using_srm) + alpha_mv.device_interrupt = noritake_srm_device_interrupt; + + outw(0, 0x54a); + outw(0, 0x54c); + + for (i = 16; i < 48; ++i) { + irq_set_chip_and_handler(i, &noritake_irq_type, + handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + + init_i8259a_irqs(); + common_init_isa_dma(); +} + + +/* + * PCI Fixup configuration. 
+ * + * Summary @ 0x542, summary register #1: + * Bit Meaning + * 0 All valid ints from summary regs 2 & 3 + * 1 QLOGIC ISP1020A SCSI + * 2 Interrupt Line A from slot 0 + * 3 Interrupt Line B from slot 0 + * 4 Interrupt Line A from slot 1 + * 5 Interrupt line B from slot 1 + * 6 Interrupt Line A from slot 2 + * 7 Interrupt Line B from slot 2 + * 8 Interrupt Line A from slot 3 + * 9 Interrupt Line B from slot 3 + *10 Interrupt Line A from slot 4 + *11 Interrupt Line B from slot 4 + *12 Interrupt Line A from slot 5 + *13 Interrupt Line B from slot 5 + *14 Interrupt Line A from slot 6 + *15 Interrupt Line B from slot 6 + * + * Summary @ 0x544, summary register #2: + * Bit Meaning + * 0 OR of all unmasked ints in SR #2 + * 1 OR of secondary bus ints + * 2 Interrupt Line C from slot 0 + * 3 Interrupt Line D from slot 0 + * 4 Interrupt Line C from slot 1 + * 5 Interrupt line D from slot 1 + * 6 Interrupt Line C from slot 2 + * 7 Interrupt Line D from slot 2 + * 8 Interrupt Line C from slot 3 + * 9 Interrupt Line D from slot 3 + *10 Interrupt Line C from slot 4 + *11 Interrupt Line D from slot 4 + *12 Interrupt Line C from slot 5 + *13 Interrupt Line D from slot 5 + *14 Interrupt Line C from slot 6 + *15 Interrupt Line D from slot 6 + * + * The device to slot mapping looks like: + * + * Slot Device + * 7 Intel PCI-EISA bridge chip + * 8 DEC PCI-PCI bridge chip + * 11 PCI on board slot 0 + * 12 PCI on board slot 1 + * 13 PCI on board slot 2 + * + * + * This two layered interrupt approach means that we allocate IRQ 16 and + * above for PCI interrupts. The IRQ relates to which bit the interrupt + * comes in on. This makes interrupt processing much easier. + */ + +static int +noritake_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[15][5] = { + /*INT INTA INTB INTC INTD */ + /* note: IDSELs 16, 17, and 25 are CORELLE only */ + { 16+1, 16+1, 16+1, 16+1, 16+1}, /* IdSel 16, QLOGIC */ + { -1, -1, -1, -1, -1}, /* IdSel 17, S3 Trio64 */ + { -1, -1, -1, -1, -1}, /* IdSel 18, PCEB */ + { -1, -1, -1, -1, -1}, /* IdSel 19, PPB */ + { -1, -1, -1, -1, -1}, /* IdSel 20, ???? */ + { -1, -1, -1, -1, -1}, /* IdSel 21, ???? */ + { 16+2, 16+2, 16+3, 32+2, 32+3}, /* IdSel 22, slot 0 */ + { 16+4, 16+4, 16+5, 32+4, 32+5}, /* IdSel 23, slot 1 */ + { 16+6, 16+6, 16+7, 32+6, 32+7}, /* IdSel 24, slot 2 */ + { 16+8, 16+8, 16+9, 32+8, 32+9}, /* IdSel 25, slot 3 */ + /* The following 5 are actually on PCI bus 1, which is + across the built-in bridge of the NORITAKE only. */ + { 16+1, 16+1, 16+1, 16+1, 16+1}, /* IdSel 16, QLOGIC */ + { 16+8, 16+8, 16+9, 32+8, 32+9}, /* IdSel 17, slot 3 */ + {16+10, 16+10, 16+11, 32+10, 32+11}, /* IdSel 18, slot 4 */ + {16+12, 16+12, 16+13, 32+12, 32+13}, /* IdSel 19, slot 5 */ + {16+14, 16+14, 16+15, 32+14, 32+15}, /* IdSel 20, slot 6 */ + }; + const long min_idsel = 5, max_idsel = 19, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} + +static u8 +noritake_swizzle(struct pci_dev *dev, u8 *pinp) +{ + int slot, pin = *pinp; + + if (dev->bus->number == 0) { + slot = PCI_SLOT(dev->devfn); + } + /* Check for the built-in bridge */ + else if (PCI_SLOT(dev->bus->self->devfn) == 8) { + slot = PCI_SLOT(dev->devfn) + 15; /* WAG! */ + } + else + { + /* Must be a card-based bridge. */ + do { + if (PCI_SLOT(dev->bus->self->devfn) == 8) { + slot = PCI_SLOT(dev->devfn) + 15; + break; + } + pin = pci_swizzle_interrupt_pin(dev, pin); + + /* Move up the chain of bridges. */ + dev = dev->bus->self; + /* Slot of the next bridge. 
*/ + slot = PCI_SLOT(dev->devfn); + } while (dev->bus->self); + } + *pinp = pin; + return slot; +} + +#if defined(CONFIG_ALPHA_GENERIC) || !defined(CONFIG_ALPHA_PRIMO) +static void +noritake_apecs_machine_check(unsigned long vector, unsigned long la_ptr) +{ +#define MCHK_NO_DEVSEL 0x205U +#define MCHK_NO_TABT 0x204U + + struct el_common *mchk_header; + unsigned int code; + + mchk_header = (struct el_common *)la_ptr; + + /* Clear the error before any reporting. */ + mb(); + mb(); /* magic */ + draina(); + apecs_pci_clr_err(); + wrmces(0x7); + mb(); + + code = mchk_header->code; + process_mcheck_info(vector, la_ptr, "NORITAKE APECS", + (mcheck_expected(0) + && (code == MCHK_NO_DEVSEL + || code == MCHK_NO_TABT))); +} +#endif + + +/* + * The System Vectors + */ + +#if defined(CONFIG_ALPHA_GENERIC) || !defined(CONFIG_ALPHA_PRIMO) +struct alpha_machine_vector noritake_mv __initmv = { + .vector_name = "Noritake", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_APECS_IO, + .machine_check = noritake_apecs_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = EISA_DEFAULT_IO_BASE, + .min_mem_address = APECS_AND_LCA_DEFAULT_MEM_BASE, + + .nr_irqs = 48, + .device_interrupt = noritake_device_interrupt, + + .init_arch = apecs_init_arch, + .init_irq = noritake_init_irq, + .init_rtc = common_init_rtc, + .init_pci = common_init_pci, + .pci_map_irq = noritake_map_irq, + .pci_swizzle = noritake_swizzle, +}; +ALIAS_MV(noritake) +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_PRIMO) +struct alpha_machine_vector noritake_primo_mv __initmv = { + .vector_name = "Noritake-Primo", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_CIA_IO, + .machine_check = cia_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = EISA_DEFAULT_IO_BASE, + .min_mem_address = CIA_DEFAULT_MEM_BASE, + + .nr_irqs = 48, + .device_interrupt = noritake_device_interrupt, + + .init_arch = cia_init_arch, + .init_irq = noritake_init_irq, + .init_rtc = common_init_rtc, + .init_pci = cia_init_pci, + .kill_arch = cia_kill_arch, + .pci_map_irq = noritake_map_irq, + .pci_swizzle = noritake_swizzle, +}; +ALIAS_MV(noritake_primo) +#endif diff --git a/arch/alpha/kernel/sys_rawhide.c b/arch/alpha/kernel/sys_rawhide.c new file mode 100644 index 0000000000..b5846ffdad --- /dev/null +++ b/arch/alpha/kernel/sys_rawhide.c @@ -0,0 +1,271 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/sys_rawhide.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * + * Code supporting the RAWHIDE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +/* + * HACK ALERT! only the boot cpu is used for interrupts. + */ + + +/* Note mask bit is true for ENABLED irqs. 
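+ Each hose owns a block of 24 kernel IRQs starting at 16 + 24*hose, and
+ the bits preset in hose_irq_masks[] below stay enabled no matter what
+ the mask routines are asked to do.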
*/ + +static unsigned int hose_irq_masks[4] = { + 0xff0000, 0xfe0000, 0xff0000, 0xff0000 +}; +static unsigned int cached_irq_masks[4]; +DEFINE_SPINLOCK(rawhide_irq_lock); + +static inline void +rawhide_update_irq_hw(int hose, int mask) +{ + *(vuip)MCPCIA_INT_MASK0(MCPCIA_HOSE2MID(hose)) = mask; + mb(); + *(vuip)MCPCIA_INT_MASK0(MCPCIA_HOSE2MID(hose)); +} + +#define hose_exists(h) \ + (((h) < MCPCIA_MAX_HOSES) && (cached_irq_masks[(h)] != 0)) + +static inline void +rawhide_enable_irq(struct irq_data *d) +{ + unsigned int mask, hose; + unsigned int irq = d->irq; + + irq -= 16; + hose = irq / 24; + if (!hose_exists(hose)) /* if hose non-existent, exit */ + return; + + irq -= hose * 24; + mask = 1 << irq; + + spin_lock(&rawhide_irq_lock); + mask |= cached_irq_masks[hose]; + cached_irq_masks[hose] = mask; + rawhide_update_irq_hw(hose, mask); + spin_unlock(&rawhide_irq_lock); +} + +static void +rawhide_disable_irq(struct irq_data *d) +{ + unsigned int mask, hose; + unsigned int irq = d->irq; + + irq -= 16; + hose = irq / 24; + if (!hose_exists(hose)) /* if hose non-existent, exit */ + return; + + irq -= hose * 24; + mask = ~(1 << irq) | hose_irq_masks[hose]; + + spin_lock(&rawhide_irq_lock); + mask &= cached_irq_masks[hose]; + cached_irq_masks[hose] = mask; + rawhide_update_irq_hw(hose, mask); + spin_unlock(&rawhide_irq_lock); +} + +static void +rawhide_mask_and_ack_irq(struct irq_data *d) +{ + unsigned int mask, mask1, hose; + unsigned int irq = d->irq; + + irq -= 16; + hose = irq / 24; + if (!hose_exists(hose)) /* if hose non-existent, exit */ + return; + + irq -= hose * 24; + mask1 = 1 << irq; + mask = ~mask1 | hose_irq_masks[hose]; + + spin_lock(&rawhide_irq_lock); + + mask &= cached_irq_masks[hose]; + cached_irq_masks[hose] = mask; + rawhide_update_irq_hw(hose, mask); + + /* Clear the interrupt. */ + *(vuip)MCPCIA_INT_REQ(MCPCIA_HOSE2MID(hose)) = mask1; + + spin_unlock(&rawhide_irq_lock); +} + +static struct irq_chip rawhide_irq_type = { + .name = "RAWHIDE", + .irq_unmask = rawhide_enable_irq, + .irq_mask = rawhide_disable_irq, + .irq_mask_ack = rawhide_mask_and_ack_irq, +}; + +static void +rawhide_srm_device_interrupt(unsigned long vector) +{ + int irq; + + irq = (vector - 0x800) >> 4; + + /* + * The RAWHIDE SRM console reports PCI interrupts with a vector + * 0x80 *higher* than one might expect, as PCI IRQ 0 (ie bit 0) + * shows up as IRQ 24, etc, etc. We adjust it down by 8 to have + * it line up with the actual bit numbers from the REQ registers, + * which is how we manage the interrupts/mask. Sigh... + * + * Also, PCI #1 interrupts are offset some more... :-( + */ + + if (irq == 52) { + /* SCSI on PCI1 is special. */ + irq = 72; + } + + /* Adjust by which hose it is from. */ + irq -= ((irq + 16) >> 2) & 0x38; + + handle_irq(irq); +} + +static void __init +rawhide_init_irq(void) +{ + struct pci_controller *hose; + long i; + + mcpcia_init_hoses(); + + /* Clear them all; only hoses that exist will be non-zero. */ + for (i = 0; i < MCPCIA_MAX_HOSES; i++) cached_irq_masks[i] = 0; + + for (hose = hose_head; hose; hose = hose->next) { + unsigned int h = hose->index; + unsigned int mask = hose_irq_masks[h]; + + cached_irq_masks[h] = mask; + *(vuip)MCPCIA_INT_MASK0(MCPCIA_HOSE2MID(h)) = mask; + *(vuip)MCPCIA_INT_MASK1(MCPCIA_HOSE2MID(h)) = 0; + } + + for (i = 16; i < 128; ++i) { + irq_set_chip_and_handler(i, &rawhide_irq_type, + handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + + init_i8259a_irqs(); + common_init_isa_dma(); +} + +/* + * PCI Fixup configuration. 
+ * + * Summary @ MCPCIA_PCI0_INT_REQ: + * Bit Meaning + * 0 Interrupt Line A from slot 2 PCI0 + * 1 Interrupt Line B from slot 2 PCI0 + * 2 Interrupt Line C from slot 2 PCI0 + * 3 Interrupt Line D from slot 2 PCI0 + * 4 Interrupt Line A from slot 3 PCI0 + * 5 Interrupt Line B from slot 3 PCI0 + * 6 Interrupt Line C from slot 3 PCI0 + * 7 Interrupt Line D from slot 3 PCI0 + * 8 Interrupt Line A from slot 4 PCI0 + * 9 Interrupt Line B from slot 4 PCI0 + * 10 Interrupt Line C from slot 4 PCI0 + * 11 Interrupt Line D from slot 4 PCI0 + * 12 Interrupt Line A from slot 5 PCI0 + * 13 Interrupt Line B from slot 5 PCI0 + * 14 Interrupt Line C from slot 5 PCI0 + * 15 Interrupt Line D from slot 5 PCI0 + * 16 EISA interrupt (PCI 0) or SCSI interrupt (PCI 1) + * 17-23 NA + * + * IdSel + * 1 EISA bridge (PCI bus 0 only) + * 2 PCI option slot 2 + * 3 PCI option slot 3 + * 4 PCI option slot 4 + * 5 PCI option slot 5 + * + */ + +static int +rawhide_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[5][5] = { + /*INT INTA INTB INTC INTD */ + { 16+16, 16+16, 16+16, 16+16, 16+16}, /* IdSel 1 SCSI PCI 1 */ + { 16+ 0, 16+ 0, 16+ 1, 16+ 2, 16+ 3}, /* IdSel 2 slot 2 */ + { 16+ 4, 16+ 4, 16+ 5, 16+ 6, 16+ 7}, /* IdSel 3 slot 3 */ + { 16+ 8, 16+ 8, 16+ 9, 16+10, 16+11}, /* IdSel 4 slot 4 */ + { 16+12, 16+12, 16+13, 16+14, 16+15} /* IdSel 5 slot 5 */ + }; + const long min_idsel = 1, max_idsel = 5, irqs_per_slot = 5; + + struct pci_controller *hose = dev->sysdata; + int irq = COMMON_TABLE_LOOKUP; + if (irq >= 0) + irq += 24 * hose->index; + return irq; +} + + +/* + * The System Vector + */ + +struct alpha_machine_vector rawhide_mv __initmv = { + .vector_name = "Rawhide", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_MCPCIA_IO, + .machine_check = mcpcia_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = MCPCIA_DEFAULT_MEM_BASE, + .pci_dac_offset = MCPCIA_DAC_OFFSET, + + .nr_irqs = 128, + .device_interrupt = rawhide_srm_device_interrupt, + + .init_arch = mcpcia_init_arch, + .init_irq = rawhide_init_irq, + .init_rtc = common_init_rtc, + .init_pci = common_init_pci, + .kill_arch = NULL, + .pci_map_irq = rawhide_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(rawhide) diff --git a/arch/alpha/kernel/sys_ruffian.c b/arch/alpha/kernel/sys_ruffian.c new file mode 100644 index 0000000000..4b1c8d85c4 --- /dev/null +++ b/arch/alpha/kernel/sys_ruffian.c @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/sys_ruffian.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999, 2000 Richard Henderson + * + * Code supporting the RUFFIAN. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +static void __init +ruffian_init_irq(void) +{ + /* Invert 6&7 for i82371 */ + *(vulp)PYXIS_INT_HILO = 0x000000c0UL; mb(); + *(vulp)PYXIS_INT_CNFG = 0x00002064UL; mb(); /* all clear */ + + outb(0x11,0xA0); + outb(0x08,0xA1); + outb(0x02,0xA1); + outb(0x01,0xA1); + outb(0xFF,0xA1); + + outb(0x11,0x20); + outb(0x00,0x21); + outb(0x04,0x21); + outb(0x01,0x21); + outb(0xFF,0x21); + + /* Finish writing the 82C59A PIC Operation Control Words */ + outb(0x20,0xA0); + outb(0x20,0x20); + + init_i8259a_irqs(); + + /* Not interested in the bogus interrupts (0,3,6), + NMI (1), HALT (2), flash (5), or 21142 (8). */ + init_pyxis_irqs(0x16f0000); + + common_init_isa_dma(); +} + +#define RUFFIAN_LATCH DIV_ROUND_CLOSEST(PIT_TICK_RATE, HZ) + +static void __init +ruffian_init_rtc(void) +{ + /* Ruffian does not have the RTC connected to the CPU timer + interrupt. Instead, it uses the PIT connected to IRQ 0. */ + + /* Setup interval timer. */ + outb(0x34, 0x43); /* binary, mode 2, LSB/MSB, ch 0 */ + outb(RUFFIAN_LATCH & 0xff, 0x40); /* LSB */ + outb(RUFFIAN_LATCH >> 8, 0x40); /* MSB */ + + outb(0xb6, 0x43); /* pit counter 2: speaker */ + outb(0x31, 0x42); + outb(0x13, 0x42); + + if (request_irq(0, rtc_timer_interrupt, 0, "timer", NULL)) + pr_err("Failed to request irq 0 (timer)\n"); +} + +static void +ruffian_kill_arch (int mode) +{ + cia_kill_arch(mode); +#if 0 + /* This only causes re-entry to ARCSBIOS */ + /* Perhaps this works for other PYXIS as well? */ + *(vuip) PYXIS_RESET = 0x0000dead; + mb(); +#endif +} + +/* + * Interrupt routing: + * + * Primary bus + * IdSel INTA INTB INTC INTD + * 21052 13 - - - - + * SIO 14 23 - - - + * 21143 15 44 - - - + * Slot 0 17 43 42 41 40 + * + * Secondary bus + * IdSel INTA INTB INTC INTD + * Slot 0 8 (18) 19 18 17 16 + * Slot 1 9 (19) 31 30 29 28 + * Slot 2 10 (20) 27 26 25 24 + * Slot 3 11 (21) 39 38 37 36 + * Slot 4 12 (22) 35 34 33 32 + * 53c875 13 (23) 20 - - - + * + */ + +static int +ruffian_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[11][5] = { + /*INT INTA INTB INTC INTD */ + {-1, -1, -1, -1, -1}, /* IdSel 13, 21052 */ + {-1, -1, -1, -1, -1}, /* IdSel 14, SIO */ + {44, 44, 44, 44, 44}, /* IdSel 15, 21143 */ + {-1, -1, -1, -1, -1}, /* IdSel 16, none */ + {43, 43, 42, 41, 40}, /* IdSel 17, 64-bit slot */ + /* the next 6 are actually on PCI bus 1, across the bridge */ + {19, 19, 18, 17, 16}, /* IdSel 8, slot 0 */ + {31, 31, 30, 29, 28}, /* IdSel 9, slot 1 */ + {27, 27, 26, 25, 24}, /* IdSel 10, slot 2 */ + {39, 39, 38, 37, 36}, /* IdSel 11, slot 3 */ + {35, 35, 34, 33, 32}, /* IdSel 12, slot 4 */ + {20, 20, 20, 20, 20}, /* IdSel 13, 53c875 */ + }; + const long min_idsel = 13, max_idsel = 23, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} + +static u8 +ruffian_swizzle(struct pci_dev *dev, u8 *pinp) +{ + int slot, pin = *pinp; + + if (dev->bus->number == 0) { + slot = PCI_SLOT(dev->devfn); + } + /* Check for the built-in bridge. */ + else if (PCI_SLOT(dev->bus->self->devfn) == 13) { + slot = PCI_SLOT(dev->devfn) + 10; + } + else + { + /* Must be a card-based bridge. */ + do { + if (PCI_SLOT(dev->bus->self->devfn) == 13) { + slot = PCI_SLOT(dev->devfn) + 10; + break; + } + pin = pci_swizzle_interrupt_pin(dev, pin); + + /* Move up the chain of bridges. 
*/ + dev = dev->bus->self; + /* Slot of the next bridge. */ + slot = PCI_SLOT(dev->devfn); + } while (dev->bus->self); + } + *pinp = pin; + return slot; +} + +#ifdef BUILDING_FOR_MILO +/* + * The DeskStation Ruffian motherboard firmware does not place + * the memory size in the PALimpure area. Therefore, we use + * the Bank Configuration Registers in PYXIS to obtain the size. + */ +static unsigned long __init +ruffian_get_bank_size(unsigned long offset) +{ + unsigned long bank_addr, bank, ret = 0; + + /* Valid offsets are: 0x800, 0x840 and 0x880 + since Ruffian only uses three banks. */ + bank_addr = (unsigned long)PYXIS_MCR + offset; + bank = *(vulp)bank_addr; + + /* Check BANK_ENABLE */ + if (bank & 0x01) { + static unsigned long size[] __initdata = { + 0x40000000UL, /* 0x00, 1G */ + 0x20000000UL, /* 0x02, 512M */ + 0x10000000UL, /* 0x04, 256M */ + 0x08000000UL, /* 0x06, 128M */ + 0x04000000UL, /* 0x08, 64M */ + 0x02000000UL, /* 0x0a, 32M */ + 0x01000000UL, /* 0x0c, 16M */ + 0x00800000UL, /* 0x0e, 8M */ + 0x80000000UL, /* 0x10, 2G */ + }; + + bank = (bank & 0x1e) >> 1; + if (bank < ARRAY_SIZE(size)) + ret = size[bank]; + } + + return ret; +} +#endif /* BUILDING_FOR_MILO */ + +/* + * The System Vector + */ + +struct alpha_machine_vector ruffian_mv __initmv = { + .vector_name = "Ruffian", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_PYXIS_IO, + .machine_check = cia_machine_check, + .max_isa_dma_address = ALPHA_RUFFIAN_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = PYXIS_DAC_OFFSET, + + .nr_irqs = 48, + .device_interrupt = pyxis_device_interrupt, + + .init_arch = pyxis_init_arch, + .init_irq = ruffian_init_irq, + .init_rtc = ruffian_init_rtc, + .init_pci = cia_init_pci, + .kill_arch = ruffian_kill_arch, + .pci_map_irq = ruffian_map_irq, + .pci_swizzle = ruffian_swizzle, +}; +ALIAS_MV(ruffian) diff --git a/arch/alpha/kernel/sys_rx164.c b/arch/alpha/kernel/sys_rx164.c new file mode 100644 index 0000000000..94046f9aea --- /dev/null +++ b/arch/alpha/kernel/sys_rx164.c @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/sys_rx164.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * + * Code supporting the RX164 (PCA56+POLARIS). + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +/* Note mask bit is true for ENABLED irqs. */ +static unsigned long cached_irq_mask; + +static inline void +rx164_update_irq_hw(unsigned long mask) +{ + volatile unsigned int *irq_mask; + + irq_mask = (void *)(POLARIS_DENSE_CONFIG_BASE + 0x74); + *irq_mask = mask; + mb(); + *irq_mask; +} + +static inline void +rx164_enable_irq(struct irq_data *d) +{ + rx164_update_irq_hw(cached_irq_mask |= 1UL << (d->irq - 16)); +} + +static void +rx164_disable_irq(struct irq_data *d) +{ + rx164_update_irq_hw(cached_irq_mask &= ~(1UL << (d->irq - 16))); +} + +static struct irq_chip rx164_irq_type = { + .name = "RX164", + .irq_unmask = rx164_enable_irq, + .irq_mask = rx164_disable_irq, + .irq_mask_ack = rx164_disable_irq, +}; + +static void +rx164_device_interrupt(unsigned long vector) +{ + unsigned long pld; + volatile unsigned int *dirr; + long i; + + /* Read the interrupt summary register. On Polaris, this is + the DIRR register in PCI config space (offset 0x84). 
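+ Bit N of DIRR corresponds to kernel IRQ 16+N; bit 20 is the ISA
+ cascade and is dispatched separately below.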
*/ + dirr = (void *)(POLARIS_DENSE_CONFIG_BASE + 0x84); + pld = *dirr; + + /* + * Now for every possible bit set, work through them and call + * the appropriate interrupt handler. + */ + while (pld) { + i = ffz(~pld); + pld &= pld - 1; /* clear least bit set */ + if (i == 20) { + isa_no_iack_sc_device_interrupt(vector); + } else { + handle_irq(16+i); + } + } +} + +static void __init +rx164_init_irq(void) +{ + long i; + + rx164_update_irq_hw(0); + for (i = 16; i < 40; ++i) { + irq_set_chip_and_handler(i, &rx164_irq_type, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + + init_i8259a_irqs(); + common_init_isa_dma(); + + if (request_irq(16 + 20, no_action, 0, "isa-cascade", NULL)) + pr_err("Failed to register isa-cascade interrupt\n"); +} + + +/* + * The RX164 changed its interrupt routing between pass1 and pass2... + * + * PASS1: + * + * Slot IDSEL INTA INTB INTC INTD + * 0 6 5 10 15 20 + * 1 7 4 9 14 19 + * 2 5 3 8 13 18 + * 3 9 2 7 12 17 + * 4 10 1 6 11 16 + * + * PASS2: + * Slot IDSEL INTA INTB INTC INTD + * 0 5 1 7 12 17 + * 1 6 2 8 13 18 + * 2 8 3 9 14 19 + * 3 9 4 10 15 20 + * 4 10 5 11 16 6 + * + */ + +/* + * IdSel + * 5 32 bit PCI option slot 0 + * 6 64 bit PCI option slot 1 + * 7 PCI-ISA bridge + * 7 64 bit PCI option slot 2 + * 9 32 bit PCI option slot 3 + * 10 PCI-PCI bridge + * + */ + +static int +rx164_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ +#if 0 + static char irq_tab_pass1[6][5] __initdata = { + /*INT INTA INTB INTC INTD */ + { 16+3, 16+3, 16+8, 16+13, 16+18}, /* IdSel 5, slot 2 */ + { 16+5, 16+5, 16+10, 16+15, 16+20}, /* IdSel 6, slot 0 */ + { 16+4, 16+4, 16+9, 16+14, 16+19}, /* IdSel 7, slot 1 */ + { -1, -1, -1, -1, -1}, /* IdSel 8, PCI/ISA bridge */ + { 16+2, 16+2, 16+7, 16+12, 16+17}, /* IdSel 9, slot 3 */ + { 16+1, 16+1, 16+6, 16+11, 16+16}, /* IdSel 10, slot 4 */ + }; +#else + static char irq_tab[6][5] = { + /*INT INTA INTB INTC INTD */ + { 16+0, 16+0, 16+6, 16+11, 16+16}, /* IdSel 5, slot 0 */ + { 16+1, 16+1, 16+7, 16+12, 16+17}, /* IdSel 6, slot 1 */ + { -1, -1, -1, -1, -1}, /* IdSel 7, PCI/ISA bridge */ + { 16+2, 16+2, 16+8, 16+13, 16+18}, /* IdSel 8, slot 2 */ + { 16+3, 16+3, 16+9, 16+14, 16+19}, /* IdSel 9, slot 3 */ + { 16+4, 16+4, 16+10, 16+15, 16+5}, /* IdSel 10, PCI-PCI */ + }; +#endif + const long min_idsel = 5, max_idsel = 10, irqs_per_slot = 5; + + /* JRP - Need to figure out how to distinguish pass1 from pass2, + and use the correct table. 
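+ For now only the pass2 routing is used; the pass1 table is kept above
+ under #if 0 for reference.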
*/ + return COMMON_TABLE_LOOKUP; +} + + +/* + * The System Vector + */ + +struct alpha_machine_vector rx164_mv __initmv = { + .vector_name = "RX164", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_POLARIS_IO, + .machine_check = polaris_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + + .nr_irqs = 40, + .device_interrupt = rx164_device_interrupt, + + .init_arch = polaris_init_arch, + .init_irq = rx164_init_irq, + .init_rtc = common_init_rtc, + .init_pci = common_init_pci, + .kill_arch = NULL, + .pci_map_irq = rx164_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(rx164) diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c new file mode 100644 index 0000000000..930005b2f6 --- /dev/null +++ b/arch/alpha/kernel/sys_sable.c @@ -0,0 +1,635 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/sys_sable.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * + * Code supporting the Sable, Sable-Gamma, and Lynx systems. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + +DEFINE_SPINLOCK(sable_lynx_irq_lock); + +typedef struct irq_swizzle_struct +{ + char irq_to_mask[64]; + char mask_to_irq[64]; + + /* Note mask bit is true for DISABLED irqs. */ + unsigned long shadow_mask; + + void (*update_irq_hw)(unsigned long bit, unsigned long mask); + void (*ack_irq_hw)(unsigned long bit); + +} irq_swizzle_t; + +static irq_swizzle_t *sable_lynx_irq_swizzle; + +static void sable_lynx_init_irq(int nr_of_irqs); + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SABLE) + +/***********************************************************************/ +/* + * For SABLE, which is really baroque, we manage 40 IRQ's, but the + * hardware really only supports 24, not via normal ISA PIC, + * but cascaded custom 8259's, etc. 
+ * 0-7 (char at 536) + * 8-15 (char at 53a) + * 16-23 (char at 53c) + * + * Summary Registers (536/53a/53c): + * + * Bit Meaning Kernel IRQ + *------------------------------------------ + * 0 PCI slot 0 34 + * 1 NCR810 (builtin) 33 + * 2 TULIP (builtin) 32 + * 3 mouse 12 + * 4 PCI slot 1 35 + * 5 PCI slot 2 36 + * 6 keyboard 1 + * 7 floppy 6 + * 8 COM2 3 + * 9 parallel port 7 + *10 EISA irq 3 - + *11 EISA irq 4 - + *12 EISA irq 5 5 + *13 EISA irq 6 - + *14 EISA irq 7 - + *15 COM1 4 + *16 EISA irq 9 9 + *17 EISA irq 10 10 + *18 EISA irq 11 11 + *19 EISA irq 12 - + *20 EISA irq 13 - + *21 EISA irq 14 14 + *22 NC 15 + *23 IIC - + */ + +static void +sable_update_irq_hw(unsigned long bit, unsigned long mask) +{ + int port = 0x537; + + if (bit >= 16) { + port = 0x53d; + mask >>= 16; + } else if (bit >= 8) { + port = 0x53b; + mask >>= 8; + } + + outb(mask, port); +} + +static void +sable_ack_irq_hw(unsigned long bit) +{ + int port, val1, val2; + + if (bit >= 16) { + port = 0x53c; + val1 = 0xE0 | (bit - 16); + val2 = 0xE0 | 4; + } else if (bit >= 8) { + port = 0x53a; + val1 = 0xE0 | (bit - 8); + val2 = 0xE0 | 3; + } else { + port = 0x536; + val1 = 0xE0 | (bit - 0); + val2 = 0xE0 | 1; + } + + outb(val1, port); /* ack the slave */ + outb(val2, 0x534); /* ack the master */ +} + +static irq_swizzle_t sable_irq_swizzle = { + { + -1, 6, -1, 8, 15, 12, 7, 9, /* pseudo PIC 0-7 */ + -1, 16, 17, 18, 3, -1, 21, 22, /* pseudo PIC 8-15 */ + -1, -1, -1, -1, -1, -1, -1, -1, /* pseudo EISA 0-7 */ + -1, -1, -1, -1, -1, -1, -1, -1, /* pseudo EISA 8-15 */ + 2, 1, 0, 4, 5, -1, -1, -1, /* pseudo PCI */ + -1, -1, -1, -1, -1, -1, -1, -1, /* */ + -1, -1, -1, -1, -1, -1, -1, -1, /* */ + -1, -1, -1, -1, -1, -1, -1, -1 /* */ + }, + { + 34, 33, 32, 12, 35, 36, 1, 6, /* mask 0-7 */ + 3, 7, -1, -1, 5, -1, -1, 4, /* mask 8-15 */ + 9, 10, 11, -1, -1, 14, 15, -1, /* mask 16-23 */ + -1, -1, -1, -1, -1, -1, -1, -1, /* */ + -1, -1, -1, -1, -1, -1, -1, -1, /* */ + -1, -1, -1, -1, -1, -1, -1, -1, /* */ + -1, -1, -1, -1, -1, -1, -1, -1, /* */ + -1, -1, -1, -1, -1, -1, -1, -1 /* */ + }, + -1, + sable_update_irq_hw, + sable_ack_irq_hw +}; + +static void __init +sable_init_irq(void) +{ + outb(-1, 0x537); /* slave 0 */ + outb(-1, 0x53b); /* slave 1 */ + outb(-1, 0x53d); /* slave 2 */ + outb(0x44, 0x535); /* enable cascades in master */ + + sable_lynx_irq_swizzle = &sable_irq_swizzle; + sable_lynx_init_irq(40); +} + +/* + * PCI Fixup configuration for ALPHA SABLE (2100). + * + * The device to slot mapping looks like: + * + * Slot Device + * 0 TULIP + * 1 SCSI + * 2 PCI-EISA bridge + * 3 none + * 4 none + * 5 none + * 6 PCI on board slot 0 + * 7 PCI on board slot 1 + * 8 PCI on board slot 2 + * + * + * This two layered interrupt approach means that we allocate IRQ 16 and + * above for PCI interrupts. The IRQ relates to which bit the interrupt + * comes in on. This makes interrupt processing much easier. + */ +/* + * NOTE: the IRQ assignments below are arbitrary, but need to be consistent + * with the values in the irq swizzling tables above. 
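+ * (For example, kernel IRQs 32-36 map through irq_to_mask[] above to
+ * summary bits 2, 1, 0, 4 and 5, i.e. TULIP, NCR810 and PCI slots 0-2,
+ * matching the table at the top of this section.)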
+ */ + +static int +sable_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[9][5] = { + /*INT INTA INTB INTC INTD */ + { 32+0, 32+0, 32+0, 32+0, 32+0}, /* IdSel 0, TULIP */ + { 32+1, 32+1, 32+1, 32+1, 32+1}, /* IdSel 1, SCSI */ + { -1, -1, -1, -1, -1}, /* IdSel 2, SIO */ + { -1, -1, -1, -1, -1}, /* IdSel 3, none */ + { -1, -1, -1, -1, -1}, /* IdSel 4, none */ + { -1, -1, -1, -1, -1}, /* IdSel 5, none */ + { 32+2, 32+2, 32+2, 32+2, 32+2}, /* IdSel 6, slot 0 */ + { 32+3, 32+3, 32+3, 32+3, 32+3}, /* IdSel 7, slot 1 */ + { 32+4, 32+4, 32+4, 32+4, 32+4} /* IdSel 8, slot 2 */ + }; + long min_idsel = 0, max_idsel = 8, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} +#endif /* defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SABLE) */ + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_LYNX) + +/***********************************************************************/ +/* LYNX hardware specifics + */ +/* + * For LYNX, which is also baroque, we manage 64 IRQs, via a custom IC. + * + * Bit Meaning Kernel IRQ + *------------------------------------------ + * 0 + * 1 + * 2 + * 3 mouse 12 + * 4 + * 5 + * 6 keyboard 1 + * 7 floppy 6 + * 8 COM2 3 + * 9 parallel port 7 + *10 EISA irq 3 - + *11 EISA irq 4 - + *12 EISA irq 5 5 + *13 EISA irq 6 - + *14 EISA irq 7 - + *15 COM1 4 + *16 EISA irq 9 9 + *17 EISA irq 10 10 + *18 EISA irq 11 11 + *19 EISA irq 12 - + *20 + *21 EISA irq 14 14 + *22 EISA irq 15 15 + *23 IIC - + *24 VGA (builtin) - + *25 + *26 + *27 + *28 NCR810 (builtin) 28 + *29 + *30 + *31 + *32 PCI 0 slot 4 A primary bus 32 + *33 PCI 0 slot 4 B primary bus 33 + *34 PCI 0 slot 4 C primary bus 34 + *35 PCI 0 slot 4 D primary bus + *36 PCI 0 slot 5 A primary bus + *37 PCI 0 slot 5 B primary bus + *38 PCI 0 slot 5 C primary bus + *39 PCI 0 slot 5 D primary bus + *40 PCI 0 slot 6 A primary bus + *41 PCI 0 slot 6 B primary bus + *42 PCI 0 slot 6 C primary bus + *43 PCI 0 slot 6 D primary bus + *44 PCI 0 slot 7 A primary bus + *45 PCI 0 slot 7 B primary bus + *46 PCI 0 slot 7 C primary bus + *47 PCI 0 slot 7 D primary bus + *48 PCI 0 slot 0 A secondary bus + *49 PCI 0 slot 0 B secondary bus + *50 PCI 0 slot 0 C secondary bus + *51 PCI 0 slot 0 D secondary bus + *52 PCI 0 slot 1 A secondary bus + *53 PCI 0 slot 1 B secondary bus + *54 PCI 0 slot 1 C secondary bus + *55 PCI 0 slot 1 D secondary bus + *56 PCI 0 slot 2 A secondary bus + *57 PCI 0 slot 2 B secondary bus + *58 PCI 0 slot 2 C secondary bus + *59 PCI 0 slot 2 D secondary bus + *60 PCI 0 slot 3 A secondary bus + *61 PCI 0 slot 3 B secondary bus + *62 PCI 0 slot 3 C secondary bus + *63 PCI 0 slot 3 D secondary bus + */ + +static void +lynx_update_irq_hw(unsigned long bit, unsigned long mask) +{ + /* + * Write the AIR register on the T3/T4 with the + * address of the IC mask register (offset 0x40) + */ + *(vulp)T2_AIR = 0x40; + mb(); + *(vulp)T2_AIR; /* re-read to force write */ + mb(); + *(vulp)T2_DIR = mask; + mb(); + mb(); +} + +static void +lynx_ack_irq_hw(unsigned long bit) +{ + *(vulp)T2_VAR = (u_long) bit; + mb(); + mb(); +} + +static irq_swizzle_t lynx_irq_swizzle = { + { /* irq_to_mask */ + -1, 6, -1, 8, 15, 12, 7, 9, /* pseudo PIC 0-7 */ + -1, 16, 17, 18, 3, -1, 21, 22, /* pseudo PIC 8-15 */ + -1, -1, -1, -1, -1, -1, -1, -1, /* pseudo */ + -1, -1, -1, -1, 28, -1, -1, -1, /* pseudo */ + 32, 33, 34, 35, 36, 37, 38, 39, /* mask 32-39 */ + 40, 41, 42, 43, 44, 45, 46, 47, /* mask 40-47 */ + 48, 49, 50, 51, 52, 53, 54, 55, /* mask 48-55 */ + 56, 57, 58, 59, 60, 61, 62, 63 /* mask 56-63 */ + }, + { /* 
mask_to_irq */ + -1, -1, -1, 12, -1, -1, 1, 6, /* mask 0-7 */ + 3, 7, -1, -1, 5, -1, -1, 4, /* mask 8-15 */ + 9, 10, 11, -1, -1, 14, 15, -1, /* mask 16-23 */ + -1, -1, -1, -1, 28, -1, -1, -1, /* mask 24-31 */ + 32, 33, 34, 35, 36, 37, 38, 39, /* mask 32-39 */ + 40, 41, 42, 43, 44, 45, 46, 47, /* mask 40-47 */ + 48, 49, 50, 51, 52, 53, 54, 55, /* mask 48-55 */ + 56, 57, 58, 59, 60, 61, 62, 63 /* mask 56-63 */ + }, + -1, + lynx_update_irq_hw, + lynx_ack_irq_hw +}; + +static void __init +lynx_init_irq(void) +{ + sable_lynx_irq_swizzle = &lynx_irq_swizzle; + sable_lynx_init_irq(64); +} + +/* + * PCI Fixup configuration for ALPHA LYNX (2100A) + * + * The device to slot mapping looks like: + * + * Slot Device + * 0 none + * 1 none + * 2 PCI-EISA bridge + * 3 PCI-PCI bridge + * 4 NCR 810 (Demi-Lynx only) + * 5 none + * 6 PCI on board slot 4 + * 7 PCI on board slot 5 + * 8 PCI on board slot 6 + * 9 PCI on board slot 7 + * + * And behind the PPB we have: + * + * 11 PCI on board slot 0 + * 12 PCI on board slot 1 + * 13 PCI on board slot 2 + * 14 PCI on board slot 3 + */ +/* + * NOTE: the IRQ assignments below are arbitrary, but need to be consistent + * with the values in the irq swizzling tables above. + */ + +static int +lynx_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[19][5] = { + /*INT INTA INTB INTC INTD */ + { -1, -1, -1, -1, -1}, /* IdSel 13, PCEB */ + { -1, -1, -1, -1, -1}, /* IdSel 14, PPB */ + { 28, 28, 28, 28, 28}, /* IdSel 15, NCR demi */ + { -1, -1, -1, -1, -1}, /* IdSel 16, none */ + { 32, 32, 33, 34, 35}, /* IdSel 17, slot 4 */ + { 36, 36, 37, 38, 39}, /* IdSel 18, slot 5 */ + { 40, 40, 41, 42, 43}, /* IdSel 19, slot 6 */ + { 44, 44, 45, 46, 47}, /* IdSel 20, slot 7 */ + { -1, -1, -1, -1, -1}, /* IdSel 22, none */ + /* The following are actually behind the PPB. */ + { -1, -1, -1, -1, -1}, /* IdSel 16 none */ + { 28, 28, 28, 28, 28}, /* IdSel 17 NCR lynx */ + { -1, -1, -1, -1, -1}, /* IdSel 18 none */ + { -1, -1, -1, -1, -1}, /* IdSel 19 none */ + { -1, -1, -1, -1, -1}, /* IdSel 20 none */ + { -1, -1, -1, -1, -1}, /* IdSel 21 none */ + { 48, 48, 49, 50, 51}, /* IdSel 22 slot 0 */ + { 52, 52, 53, 54, 55}, /* IdSel 23 slot 1 */ + { 56, 56, 57, 58, 59}, /* IdSel 24 slot 2 */ + { 60, 60, 61, 62, 63} /* IdSel 25 slot 3 */ + }; + const long min_idsel = 2, max_idsel = 20, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} + +static u8 +lynx_swizzle(struct pci_dev *dev, u8 *pinp) +{ + int slot, pin = *pinp; + + if (dev->bus->number == 0) { + slot = PCI_SLOT(dev->devfn); + } + /* Check for the built-in bridge */ + else if (PCI_SLOT(dev->bus->self->devfn) == 3) { + slot = PCI_SLOT(dev->devfn) + 11; + } + else + { + /* Must be a card-based bridge. */ + do { + if (PCI_SLOT(dev->bus->self->devfn) == 3) { + slot = PCI_SLOT(dev->devfn) + 11; + break; + } + pin = pci_swizzle_interrupt_pin(dev, pin); + + /* Move up the chain of bridges. */ + dev = dev->bus->self; + /* Slot of the next bridge. 
*/ + slot = PCI_SLOT(dev->devfn); + } while (dev->bus->self); + } + *pinp = pin; + return slot; +} + +#endif /* defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_LYNX) */ + +/***********************************************************************/ +/* GENERIC irq routines */ + +static inline void +sable_lynx_enable_irq(struct irq_data *d) +{ + unsigned long bit, mask; + + bit = sable_lynx_irq_swizzle->irq_to_mask[d->irq]; + spin_lock(&sable_lynx_irq_lock); + mask = sable_lynx_irq_swizzle->shadow_mask &= ~(1UL << bit); + sable_lynx_irq_swizzle->update_irq_hw(bit, mask); + spin_unlock(&sable_lynx_irq_lock); +#if 0 + printk("%s: mask 0x%lx bit 0x%lx irq 0x%x\n", + __func__, mask, bit, irq); +#endif +} + +static void +sable_lynx_disable_irq(struct irq_data *d) +{ + unsigned long bit, mask; + + bit = sable_lynx_irq_swizzle->irq_to_mask[d->irq]; + spin_lock(&sable_lynx_irq_lock); + mask = sable_lynx_irq_swizzle->shadow_mask |= 1UL << bit; + sable_lynx_irq_swizzle->update_irq_hw(bit, mask); + spin_unlock(&sable_lynx_irq_lock); +#if 0 + printk("%s: mask 0x%lx bit 0x%lx irq 0x%x\n", + __func__, mask, bit, irq); +#endif +} + +static void +sable_lynx_mask_and_ack_irq(struct irq_data *d) +{ + unsigned long bit, mask; + + bit = sable_lynx_irq_swizzle->irq_to_mask[d->irq]; + spin_lock(&sable_lynx_irq_lock); + mask = sable_lynx_irq_swizzle->shadow_mask |= 1UL << bit; + sable_lynx_irq_swizzle->update_irq_hw(bit, mask); + sable_lynx_irq_swizzle->ack_irq_hw(bit); + spin_unlock(&sable_lynx_irq_lock); +} + +static struct irq_chip sable_lynx_irq_type = { + .name = "SABLE/LYNX", + .irq_unmask = sable_lynx_enable_irq, + .irq_mask = sable_lynx_disable_irq, + .irq_mask_ack = sable_lynx_mask_and_ack_irq, +}; + +static void +sable_lynx_srm_device_interrupt(unsigned long vector) +{ + /* Note that the vector reported by the SRM PALcode corresponds + to the interrupt mask bits, but we have to manage via the + so-called legacy IRQs for many common devices. */ + + int bit, irq; + + bit = (vector - 0x800) >> 4; + irq = sable_lynx_irq_swizzle->mask_to_irq[bit]; +#if 0 + printk("%s: vector 0x%lx bit 0x%x irq 0x%x\n", + __func__, vector, bit, irq); +#endif + handle_irq(irq); +} + +static void __init +sable_lynx_init_irq(int nr_of_irqs) +{ + long i; + + for (i = 0; i < nr_of_irqs; ++i) { + irq_set_chip_and_handler(i, &sable_lynx_irq_type, + handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + + common_init_isa_dma(); +} + +static void __init +sable_lynx_init_pci(void) +{ + common_init_pci(); +} + +/*****************************************************************/ +/* + * The System Vectors + * + * In order that T2_HAE_ADDRESS should be a constant, we play + * these games with GAMMA_BIAS. 
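+ * (GAMMA_BIAS is forced to 0 for the plain Sable vector and to
+ * _GAMMA_BIAS for the Sable-Gamma and Lynx vectors below.)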
+ */ + +#if defined(CONFIG_ALPHA_GENERIC) || \ + (defined(CONFIG_ALPHA_SABLE) && !defined(CONFIG_ALPHA_GAMMA)) +#undef GAMMA_BIAS +#define GAMMA_BIAS 0 +struct alpha_machine_vector sable_mv __initmv = { + .vector_name = "Sable", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_T2_IO, + .machine_check = t2_machine_check, + .max_isa_dma_address = ALPHA_SABLE_MAX_ISA_DMA_ADDRESS, + .min_io_address = EISA_DEFAULT_IO_BASE, + .min_mem_address = T2_DEFAULT_MEM_BASE, + + .nr_irqs = 40, + .device_interrupt = sable_lynx_srm_device_interrupt, + + .init_arch = t2_init_arch, + .init_irq = sable_init_irq, + .init_rtc = common_init_rtc, + .init_pci = sable_lynx_init_pci, + .kill_arch = t2_kill_arch, + .pci_map_irq = sable_map_irq, + .pci_swizzle = common_swizzle, + + .sys = { .t2 = { + .gamma_bias = 0 + } } +}; +ALIAS_MV(sable) +#endif /* GENERIC || (SABLE && !GAMMA) */ + +#if defined(CONFIG_ALPHA_GENERIC) || \ + (defined(CONFIG_ALPHA_SABLE) && defined(CONFIG_ALPHA_GAMMA)) +#undef GAMMA_BIAS +#define GAMMA_BIAS _GAMMA_BIAS +struct alpha_machine_vector sable_gamma_mv __initmv = { + .vector_name = "Sable-Gamma", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_T2_IO, + .machine_check = t2_machine_check, + .max_isa_dma_address = ALPHA_SABLE_MAX_ISA_DMA_ADDRESS, + .min_io_address = EISA_DEFAULT_IO_BASE, + .min_mem_address = T2_DEFAULT_MEM_BASE, + + .nr_irqs = 40, + .device_interrupt = sable_lynx_srm_device_interrupt, + + .init_arch = t2_init_arch, + .init_irq = sable_init_irq, + .init_rtc = common_init_rtc, + .init_pci = sable_lynx_init_pci, + .kill_arch = t2_kill_arch, + .pci_map_irq = sable_map_irq, + .pci_swizzle = common_swizzle, + + .sys = { .t2 = { + .gamma_bias = _GAMMA_BIAS + } } +}; +ALIAS_MV(sable_gamma) +#endif /* GENERIC || (SABLE && GAMMA) */ + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_LYNX) +#undef GAMMA_BIAS +#define GAMMA_BIAS _GAMMA_BIAS +struct alpha_machine_vector lynx_mv __initmv = { + .vector_name = "Lynx", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_T2_IO, + .machine_check = t2_machine_check, + .max_isa_dma_address = ALPHA_SABLE_MAX_ISA_DMA_ADDRESS, + .min_io_address = EISA_DEFAULT_IO_BASE, + .min_mem_address = T2_DEFAULT_MEM_BASE, + + .nr_irqs = 64, + .device_interrupt = sable_lynx_srm_device_interrupt, + + .init_arch = t2_init_arch, + .init_irq = lynx_init_irq, + .init_rtc = common_init_rtc, + .init_pci = sable_lynx_init_pci, + .kill_arch = t2_kill_arch, + .pci_map_irq = lynx_map_irq, + .pci_swizzle = lynx_swizzle, + + .sys = { .t2 = { + .gamma_bias = _GAMMA_BIAS + } } +}; +ALIAS_MV(lynx) +#endif /* GENERIC || LYNX */ diff --git a/arch/alpha/kernel/sys_sio.c b/arch/alpha/kernel/sys_sio.c new file mode 100644 index 0000000000..7c420d8dac --- /dev/null +++ b/arch/alpha/kernel/sys_sio.c @@ -0,0 +1,484 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/sys_sio.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * + * Code for all boards that route the PCI interrupts through the SIO + * PCI/ISA bridge. This includes Noname (AXPpci33), Multia (UDB), + * Kenetics's Platform 2000, Avanti (AlphaStation), XL, and AlphaBook1. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" +#include "pc873xx.h" + +#if defined(ALPHA_RESTORE_SRM_SETUP) +/* Save LCA configuration data as the console had it set up. 
*/ +struct +{ + unsigned int orig_route_tab; /* for SAVE/RESTORE */ +} saved_config __attribute((common)); +#endif + + +static void __init +sio_init_irq(void) +{ + if (alpha_using_srm) + alpha_mv.device_interrupt = srm_device_interrupt; + + init_i8259a_irqs(); + common_init_isa_dma(); +} + +static inline void __init +alphabook1_init_arch(void) +{ + /* The AlphaBook1 has LCD video fixed at 800x600, + 37 rows and 100 cols. */ + screen_info.orig_y = 37; + screen_info.orig_video_cols = 100; + screen_info.orig_video_lines = 37; + + lca_init_arch(); +} + + +/* + * sio_route_tab selects irq routing in PCI/ISA bridge so that: + * PIRQ0 -> irq 15 + * PIRQ1 -> irq 9 + * PIRQ2 -> irq 10 + * PIRQ3 -> irq 11 + * + * This probably ought to be configurable via MILO. For + * example, sound boards seem to like using IRQ 9. + * + * This is NOT how we should do it. PIRQ0-X should have + * their own IRQs, the way intel uses the IO-APIC IRQs. + */ + +static void __init +sio_pci_route(void) +{ + unsigned int orig_route_tab; + + /* First, ALWAYS read and print the original setting. */ + pci_bus_read_config_dword(pci_isa_hose->bus, PCI_DEVFN(7, 0), 0x60, + &orig_route_tab); + printk("%s: PIRQ original 0x%x new 0x%x\n", __func__, + orig_route_tab, alpha_mv.sys.sio.route_tab); + +#if defined(ALPHA_RESTORE_SRM_SETUP) + saved_config.orig_route_tab = orig_route_tab; +#endif + + /* Now override with desired setting. */ + pci_bus_write_config_dword(pci_isa_hose->bus, PCI_DEVFN(7, 0), 0x60, + alpha_mv.sys.sio.route_tab); +} + +static bool sio_pci_dev_irq_needs_level(const struct pci_dev *dev) +{ + if ((dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) && + (dev->class >> 8 != PCI_CLASS_BRIDGE_PCMCIA)) + return false; + + return true; +} + +static unsigned int __init +sio_collect_irq_levels(void) +{ + unsigned int level_bits = 0; + struct pci_dev *dev = NULL; + + /* Iterate through the devices, collecting IRQ levels. */ + for_each_pci_dev(dev) { + if (!sio_pci_dev_irq_needs_level(dev)) + continue; + + if (dev->irq) + level_bits |= (1 << dev->irq); + } + return level_bits; +} + +static void __sio_fixup_irq_levels(unsigned int level_bits, bool reset) +{ + unsigned int old_level_bits; + + /* + * Now, make all PCI interrupts level sensitive. Notice: + * these registers must be accessed byte-wise. inw()/outw() + * don't work. + * + * Make sure to turn off any level bits set for IRQs 9,10,11,15, + * so that the only bits getting set are for devices actually found. + * Note that we do preserve the remainder of the bits, which we hope + * will be set correctly by ARC/SRM. + * + * Note: we at least preserve any level-set bits on AlphaBook1 + */ + old_level_bits = inb(0x4d0) | (inb(0x4d1) << 8); + + if (reset) + old_level_bits &= 0x71ff; + + level_bits |= old_level_bits; + + outb((level_bits >> 0) & 0xff, 0x4d0); + outb((level_bits >> 8) & 0xff, 0x4d1); +} + +static inline void +sio_fixup_irq_levels(unsigned int level_bits) +{ + __sio_fixup_irq_levels(level_bits, true); +} + +static inline int +noname_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + /* + * The Noname board has 5 PCI slots with each of the 4 + * interrupt pins routed to different pins on the PCI/ISA + * bridge (PIRQ0-PIRQ3). The table below is based on + * information available at: + * + * http://ftp.digital.com/pub/DEC/axppci/ref_interrupts.txt + * + * I have no information on the Avanti interrupt routing, but + * the routing seems to be identical to the Noname except + * that the Avanti has an additional slot whose routing I'm + * unsure of. 
+ * + * pirq_tab[0] is a fake entry to deal with old PCI boards + * that have the interrupt pin number hardwired to 0 (meaning + * that they use the default INTA line, if they are interrupt + * driven at all). + */ + static char irq_tab[][5] = { + /*INT A B C D */ + { 3, 3, 3, 3, 3}, /* idsel 6 (53c810) */ + {-1, -1, -1, -1, -1}, /* idsel 7 (SIO: PCI/ISA bridge) */ + { 2, 2, -1, -1, -1}, /* idsel 8 (Hack: slot closest ISA) */ + {-1, -1, -1, -1, -1}, /* idsel 9 (unused) */ + {-1, -1, -1, -1, -1}, /* idsel 10 (unused) */ + { 0, 0, 2, 1, 0}, /* idsel 11 KN25_PCI_SLOT0 */ + { 1, 1, 0, 2, 1}, /* idsel 12 KN25_PCI_SLOT1 */ + { 2, 2, 1, 0, 2}, /* idsel 13 KN25_PCI_SLOT2 */ + { 0, 0, 0, 0, 0}, /* idsel 14 AS255 TULIP */ + }; + const long min_idsel = 6, max_idsel = 14, irqs_per_slot = 5; + int irq = COMMON_TABLE_LOOKUP, tmp; + tmp = __kernel_extbl(alpha_mv.sys.sio.route_tab, irq); + + irq = irq >= 0 ? tmp : -1; + + /* Fixup IRQ level if an actual IRQ mapping is detected */ + if (sio_pci_dev_irq_needs_level(dev) && irq >= 0) + __sio_fixup_irq_levels(1 << irq, false); + + return irq; +} + +static inline int +p2k_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[][5] = { + /*INT A B C D */ + { 0, 0, -1, -1, -1}, /* idsel 6 (53c810) */ + {-1, -1, -1, -1, -1}, /* idsel 7 (SIO: PCI/ISA bridge) */ + { 1, 1, 2, 3, 0}, /* idsel 8 (slot A) */ + { 2, 2, 3, 0, 1}, /* idsel 9 (slot B) */ + {-1, -1, -1, -1, -1}, /* idsel 10 (unused) */ + {-1, -1, -1, -1, -1}, /* idsel 11 (unused) */ + { 3, 3, -1, -1, -1}, /* idsel 12 (CMD0646) */ + }; + const long min_idsel = 6, max_idsel = 12, irqs_per_slot = 5; + int irq = COMMON_TABLE_LOOKUP, tmp; + tmp = __kernel_extbl(alpha_mv.sys.sio.route_tab, irq); + return irq >= 0 ? tmp : -1; +} + +static inline void __init +noname_init_pci(void) +{ + common_init_pci(); + sio_pci_route(); + sio_fixup_irq_levels(sio_collect_irq_levels()); + + if (pc873xx_probe() == -1) { + printk(KERN_ERR "Probing for PC873xx Super IO chip failed.\n"); + } else { + printk(KERN_INFO "Found %s Super IO chip at 0x%x\n", + pc873xx_get_model(), pc873xx_get_base()); + + /* Enabling things in the Super IO chip doesn't actually + * configure and enable things, the legacy drivers still + * need to do the actual configuration and enabling. + * This only unblocks them. + */ + +#if !defined(CONFIG_ALPHA_AVANTI) + /* Don't bother on the Avanti family. + * None of them had on-board IDE. + */ + pc873xx_enable_ide(); +#endif + pc873xx_enable_epp19(); + } +} + +static inline void __init +alphabook1_init_pci(void) +{ + struct pci_dev *dev; + unsigned char orig, config; + + common_init_pci(); + sio_pci_route(); + + /* + * On the AlphaBook1, the PCMCIA chip (Cirrus 6729) + * is sensitive to PCI bus bursts, so we must DISABLE + * burst mode for the NCR 8xx SCSI... :-( + * + * Note that the NCR810 SCSI driver must preserve the + * setting of the bit in order for this to work. At the + * moment (2.0.29), ncr53c8xx.c does NOT do this, but + * 53c7,8xx.c DOES. 
+ */ + + dev = NULL; + while ((dev = pci_get_device(PCI_VENDOR_ID_NCR, PCI_ANY_ID, dev))) { + if (dev->device == PCI_DEVICE_ID_NCR_53C810 + || dev->device == PCI_DEVICE_ID_NCR_53C815 + || dev->device == PCI_DEVICE_ID_NCR_53C820 + || dev->device == PCI_DEVICE_ID_NCR_53C825) { + unsigned long io_port; + unsigned char ctest4; + + io_port = dev->resource[0].start; + ctest4 = inb(io_port+0x21); + if (!(ctest4 & 0x80)) { + printk("AlphaBook1 NCR init: setting" + " burst disable\n"); + outb(ctest4 | 0x80, io_port+0x21); + } + } + } + + /* Do not set *ANY* level triggers for AlphaBook1. */ + sio_fixup_irq_levels(0); + + /* Make sure that register PR1 indicates 1Mb mem */ + outb(0x0f, 0x3ce); orig = inb(0x3cf); /* read PR5 */ + outb(0x0f, 0x3ce); outb(0x05, 0x3cf); /* unlock PR0-4 */ + outb(0x0b, 0x3ce); config = inb(0x3cf); /* read PR1 */ + if ((config & 0xc0) != 0xc0) { + printk("AlphaBook1 VGA init: setting 1Mb memory\n"); + config |= 0xc0; + outb(0x0b, 0x3ce); outb(config, 0x3cf); /* write PR1 */ + } + outb(0x0f, 0x3ce); outb(orig, 0x3cf); /* (re)lock PR0-4 */ +} + +void +sio_kill_arch(int mode) +{ +#if defined(ALPHA_RESTORE_SRM_SETUP) + /* Since we cannot read the PCI DMA Window CSRs, we + * cannot restore them here. + * + * However, we CAN read the PIRQ route register, so restore it + * now... + */ + pci_bus_write_config_dword(pci_isa_hose->bus, PCI_DEVFN(7, 0), 0x60, + saved_config.orig_route_tab); +#endif +} + + +/* + * The System Vectors + */ + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_BOOK1) +struct alpha_machine_vector alphabook1_mv __initmv = { + .vector_name = "AlphaBook1", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_LCA_IO, + .machine_check = lca_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = APECS_AND_LCA_DEFAULT_MEM_BASE, + + .nr_irqs = 16, + .device_interrupt = isa_device_interrupt, + + .init_arch = alphabook1_init_arch, + .init_irq = sio_init_irq, + .init_rtc = common_init_rtc, + .init_pci = alphabook1_init_pci, + .kill_arch = sio_kill_arch, + .pci_map_irq = noname_map_irq, + .pci_swizzle = common_swizzle, + + .sys = { .sio = { + /* NCR810 SCSI is 14, PCMCIA controller is 15. 
*/ + .route_tab = 0x0e0f0a0a, + }} +}; +ALIAS_MV(alphabook1) +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_AVANTI) +struct alpha_machine_vector avanti_mv __initmv = { + .vector_name = "Avanti", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_APECS_IO, + .machine_check = apecs_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = APECS_AND_LCA_DEFAULT_MEM_BASE, + + .nr_irqs = 16, + .device_interrupt = isa_device_interrupt, + + .init_arch = apecs_init_arch, + .init_irq = sio_init_irq, + .init_rtc = common_init_rtc, + .init_pci = noname_init_pci, + .kill_arch = sio_kill_arch, + .pci_map_irq = noname_map_irq, + .pci_swizzle = common_swizzle, + + .sys = { .sio = { + .route_tab = 0x0b0a050f, /* leave 14 for IDE, 9 for SND */ + }} +}; +ALIAS_MV(avanti) +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_NONAME) +struct alpha_machine_vector noname_mv __initmv = { + .vector_name = "Noname", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_LCA_IO, + .machine_check = lca_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = APECS_AND_LCA_DEFAULT_MEM_BASE, + + .nr_irqs = 16, + .device_interrupt = srm_device_interrupt, + + .init_arch = lca_init_arch, + .init_irq = sio_init_irq, + .init_rtc = common_init_rtc, + .init_pci = noname_init_pci, + .kill_arch = sio_kill_arch, + .pci_map_irq = noname_map_irq, + .pci_swizzle = common_swizzle, + + .sys = { .sio = { + /* For UDB, the only available PCI slot must not map to IRQ 9, + since that's the builtin MSS sound chip. That PCI slot + will map to PIRQ1 (for INTA at least), so we give it IRQ 15 + instead. + + Unfortunately we have to do this for NONAME as well, since + they are co-indicated when the platform type "Noname" is + selected... 
:-( */ + + .route_tab = 0x0b0a0f0d, + }} +}; +ALIAS_MV(noname) +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_P2K) +struct alpha_machine_vector p2k_mv __initmv = { + .vector_name = "Platform2000", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_LCA_IO, + .machine_check = lca_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = APECS_AND_LCA_DEFAULT_MEM_BASE, + + .nr_irqs = 16, + .device_interrupt = srm_device_interrupt, + + .init_arch = lca_init_arch, + .init_irq = sio_init_irq, + .init_rtc = common_init_rtc, + .init_pci = noname_init_pci, + .kill_arch = sio_kill_arch, + .pci_map_irq = p2k_map_irq, + .pci_swizzle = common_swizzle, + + .sys = { .sio = { + .route_tab = 0x0b0a090f, + }} +}; +ALIAS_MV(p2k) +#endif + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_XL) +struct alpha_machine_vector xl_mv __initmv = { + .vector_name = "XL", + DO_EV4_MMU, + DO_DEFAULT_RTC, + DO_APECS_IO, + .machine_check = apecs_machine_check, + .max_isa_dma_address = ALPHA_XL_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = XL_DEFAULT_MEM_BASE, + + .nr_irqs = 16, + .device_interrupt = isa_device_interrupt, + + .init_arch = apecs_init_arch, + .init_irq = sio_init_irq, + .init_rtc = common_init_rtc, + .init_pci = noname_init_pci, + .kill_arch = sio_kill_arch, + .pci_map_irq = noname_map_irq, + .pci_swizzle = common_swizzle, + + .sys = { .sio = { + .route_tab = 0x0b0a090f, + }} +}; +ALIAS_MV(xl) +#endif diff --git a/arch/alpha/kernel/sys_sx164.c b/arch/alpha/kernel/sys_sx164.c new file mode 100644 index 0000000000..dd9de84b63 --- /dev/null +++ b/arch/alpha/kernel/sys_sx164.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/sys_sx164.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999, 2000 Richard Henderson + * + * Code supporting the SX164 (PCA56+PYXIS). + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + + +static void __init +sx164_init_irq(void) +{ + outb(0, DMA1_RESET_REG); + outb(0, DMA2_RESET_REG); + outb(DMA_MODE_CASCADE, DMA2_MODE_REG); + outb(0, DMA2_MASK_REG); + + if (alpha_using_srm) + alpha_mv.device_interrupt = srm_device_interrupt; + + init_i8259a_irqs(); + + /* Not interested in the bogus interrupts (0,3,4,5,40-47), + NMI (1), or HALT (2). */ + if (alpha_using_srm) + init_srm_irqs(40, 0x3f0000); + else + init_pyxis_irqs(0xff00003f0000UL); + + if (request_irq(16 + 6, no_action, 0, "timer-cascade", NULL)) + pr_err("Failed to register timer-cascade interrupt\n"); +} + +/* + * PCI Fixup configuration. 
+ * + * Summary @ PYXIS_INT_REQ: + * Bit Meaning + * 0 RSVD + * 1 NMI + * 2 Halt/Reset switch + * 3 MBZ + * 4 RAZ + * 5 RAZ + * 6 Interval timer (RTC) + * 7 PCI-ISA Bridge + * 8 Interrupt Line A from slot 3 + * 9 Interrupt Line A from slot 2 + *10 Interrupt Line A from slot 1 + *11 Interrupt Line A from slot 0 + *12 Interrupt Line B from slot 3 + *13 Interrupt Line B from slot 2 + *14 Interrupt Line B from slot 1 + *15 Interrupt line B from slot 0 + *16 Interrupt Line C from slot 3 + *17 Interrupt Line C from slot 2 + *18 Interrupt Line C from slot 1 + *19 Interrupt Line C from slot 0 + *20 Interrupt Line D from slot 3 + *21 Interrupt Line D from slot 2 + *22 Interrupt Line D from slot 1 + *23 Interrupt Line D from slot 0 + * + * IdSel + * 5 32 bit PCI option slot 2 + * 6 64 bit PCI option slot 0 + * 7 64 bit PCI option slot 1 + * 8 Cypress I/O + * 9 32 bit PCI option slot 3 + */ + +static int +sx164_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[5][5] = { + /*INT INTA INTB INTC INTD */ + { 16+ 9, 16+ 9, 16+13, 16+17, 16+21}, /* IdSel 5 slot 2 J17 */ + { 16+11, 16+11, 16+15, 16+19, 16+23}, /* IdSel 6 slot 0 J19 */ + { 16+10, 16+10, 16+14, 16+18, 16+22}, /* IdSel 7 slot 1 J18 */ + { -1, -1, -1, -1, -1}, /* IdSel 8 SIO */ + { 16+ 8, 16+ 8, 16+12, 16+16, 16+20} /* IdSel 9 slot 3 J15 */ + }; + const long min_idsel = 5, max_idsel = 9, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} + +static void __init +sx164_init_pci(void) +{ + cia_init_pci(); + SMC669_Init(0); +} + +static void __init +sx164_init_arch(void) +{ + /* + * OSF palcode v1.23 forgets to enable PCA56 Motion Video + * Instructions. Let's enable it. + * We have to check palcode revision because CSERVE interface + * is subject to change without notice. For example, it + * has been changed completely since v1.16 (found in MILO + * distribution). 
-ink + */ + struct percpu_struct *cpu = (struct percpu_struct*) + ((char*)hwrpb + hwrpb->processor_offset); + + if (amask(AMASK_MAX) != 0 + && alpha_using_srm + && (cpu->pal_revision & 0xffff) <= 0x117) { + __asm__ __volatile__( + "lda $16,8($31)\n" + "call_pal 9\n" /* Allow PALRES insns in kernel mode */ + ".long 0x64000118\n\n" /* hw_mfpr $0,icsr */ + "ldah $16,(1<<(19-16))($31)\n" + "or $0,$16,$0\n" /* set MVE bit */ + ".long 0x74000118\n" /* hw_mtpr $0,icsr */ + "lda $16,9($31)\n" + "call_pal 9" /* Disable PALRES insns */ + : : : "$0", "$16"); + printk("PCA56 MVI set enabled\n"); + } + + pyxis_init_arch(); +} + +/* + * The System Vector + */ + +struct alpha_machine_vector sx164_mv __initmv = { + .vector_name = "SX164", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_PYXIS_IO, + .machine_check = cia_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = PYXIS_DAC_OFFSET, + + .nr_irqs = 48, + .device_interrupt = pyxis_device_interrupt, + + .init_arch = sx164_init_arch, + .init_irq = sx164_init_irq, + .init_rtc = common_init_rtc, + .init_pci = sx164_init_pci, + .kill_arch = cia_kill_arch, + .pci_map_irq = sx164_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(sx164) diff --git a/arch/alpha/kernel/sys_takara.c b/arch/alpha/kernel/sys_takara.c new file mode 100644 index 0000000000..9e2adb69bc --- /dev/null +++ b/arch/alpha/kernel/sys_takara.c @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/sys_takara.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * + * Code supporting the TAKARA. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" +#include "pc873xx.h" + +/* Note mask bit is true for DISABLED irqs. */ +static unsigned long cached_irq_mask[2] = { -1, -1 }; + +static inline void +takara_update_irq_hw(unsigned long irq, unsigned long mask) +{ + int regaddr; + + mask = (irq >= 64 ? mask << 16 : mask >> ((irq - 16) & 0x30)); + regaddr = 0x510 + (((irq - 16) >> 2) & 0x0c); + outl(mask & 0xffff0000UL, regaddr); +} + +static inline void +takara_enable_irq(struct irq_data *d) +{ + unsigned int irq = d->irq; + unsigned long mask; + mask = (cached_irq_mask[irq >= 64] &= ~(1UL << (irq & 63))); + takara_update_irq_hw(irq, mask); +} + +static void +takara_disable_irq(struct irq_data *d) +{ + unsigned int irq = d->irq; + unsigned long mask; + mask = (cached_irq_mask[irq >= 64] |= 1UL << (irq & 63)); + takara_update_irq_hw(irq, mask); +} + +static struct irq_chip takara_irq_type = { + .name = "TAKARA", + .irq_unmask = takara_enable_irq, + .irq_mask = takara_disable_irq, + .irq_mask_ack = takara_disable_irq, +}; + +static void +takara_device_interrupt(unsigned long vector) +{ + unsigned intstatus; + + /* + * The PALcode will have passed us vectors 0x800 or 0x810, + * which are fairly arbitrary values and serve only to tell + * us whether an interrupt has come in on IRQ0 or IRQ1. If + * it's IRQ1 it's a PCI interrupt; if it's IRQ0, it's + * probably ISA, but PCI interrupts can come through IRQ0 + * as well if the interrupt controller isn't in accelerated + * mode. 
+ * + * OTOH, the accelerator thing doesn't seem to be working + * overly well, so what we'll do instead is try directly + * examining the Master Interrupt Register to see if it's a + * PCI interrupt, and if _not_ then we'll pass it on to the + * ISA handler. + */ + + intstatus = inw(0x500) & 15; + if (intstatus) { + /* + * This is a PCI interrupt. Check each bit and + * despatch an interrupt if it's set. + */ + + if (intstatus & 8) handle_irq(16+3); + if (intstatus & 4) handle_irq(16+2); + if (intstatus & 2) handle_irq(16+1); + if (intstatus & 1) handle_irq(16+0); + } else { + isa_device_interrupt (vector); + } +} + +static void +takara_srm_device_interrupt(unsigned long vector) +{ + int irq = (vector - 0x800) >> 4; + handle_irq(irq); +} + +static void __init +takara_init_irq(void) +{ + long i; + + init_i8259a_irqs(); + + if (alpha_using_srm) { + alpha_mv.device_interrupt = takara_srm_device_interrupt; + } else { + unsigned int ctlreg = inl(0x500); + + /* Return to non-accelerated mode. */ + ctlreg &= ~0x8000; + outl(ctlreg, 0x500); + + /* Enable the PCI interrupt register. */ + ctlreg = 0x05107c00; + outl(ctlreg, 0x500); + } + + for (i = 16; i < 128; i += 16) + takara_update_irq_hw(i, -1); + + for (i = 16; i < 128; ++i) { + irq_set_chip_and_handler(i, &takara_irq_type, + handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } + + common_init_isa_dma(); +} + + +/* + * The Takara has PCI devices 1, 2, and 3 configured to slots 20, + * 19, and 18 respectively, in the default configuration. They can + * also be jumpered to slots 8, 7, and 6 respectively, which is fun + * because the SIO ISA bridge can also be slot 7. However, the SIO + * doesn't explicitly generate PCI-type interrupts, so we can + * assign it whatever the hell IRQ we like and it doesn't matter. + */ + +static int +takara_map_irq_srm(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[15][5] = { + { 16+3, 16+3, 16+3, 16+3, 16+3}, /* slot 6 == device 3 */ + { 16+2, 16+2, 16+2, 16+2, 16+2}, /* slot 7 == device 2 */ + { 16+1, 16+1, 16+1, 16+1, 16+1}, /* slot 8 == device 1 */ + { -1, -1, -1, -1, -1}, /* slot 9 == nothing */ + { -1, -1, -1, -1, -1}, /* slot 10 == nothing */ + { -1, -1, -1, -1, -1}, /* slot 11 == nothing */ + /* These are behind the bridges. */ + { 12, 12, 13, 14, 15}, /* slot 12 == nothing */ + { 8, 8, 9, 10, 11}, /* slot 13 == nothing */ + { 4, 4, 5, 6, 7}, /* slot 14 == nothing */ + { 0, 0, 1, 2, 3}, /* slot 15 == nothing */ + { -1, -1, -1, -1, -1}, /* slot 16 == nothing */ + {64+ 0, 64+0, 64+1, 64+2, 64+3}, /* slot 17= device 4 */ + {48+ 0, 48+0, 48+1, 48+2, 48+3}, /* slot 18= device 3 */ + {32+ 0, 32+0, 32+1, 32+2, 32+3}, /* slot 19= device 2 */ + {16+ 0, 16+0, 16+1, 16+2, 16+3}, /* slot 20= device 1 */ + }; + const long min_idsel = 6, max_idsel = 20, irqs_per_slot = 5; + int irq = COMMON_TABLE_LOOKUP; + if (irq >= 0 && irq < 16) { + /* Guess that we are behind a bridge.
*/ + unsigned int busslot = PCI_SLOT(dev->bus->self->devfn); + irq += irq_tab[busslot-min_idsel][0]; + } + return irq; +} + +static int __init +takara_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[15][5] __initdata = { + { 16+3, 16+3, 16+3, 16+3, 16+3}, /* slot 6 == device 3 */ + { 16+2, 16+2, 16+2, 16+2, 16+2}, /* slot 7 == device 2 */ + { 16+1, 16+1, 16+1, 16+1, 16+1}, /* slot 8 == device 1 */ + { -1, -1, -1, -1, -1}, /* slot 9 == nothing */ + { -1, -1, -1, -1, -1}, /* slot 10 == nothing */ + { -1, -1, -1, -1, -1}, /* slot 11 == nothing */ + { -1, -1, -1, -1, -1}, /* slot 12 == nothing */ + { -1, -1, -1, -1, -1}, /* slot 13 == nothing */ + { -1, -1, -1, -1, -1}, /* slot 14 == nothing */ + { -1, -1, -1, -1, -1}, /* slot 15 == nothing */ + { -1, -1, -1, -1, -1}, /* slot 16 == nothing */ + { -1, -1, -1, -1, -1}, /* slot 17 == nothing */ + { 16+3, 16+3, 16+3, 16+3, 16+3}, /* slot 18 == device 3 */ + { 16+2, 16+2, 16+2, 16+2, 16+2}, /* slot 19 == device 2 */ + { 16+1, 16+1, 16+1, 16+1, 16+1}, /* slot 20 == device 1 */ + }; + const long min_idsel = 6, max_idsel = 20, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} + +static u8 +takara_swizzle(struct pci_dev *dev, u8 *pinp) +{ + int slot = PCI_SLOT(dev->devfn); + int pin = *pinp; + unsigned int ctlreg = inl(0x500); + unsigned int busslot; + + if (!dev->bus->self) + return slot; + + busslot = PCI_SLOT(dev->bus->self->devfn); + /* Check for built-in bridges. */ + if (dev->bus->number != 0 + && busslot > 16 + && ((1<<(36-busslot)) & ctlreg)) { + if (pin == 1) + pin += (20 - busslot); + else { + printk(KERN_WARNING "takara_swizzle: can only " + "handle cards with INTA IRQ pin.\n"); + } + } else { + /* Must be a card-based bridge. */ + printk(KERN_WARNING "takara_swizzle: cannot handle " + "card-bridge behind builtin bridge yet.\n"); + } + + *pinp = pin; + return slot; +} + +static void __init +takara_init_pci(void) +{ + if (alpha_using_srm) + alpha_mv.pci_map_irq = takara_map_irq_srm; + + cia_init_pci(); + + if (pc873xx_probe() == -1) { + printk(KERN_ERR "Probing for PC873xx Super IO chip failed.\n"); + } else { + printk(KERN_INFO "Found %s Super IO chip at 0x%x\n", + pc873xx_get_model(), pc873xx_get_base()); + pc873xx_enable_ide(); + } +} + + +/* + * The System Vector + */ + +struct alpha_machine_vector takara_mv __initmv = { + .vector_name = "Takara", + DO_EV5_MMU, + DO_DEFAULT_RTC, + DO_CIA_IO, + .machine_check = cia_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = CIA_DEFAULT_MEM_BASE, + + .nr_irqs = 128, + .device_interrupt = takara_device_interrupt, + + .init_arch = cia_init_arch, + .init_irq = takara_init_irq, + .init_rtc = common_init_rtc, + .init_pci = takara_init_pci, + .kill_arch = cia_kill_arch, + .pci_map_irq = takara_map_irq, + .pci_swizzle = takara_swizzle, +}; +ALIAS_MV(takara) diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c new file mode 100644 index 0000000000..b1f3b4fcf9 --- /dev/null +++ b/arch/alpha/kernel/sys_titan.c @@ -0,0 +1,419 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/sys_titan.c + * + * Copyright (C) 1995 David A Rusling + * Copyright (C) 1996, 1999 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson + * Copyright (C) 1999, 2000 Jeff Wiedemeier + * + * Code supporting TITAN systems (EV6+TITAN), currently: + * Privateer + * Falcon + * Granite + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include 
+#include +#include +#include +#include +#include +#include + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" +#include "err_impl.h" + + +/* + * Titan generic + */ + +/* + * Titan supports up to 4 CPUs + */ +static unsigned long titan_cpu_irq_affinity[4] = { ~0UL, ~0UL, ~0UL, ~0UL }; + +/* + * Mask is set (1) if enabled + */ +static unsigned long titan_cached_irq_mask; + +/* + * Need SMP-safe access to interrupt CSRs + */ +DEFINE_SPINLOCK(titan_irq_lock); + +static void +titan_update_irq_hw(unsigned long mask) +{ + register titan_cchip *cchip = TITAN_cchip; + unsigned long isa_enable = 1UL << 55; + register int bcpu = boot_cpuid; + +#ifdef CONFIG_SMP + cpumask_t cpm; + volatile unsigned long *dim0, *dim1, *dim2, *dim3; + unsigned long mask0, mask1, mask2, mask3, dummy; + + cpumask_copy(&cpm, cpu_present_mask); + mask &= ~isa_enable; + mask0 = mask & titan_cpu_irq_affinity[0]; + mask1 = mask & titan_cpu_irq_affinity[1]; + mask2 = mask & titan_cpu_irq_affinity[2]; + mask3 = mask & titan_cpu_irq_affinity[3]; + + if (bcpu == 0) mask0 |= isa_enable; + else if (bcpu == 1) mask1 |= isa_enable; + else if (bcpu == 2) mask2 |= isa_enable; + else mask3 |= isa_enable; + + dim0 = &cchip->dim0.csr; + dim1 = &cchip->dim1.csr; + dim2 = &cchip->dim2.csr; + dim3 = &cchip->dim3.csr; + if (!cpumask_test_cpu(0, &cpm)) dim0 = &dummy; + if (!cpumask_test_cpu(1, &cpm)) dim1 = &dummy; + if (!cpumask_test_cpu(2, &cpm)) dim2 = &dummy; + if (!cpumask_test_cpu(3, &cpm)) dim3 = &dummy; + + *dim0 = mask0; + *dim1 = mask1; + *dim2 = mask2; + *dim3 = mask3; + mb(); + *dim0; + *dim1; + *dim2; + *dim3; +#else + volatile unsigned long *dimB; + dimB = &cchip->dim0.csr; + if (bcpu == 1) dimB = &cchip->dim1.csr; + else if (bcpu == 2) dimB = &cchip->dim2.csr; + else if (bcpu == 3) dimB = &cchip->dim3.csr; + + *dimB = mask | isa_enable; + mb(); + *dimB; +#endif +} + +static inline void +titan_enable_irq(struct irq_data *d) +{ + unsigned int irq = d->irq; + spin_lock(&titan_irq_lock); + titan_cached_irq_mask |= 1UL << (irq - 16); + titan_update_irq_hw(titan_cached_irq_mask); + spin_unlock(&titan_irq_lock); +} + +static inline void +titan_disable_irq(struct irq_data *d) +{ + unsigned int irq = d->irq; + spin_lock(&titan_irq_lock); + titan_cached_irq_mask &= ~(1UL << (irq - 16)); + titan_update_irq_hw(titan_cached_irq_mask); + spin_unlock(&titan_irq_lock); +} + +static void +titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity) +{ + int cpu; + + for (cpu = 0; cpu < 4; cpu++) { + if (cpumask_test_cpu(cpu, &affinity)) + titan_cpu_irq_affinity[cpu] |= 1UL << irq; + else + titan_cpu_irq_affinity[cpu] &= ~(1UL << irq); + } + +} + +static int +titan_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity, + bool force) +{ + unsigned int irq = d->irq; + spin_lock(&titan_irq_lock); + titan_cpu_set_irq_affinity(irq - 16, *affinity); + titan_update_irq_hw(titan_cached_irq_mask); + spin_unlock(&titan_irq_lock); + + return 0; +} + +static void +titan_device_interrupt(unsigned long vector) +{ + printk("titan_device_interrupt: NOT IMPLEMENTED YET!!\n"); +} + +static void +titan_srm_device_interrupt(unsigned long vector) +{ + int irq; + + irq = (vector - 0x800) >> 4; + handle_irq(irq); +} + + +static void __init +init_titan_irqs(struct irq_chip * ops, int imin, int imax) +{ + long i; + for (i = imin; i <= imax; ++i) { + irq_set_chip_and_handler(i, ops, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } +} + +static struct irq_chip titan_irq_type = { + .name = "TITAN", + 
.irq_unmask = titan_enable_irq, + .irq_mask = titan_disable_irq, + .irq_mask_ack = titan_disable_irq, + .irq_set_affinity = titan_set_irq_affinity, +}; + +static irqreturn_t +titan_intr_nop(int irq, void *dev_id) +{ + /* + * This is a NOP interrupt handler for the purposes of + * event counting -- just return. + */ + return IRQ_HANDLED; +} + +static void __init +titan_init_irq(void) +{ + if (alpha_using_srm && !alpha_mv.device_interrupt) + alpha_mv.device_interrupt = titan_srm_device_interrupt; + if (!alpha_mv.device_interrupt) + alpha_mv.device_interrupt = titan_device_interrupt; + + titan_update_irq_hw(0); + + init_titan_irqs(&titan_irq_type, 16, 63 + 16); +} + +static void __init +titan_legacy_init_irq(void) +{ + /* init the legacy dma controller */ + outb(0, DMA1_RESET_REG); + outb(0, DMA2_RESET_REG); + outb(DMA_MODE_CASCADE, DMA2_MODE_REG); + outb(0, DMA2_MASK_REG); + + /* init the legacy irq controller */ + init_i8259a_irqs(); + + /* init the titan irqs */ + titan_init_irq(); +} + +void +titan_dispatch_irqs(u64 mask) +{ + unsigned long vector; + + /* + * Mask down to those interrupts which are enable on this processor + */ + mask &= titan_cpu_irq_affinity[smp_processor_id()]; + + /* + * Dispatch all requested interrupts + */ + while (mask) { + /* convert to SRM vector... priority is <63> -> <0> */ + vector = 63 - __kernel_ctlz(mask); + mask &= ~(1UL << vector); /* clear it out */ + vector = 0x900 + (vector << 4); /* convert to SRM vector */ + + /* dispatch it */ + alpha_mv.device_interrupt(vector); + } +} + + +/* + * Titan Family + */ +static void __init +titan_request_irq(unsigned int irq, irq_handler_t handler, + unsigned long irqflags, const char *devname, + void *dev_id) +{ + int err; + err = request_irq(irq, handler, irqflags, devname, dev_id); + if (err) { + printk("titan_request_irq for IRQ %d returned %d; ignoring\n", + irq, err); + } +} + +static void __init +titan_late_init(void) +{ + /* + * Enable the system error interrupts. These interrupts are + * all reported to the kernel as machine checks, so the handler + * is a nop so it can be called to count the individual events. + */ + titan_request_irq(63+16, titan_intr_nop, 0, + "CChip Error", NULL); + titan_request_irq(62+16, titan_intr_nop, 0, + "PChip 0 H_Error", NULL); + titan_request_irq(61+16, titan_intr_nop, 0, + "PChip 1 H_Error", NULL); + titan_request_irq(60+16, titan_intr_nop, 0, + "PChip 0 C_Error", NULL); + titan_request_irq(59+16, titan_intr_nop, 0, + "PChip 1 C_Error", NULL); + + /* + * Register our error handlers. + */ + titan_register_error_handlers(); + + /* + * Check if the console left us any error logs. + */ + cdl_check_console_data_log(); + +} + +static int +titan_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + u8 intline; + int irq; + + /* Get the current intline. */ + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &intline); + irq = intline; + + /* Is it explicitly routed through ISA? */ + if ((irq & 0xF0) == 0xE0) + return irq; + + /* Offset by 16 to make room for ISA interrupts 0 - 15. */ + return irq + 16; +} + +static void __init +titan_init_pci(void) +{ + /* + * This isn't really the right place, but there's some init + * that needs to be done after everything is basically up. 
+ */ + titan_late_init(); + + /* Indicate that we trust the console to configure things properly */ + pci_set_flags(PCI_PROBE_ONLY); + common_init_pci(); + SMC669_Init(0); + locate_and_init_vga(NULL); +} + + +/* + * Privateer + */ +static void __init +privateer_init_pci(void) +{ + /* + * Hook a couple of extra err interrupts that the + * common titan code won't. + */ + titan_request_irq(53+16, titan_intr_nop, 0, + "NMI", NULL); + titan_request_irq(50+16, titan_intr_nop, 0, + "Temperature Warning", NULL); + + /* + * Finish with the common version. + */ + return titan_init_pci(); +} + + +/* + * The System Vectors. + */ +struct alpha_machine_vector titan_mv __initmv = { + .vector_name = "TITAN", + DO_EV6_MMU, + DO_DEFAULT_RTC, + DO_TITAN_IO, + .machine_check = titan_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = TITAN_DAC_OFFSET, + + .nr_irqs = 80, /* 64 + 16 */ + /* device_interrupt will be filled in by titan_init_irq */ + + .agp_info = titan_agp_info, + + .init_arch = titan_init_arch, + .init_irq = titan_legacy_init_irq, + .init_rtc = common_init_rtc, + .init_pci = titan_init_pci, + + .kill_arch = titan_kill_arch, + .pci_map_irq = titan_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(titan) + +struct alpha_machine_vector privateer_mv __initmv = { + .vector_name = "PRIVATEER", + DO_EV6_MMU, + DO_DEFAULT_RTC, + DO_TITAN_IO, + .machine_check = privateer_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + .pci_dac_offset = TITAN_DAC_OFFSET, + + .nr_irqs = 80, /* 64 + 16 */ + /* device_interrupt will be filled in by titan_init_irq */ + + .agp_info = titan_agp_info, + + .init_arch = titan_init_arch, + .init_irq = titan_legacy_init_irq, + .init_rtc = common_init_rtc, + .init_pci = privateer_init_pci, + + .kill_arch = titan_kill_arch, + .pci_map_irq = titan_map_irq, + .pci_swizzle = common_swizzle, +}; +/* No alpha_mv alias for privateer since we compile it + in unconditionally with titan; setup_arch knows how to cope. */ diff --git a/arch/alpha/kernel/sys_wildfire.c b/arch/alpha/kernel/sys_wildfire.c new file mode 100644 index 0000000000..3cee05443f --- /dev/null +++ b/arch/alpha/kernel/sys_wildfire.c @@ -0,0 +1,341 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/sys_wildfire.c + * + * Wildfire support. + * + * Copyright (C) 2000 Andrea Arcangeli SuSE + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "proto.h" +#include "irq_impl.h" +#include "pci_impl.h" +#include "machvec_impl.h" + +static unsigned long cached_irq_mask[WILDFIRE_NR_IRQS/(sizeof(long)*8)]; + +DEFINE_SPINLOCK(wildfire_irq_lock); + +static int doing_init_irq_hw = 0; + +static void +wildfire_update_irq_hw(unsigned int irq) +{ + int qbbno = (irq >> 8) & (WILDFIRE_MAX_QBB - 1); + int pcano = (irq >> 6) & (WILDFIRE_PCA_PER_QBB - 1); + wildfire_pca *pca; + volatile unsigned long * enable0; + + if (!WILDFIRE_PCA_EXISTS(qbbno, pcano)) { + if (!doing_init_irq_hw) { + printk(KERN_ERR "wildfire_update_irq_hw:" + " got irq %d for non-existent PCA %d" + " on QBB %d.\n", + irq, pcano, qbbno); + } + return; + } + + pca = WILDFIRE_pca(qbbno, pcano); + enable0 = (unsigned long *) &pca->pca_int[0].enable; /* ??? 
*/ + + *enable0 = cached_irq_mask[qbbno * WILDFIRE_PCA_PER_QBB + pcano]; + mb(); + *enable0; +} + +static void __init +wildfire_init_irq_hw(void) +{ +#if 0 + register wildfire_pca * pca = WILDFIRE_pca(0, 0); + volatile unsigned long * enable0, * enable1, * enable2, *enable3; + volatile unsigned long * target0, * target1, * target2, *target3; + + enable0 = (unsigned long *) &pca->pca_int[0].enable; + enable1 = (unsigned long *) &pca->pca_int[1].enable; + enable2 = (unsigned long *) &pca->pca_int[2].enable; + enable3 = (unsigned long *) &pca->pca_int[3].enable; + + target0 = (unsigned long *) &pca->pca_int[0].target; + target1 = (unsigned long *) &pca->pca_int[1].target; + target2 = (unsigned long *) &pca->pca_int[2].target; + target3 = (unsigned long *) &pca->pca_int[3].target; + + *enable0 = *enable1 = *enable2 = *enable3 = 0; + + *target0 = (1UL<<8) | WILDFIRE_QBB(0); + *target1 = *target2 = *target3 = 0; + + mb(); + + *enable0; *enable1; *enable2; *enable3; + *target0; *target1; *target2; *target3; + +#else + int i; + + doing_init_irq_hw = 1; + + /* Need to update only once for every possible PCA. */ + for (i = 0; i < WILDFIRE_NR_IRQS; i+=WILDFIRE_IRQ_PER_PCA) + wildfire_update_irq_hw(i); + + doing_init_irq_hw = 0; +#endif +} + +static void +wildfire_enable_irq(struct irq_data *d) +{ + unsigned int irq = d->irq; + + if (irq < 16) + i8259a_enable_irq(d); + + spin_lock(&wildfire_irq_lock); + set_bit(irq, &cached_irq_mask); + wildfire_update_irq_hw(irq); + spin_unlock(&wildfire_irq_lock); +} + +static void +wildfire_disable_irq(struct irq_data *d) +{ + unsigned int irq = d->irq; + + if (irq < 16) + i8259a_disable_irq(d); + + spin_lock(&wildfire_irq_lock); + clear_bit(irq, &cached_irq_mask); + wildfire_update_irq_hw(irq); + spin_unlock(&wildfire_irq_lock); +} + +static void +wildfire_mask_and_ack_irq(struct irq_data *d) +{ + unsigned int irq = d->irq; + + if (irq < 16) + i8259a_mask_and_ack_irq(d); + + spin_lock(&wildfire_irq_lock); + clear_bit(irq, &cached_irq_mask); + wildfire_update_irq_hw(irq); + spin_unlock(&wildfire_irq_lock); +} + +static struct irq_chip wildfire_irq_type = { + .name = "WILDFIRE", + .irq_unmask = wildfire_enable_irq, + .irq_mask = wildfire_disable_irq, + .irq_mask_ack = wildfire_mask_and_ack_irq, +}; + +static void __init +wildfire_init_irq_per_pca(int qbbno, int pcano) +{ + int i, irq_bias; + + irq_bias = qbbno * (WILDFIRE_PCA_PER_QBB * WILDFIRE_IRQ_PER_PCA) + + pcano * WILDFIRE_IRQ_PER_PCA; + +#if 0 + unsigned long io_bias; + + /* Only need the following for first PCI bus per PCA. */ + io_bias = WILDFIRE_IO(qbbno, pcano<<1) - WILDFIRE_IO_BIAS; + + outb(0, DMA1_RESET_REG + io_bias); + outb(0, DMA2_RESET_REG + io_bias); + outb(DMA_MODE_CASCADE, DMA2_MODE_REG + io_bias); + outb(0, DMA2_MASK_REG + io_bias); +#endif + +#if 0 + /* ??? Not sure how to do this, yet... */ + init_i8259a_irqs(); /* ??? 
*/ +#endif + + for (i = 0; i < 16; ++i) { + if (i == 2) + continue; + irq_set_chip_and_handler(i + irq_bias, &wildfire_irq_type, + handle_level_irq); + irq_set_status_flags(i + irq_bias, IRQ_LEVEL); + } + + irq_set_chip_and_handler(36 + irq_bias, &wildfire_irq_type, + handle_level_irq); + irq_set_status_flags(36 + irq_bias, IRQ_LEVEL); + for (i = 40; i < 64; ++i) { + irq_set_chip_and_handler(i + irq_bias, &wildfire_irq_type, + handle_level_irq); + irq_set_status_flags(i + irq_bias, IRQ_LEVEL); + } + + if (request_irq(32 + irq_bias, no_action, 0, "isa_enable", NULL)) + pr_err("Failed to register isa_enable interrupt\n"); +} + +static void __init +wildfire_init_irq(void) +{ + int qbbno, pcano; + +#if 1 + wildfire_init_irq_hw(); + init_i8259a_irqs(); +#endif + + for (qbbno = 0; qbbno < WILDFIRE_MAX_QBB; qbbno++) { + if (WILDFIRE_QBB_EXISTS(qbbno)) { + for (pcano = 0; pcano < WILDFIRE_PCA_PER_QBB; pcano++) { + if (WILDFIRE_PCA_EXISTS(qbbno, pcano)) { + wildfire_init_irq_per_pca(qbbno, pcano); + } + } + } + } +} + +static void +wildfire_device_interrupt(unsigned long vector) +{ + int irq; + + irq = (vector - 0x800) >> 4; + + /* + * bits 10-8: source QBB ID + * bits 7-6: PCA + * bits 5-0: irq in PCA + */ + + handle_irq(irq); + return; +} + +/* + * PCI Fixup configuration. + * + * Summary per PCA (2 PCI or HIPPI buses): + * + * Bit Meaning + * 0-15 ISA + * + *32 ISA summary + *33 SMI + *34 NMI + *36 builtin QLogic SCSI (or slot 0 if no IO module) + *40 Interrupt Line A from slot 2 PCI0 + *41 Interrupt Line B from slot 2 PCI0 + *42 Interrupt Line C from slot 2 PCI0 + *43 Interrupt Line D from slot 2 PCI0 + *44 Interrupt Line A from slot 3 PCI0 + *45 Interrupt Line B from slot 3 PCI0 + *46 Interrupt Line C from slot 3 PCI0 + *47 Interrupt Line D from slot 3 PCI0 + * + *48 Interrupt Line A from slot 4 PCI1 + *49 Interrupt Line B from slot 4 PCI1 + *50 Interrupt Line C from slot 4 PCI1 + *51 Interrupt Line D from slot 4 PCI1 + *52 Interrupt Line A from slot 5 PCI1 + *53 Interrupt Line B from slot 5 PCI1 + *54 Interrupt Line C from slot 5 PCI1 + *55 Interrupt Line D from slot 5 PCI1 + *56 Interrupt Line A from slot 6 PCI1 + *57 Interrupt Line B from slot 6 PCI1 + *58 Interrupt Line C from slot 6 PCI1 + *50 Interrupt Line D from slot 6 PCI1 + *60 Interrupt Line A from slot 7 PCI1 + *61 Interrupt Line B from slot 7 PCI1 + *62 Interrupt Line C from slot 7 PCI1 + *63 Interrupt Line D from slot 7 PCI1 + * + * + * IdSel + * 0 Cypress Bridge I/O (ISA summary interrupt) + * 1 64 bit PCI 0 option slot 1 (SCSI QLogic builtin) + * 2 64 bit PCI 0 option slot 2 + * 3 64 bit PCI 0 option slot 3 + * 4 64 bit PCI 1 option slot 4 + * 5 64 bit PCI 1 option slot 5 + * 6 64 bit PCI 1 option slot 6 + * 7 64 bit PCI 1 option slot 7 + */ + +static int +wildfire_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + static char irq_tab[8][5] = { + /*INT INTA INTB INTC INTD */ + { -1, -1, -1, -1, -1}, /* IdSel 0 ISA Bridge */ + { 36, 36, 36+1, 36+2, 36+3}, /* IdSel 1 SCSI builtin */ + { 40, 40, 40+1, 40+2, 40+3}, /* IdSel 2 PCI 0 slot 2 */ + { 44, 44, 44+1, 44+2, 44+3}, /* IdSel 3 PCI 0 slot 3 */ + { 48, 48, 48+1, 48+2, 48+3}, /* IdSel 4 PCI 1 slot 4 */ + { 52, 52, 52+1, 52+2, 52+3}, /* IdSel 5 PCI 1 slot 5 */ + { 56, 56, 56+1, 56+2, 56+3}, /* IdSel 6 PCI 1 slot 6 */ + { 60, 60, 60+1, 60+2, 60+3}, /* IdSel 7 PCI 1 slot 7 */ + }; + long min_idsel = 0, max_idsel = 7, irqs_per_slot = 5; + + struct pci_controller *hose = dev->sysdata; + int irq = COMMON_TABLE_LOOKUP; + + if (irq > 0) { + int qbbno = hose->index >> 3; + int 
pcano = (hose->index >> 1) & 3; + irq += (qbbno << 8) + (pcano << 6); + } + return irq; +} + + +/* + * The System Vectors + */ + +struct alpha_machine_vector wildfire_mv __initmv = { + .vector_name = "WILDFIRE", + DO_EV6_MMU, + DO_DEFAULT_RTC, + DO_WILDFIRE_IO, + .machine_check = wildfire_machine_check, + .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, + .min_io_address = DEFAULT_IO_BASE, + .min_mem_address = DEFAULT_MEM_BASE, + + .nr_irqs = WILDFIRE_NR_IRQS, + .device_interrupt = wildfire_device_interrupt, + + .init_arch = wildfire_init_arch, + .init_irq = wildfire_init_irq, + .init_rtc = common_init_rtc, + .init_pci = common_init_pci, + .kill_arch = wildfire_kill_arch, + .pci_map_irq = wildfire_map_irq, + .pci_swizzle = common_swizzle, +}; +ALIAS_MV(wildfire) diff --git a/arch/alpha/kernel/syscalls/Makefile b/arch/alpha/kernel/syscalls/Makefile new file mode 100644 index 0000000000..6713c65a25 --- /dev/null +++ b/arch/alpha/kernel/syscalls/Makefile @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: GPL-2.0 +kapi := arch/$(SRCARCH)/include/generated/asm +uapi := arch/$(SRCARCH)/include/generated/uapi/asm + +_dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \ + $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') + +syscall := $(src)/syscall.tbl +syshdr := $(srctree)/scripts/syscallhdr.sh +systbl := $(srctree)/scripts/syscalltbl.sh + +quiet_cmd_syshdr = SYSHDR $@ + cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --emit-nr $< $@ + +quiet_cmd_systbl = SYSTBL $@ + cmd_systbl = $(CONFIG_SHELL) $(systbl) $< $@ + +$(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE + $(call if_changed,syshdr) + +$(kapi)/syscall_table.h: $(syscall) $(systbl) FORCE + $(call if_changed,systbl) + +uapisyshdr-y += unistd_32.h +kapisyshdr-y += syscall_table.h + +uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y)) +kapisyshdr-y := $(addprefix $(kapi)/, $(kapisyshdr-y)) +targets += $(addprefix ../../../../, $(uapisyshdr-y) $(kapisyshdr-y)) + +PHONY += all +all: $(uapisyshdr-y) $(kapisyshdr-y) + @: diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl new file mode 100644 index 0000000000..3515bc4f16 --- /dev/null +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -0,0 +1,492 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# system call numbers and entry vectors for alpha +# +# The format is: +# +# +# The is always "common" for this file +# +0 common osf_syscall alpha_syscall_zero +1 common exit sys_exit +2 common fork alpha_fork +3 common read sys_read +4 common write sys_write +5 common osf_old_open sys_ni_syscall +6 common close sys_close +7 common osf_wait4 sys_osf_wait4 +8 common osf_old_creat sys_ni_syscall +9 common link sys_link +10 common unlink sys_unlink +11 common osf_execve sys_ni_syscall +12 common chdir sys_chdir +13 common fchdir sys_fchdir +14 common mknod sys_mknod +15 common chmod sys_chmod +16 common chown sys_chown +17 common brk sys_osf_brk +18 common osf_getfsstat sys_ni_syscall +19 common lseek sys_lseek +20 common getxpid sys_getxpid +21 common osf_mount sys_osf_mount +22 common umount2 sys_umount +23 common setuid sys_setuid +24 common getxuid sys_getxuid +25 common exec_with_loader sys_ni_syscall +26 common ptrace sys_ptrace +27 common osf_nrecvmsg sys_ni_syscall +28 common osf_nsendmsg sys_ni_syscall +29 common osf_nrecvfrom sys_ni_syscall +30 common osf_naccept sys_ni_syscall +31 common osf_ngetpeername sys_ni_syscall +32 common osf_ngetsockname sys_ni_syscall +33 common access sys_access +34 common osf_chflags sys_ni_syscall +35 common osf_fchflags 
sys_ni_syscall +36 common sync sys_sync +37 common kill sys_kill +38 common osf_old_stat sys_ni_syscall +39 common setpgid sys_setpgid +40 common osf_old_lstat sys_ni_syscall +41 common dup sys_dup +42 common pipe sys_alpha_pipe +43 common osf_set_program_attributes sys_osf_set_program_attributes +44 common osf_profil sys_ni_syscall +45 common open sys_open +46 common osf_old_sigaction sys_ni_syscall +47 common getxgid sys_getxgid +48 common osf_sigprocmask sys_osf_sigprocmask +49 common osf_getlogin sys_ni_syscall +50 common osf_setlogin sys_ni_syscall +51 common acct sys_acct +52 common sigpending sys_sigpending +54 common ioctl sys_ioctl +55 common osf_reboot sys_ni_syscall +56 common osf_revoke sys_ni_syscall +57 common symlink sys_symlink +58 common readlink sys_readlink +59 common execve sys_execve +60 common umask sys_umask +61 common chroot sys_chroot +62 common osf_old_fstat sys_ni_syscall +63 common getpgrp sys_getpgrp +64 common getpagesize sys_getpagesize +65 common osf_mremap sys_ni_syscall +66 common vfork alpha_vfork +67 common stat sys_newstat +68 common lstat sys_newlstat +69 common osf_sbrk sys_ni_syscall +70 common osf_sstk sys_ni_syscall +71 common mmap sys_osf_mmap +72 common osf_old_vadvise sys_ni_syscall +73 common munmap sys_munmap +74 common mprotect sys_mprotect +75 common madvise sys_madvise +76 common vhangup sys_vhangup +77 common osf_kmodcall sys_ni_syscall +78 common osf_mincore sys_ni_syscall +79 common getgroups sys_getgroups +80 common setgroups sys_setgroups +81 common osf_old_getpgrp sys_ni_syscall +82 common setpgrp sys_setpgid +83 common osf_setitimer compat_sys_setitimer +84 common osf_old_wait sys_ni_syscall +85 common osf_table sys_ni_syscall +86 common osf_getitimer compat_sys_getitimer +87 common gethostname sys_gethostname +88 common sethostname sys_sethostname +89 common getdtablesize sys_getdtablesize +90 common dup2 sys_dup2 +91 common fstat sys_newfstat +92 common fcntl sys_fcntl +93 common osf_select sys_osf_select +94 common poll sys_poll +95 common fsync sys_fsync +96 common setpriority sys_setpriority +97 common socket sys_socket +98 common connect sys_connect +99 common accept sys_accept +100 common getpriority sys_osf_getpriority +101 common send sys_send +102 common recv sys_recv +103 common sigreturn sys_sigreturn +104 common bind sys_bind +105 common setsockopt sys_setsockopt +106 common listen sys_listen +107 common osf_plock sys_ni_syscall +108 common osf_old_sigvec sys_ni_syscall +109 common osf_old_sigblock sys_ni_syscall +110 common osf_old_sigsetmask sys_ni_syscall +111 common sigsuspend sys_sigsuspend +112 common osf_sigstack sys_osf_sigstack +113 common recvmsg sys_recvmsg +114 common sendmsg sys_sendmsg +115 common osf_old_vtrace sys_ni_syscall +116 common osf_gettimeofday sys_osf_gettimeofday +117 common osf_getrusage sys_osf_getrusage +118 common getsockopt sys_getsockopt +120 common readv sys_osf_readv +121 common writev sys_osf_writev +122 common osf_settimeofday sys_osf_settimeofday +123 common fchown sys_fchown +124 common fchmod sys_fchmod +125 common recvfrom sys_recvfrom +126 common setreuid sys_setreuid +127 common setregid sys_setregid +128 common rename sys_rename +129 common truncate sys_truncate +130 common ftruncate sys_ftruncate +131 common flock sys_flock +132 common setgid sys_setgid +133 common sendto sys_sendto +134 common shutdown sys_shutdown +135 common socketpair sys_socketpair +136 common mkdir sys_mkdir +137 common rmdir sys_rmdir +138 common osf_utimes sys_osf_utimes +139 common osf_old_sigreturn 
sys_ni_syscall +140 common osf_adjtime sys_ni_syscall +141 common getpeername sys_getpeername +142 common osf_gethostid sys_ni_syscall +143 common osf_sethostid sys_ni_syscall +144 common getrlimit sys_getrlimit +145 common setrlimit sys_setrlimit +146 common osf_old_killpg sys_ni_syscall +147 common setsid sys_setsid +148 common quotactl sys_quotactl +149 common osf_oldquota sys_ni_syscall +150 common getsockname sys_getsockname +153 common osf_pid_block sys_ni_syscall +154 common osf_pid_unblock sys_ni_syscall +156 common sigaction sys_osf_sigaction +157 common osf_sigwaitprim sys_ni_syscall +158 common osf_nfssvc sys_ni_syscall +159 common osf_getdirentries sys_osf_getdirentries +160 common osf_statfs sys_osf_statfs +161 common osf_fstatfs sys_osf_fstatfs +163 common osf_asynch_daemon sys_ni_syscall +164 common osf_getfh sys_ni_syscall +165 common osf_getdomainname sys_osf_getdomainname +166 common setdomainname sys_setdomainname +169 common osf_exportfs sys_ni_syscall +181 common osf_alt_plock sys_ni_syscall +184 common osf_getmnt sys_ni_syscall +187 common osf_alt_sigpending sys_ni_syscall +188 common osf_alt_setsid sys_ni_syscall +199 common osf_swapon sys_swapon +200 common msgctl sys_old_msgctl +201 common msgget sys_msgget +202 common msgrcv sys_msgrcv +203 common msgsnd sys_msgsnd +204 common semctl sys_old_semctl +205 common semget sys_semget +206 common semop sys_semop +207 common osf_utsname sys_osf_utsname +208 common lchown sys_lchown +209 common shmat sys_shmat +210 common shmctl sys_old_shmctl +211 common shmdt sys_shmdt +212 common shmget sys_shmget +213 common osf_mvalid sys_ni_syscall +214 common osf_getaddressconf sys_ni_syscall +215 common osf_msleep sys_ni_syscall +216 common osf_mwakeup sys_ni_syscall +217 common msync sys_msync +218 common osf_signal sys_ni_syscall +219 common osf_utc_gettime sys_ni_syscall +220 common osf_utc_adjtime sys_ni_syscall +222 common osf_security sys_ni_syscall +223 common osf_kloadcall sys_ni_syscall +224 common osf_stat sys_osf_stat +225 common osf_lstat sys_osf_lstat +226 common osf_fstat sys_osf_fstat +227 common osf_statfs64 sys_osf_statfs64 +228 common osf_fstatfs64 sys_osf_fstatfs64 +233 common getpgid sys_getpgid +234 common getsid sys_getsid +235 common sigaltstack sys_sigaltstack +236 common osf_waitid sys_ni_syscall +237 common osf_priocntlset sys_ni_syscall +238 common osf_sigsendset sys_ni_syscall +239 common osf_set_speculative sys_ni_syscall +240 common osf_msfs_syscall sys_ni_syscall +241 common osf_sysinfo sys_osf_sysinfo +242 common osf_uadmin sys_ni_syscall +243 common osf_fuser sys_ni_syscall +244 common osf_proplist_syscall sys_osf_proplist_syscall +245 common osf_ntp_adjtime sys_ni_syscall +246 common osf_ntp_gettime sys_ni_syscall +247 common osf_pathconf sys_ni_syscall +248 common osf_fpathconf sys_ni_syscall +250 common osf_uswitch sys_ni_syscall +251 common osf_usleep_thread sys_osf_usleep_thread +252 common osf_audcntl sys_ni_syscall +253 common osf_audgen sys_ni_syscall +254 common sysfs sys_sysfs +255 common osf_subsys_info sys_ni_syscall +256 common osf_getsysinfo sys_osf_getsysinfo +257 common osf_setsysinfo sys_osf_setsysinfo +258 common osf_afs_syscall sys_ni_syscall +259 common osf_swapctl sys_ni_syscall +260 common osf_memcntl sys_ni_syscall +261 common osf_fdatasync sys_ni_syscall +300 common bdflush sys_ni_syscall +301 common sethae sys_sethae +302 common mount sys_mount +303 common old_adjtimex sys_old_adjtimex +304 common swapoff sys_swapoff +305 common getdents sys_getdents +306 common 
create_module sys_ni_syscall +307 common init_module sys_init_module +308 common delete_module sys_delete_module +309 common get_kernel_syms sys_ni_syscall +310 common syslog sys_syslog +311 common reboot sys_reboot +312 common clone alpha_clone +313 common uselib sys_uselib +314 common mlock sys_mlock +315 common munlock sys_munlock +316 common mlockall sys_mlockall +317 common munlockall sys_munlockall +318 common sysinfo sys_sysinfo +319 common _sysctl sys_ni_syscall +# 320 was sys_idle +321 common oldumount sys_oldumount +322 common swapon sys_swapon +323 common times sys_times +324 common personality sys_personality +325 common setfsuid sys_setfsuid +326 common setfsgid sys_setfsgid +327 common ustat sys_ustat +328 common statfs sys_statfs +329 common fstatfs sys_fstatfs +330 common sched_setparam sys_sched_setparam +331 common sched_getparam sys_sched_getparam +332 common sched_setscheduler sys_sched_setscheduler +333 common sched_getscheduler sys_sched_getscheduler +334 common sched_yield sys_sched_yield +335 common sched_get_priority_max sys_sched_get_priority_max +336 common sched_get_priority_min sys_sched_get_priority_min +337 common sched_rr_get_interval sys_sched_rr_get_interval +338 common afs_syscall sys_ni_syscall +339 common uname sys_newuname +340 common nanosleep sys_nanosleep +341 common mremap sys_mremap +342 common nfsservctl sys_ni_syscall +343 common setresuid sys_setresuid +344 common getresuid sys_getresuid +345 common pciconfig_read sys_pciconfig_read +346 common pciconfig_write sys_pciconfig_write +347 common query_module sys_ni_syscall +348 common prctl sys_prctl +349 common pread64 sys_pread64 +350 common pwrite64 sys_pwrite64 +351 common rt_sigreturn sys_rt_sigreturn +352 common rt_sigaction sys_rt_sigaction +353 common rt_sigprocmask sys_rt_sigprocmask +354 common rt_sigpending sys_rt_sigpending +355 common rt_sigtimedwait sys_rt_sigtimedwait +356 common rt_sigqueueinfo sys_rt_sigqueueinfo +357 common rt_sigsuspend sys_rt_sigsuspend +358 common select sys_select +359 common gettimeofday sys_gettimeofday +360 common settimeofday sys_settimeofday +361 common getitimer sys_getitimer +362 common setitimer sys_setitimer +363 common utimes sys_utimes +364 common getrusage sys_getrusage +365 common wait4 sys_wait4 +366 common adjtimex sys_adjtimex +367 common getcwd sys_getcwd +368 common capget sys_capget +369 common capset sys_capset +370 common sendfile sys_sendfile64 +371 common setresgid sys_setresgid +372 common getresgid sys_getresgid +373 common dipc sys_ni_syscall +374 common pivot_root sys_pivot_root +375 common mincore sys_mincore +376 common pciconfig_iobase sys_pciconfig_iobase +377 common getdents64 sys_getdents64 +378 common gettid sys_gettid +379 common readahead sys_readahead +# 380 is unused +381 common tkill sys_tkill +382 common setxattr sys_setxattr +383 common lsetxattr sys_lsetxattr +384 common fsetxattr sys_fsetxattr +385 common getxattr sys_getxattr +386 common lgetxattr sys_lgetxattr +387 common fgetxattr sys_fgetxattr +388 common listxattr sys_listxattr +389 common llistxattr sys_llistxattr +390 common flistxattr sys_flistxattr +391 common removexattr sys_removexattr +392 common lremovexattr sys_lremovexattr +393 common fremovexattr sys_fremovexattr +394 common futex sys_futex +395 common sched_setaffinity sys_sched_setaffinity +396 common sched_getaffinity sys_sched_getaffinity +397 common tuxcall sys_ni_syscall +398 common io_setup sys_io_setup +399 common io_destroy sys_io_destroy +400 common io_getevents sys_io_getevents +401 common 
io_submit sys_io_submit +402 common io_cancel sys_io_cancel +405 common exit_group sys_exit_group +406 common lookup_dcookie sys_lookup_dcookie +407 common epoll_create sys_epoll_create +408 common epoll_ctl sys_epoll_ctl +409 common epoll_wait sys_epoll_wait +410 common remap_file_pages sys_remap_file_pages +411 common set_tid_address sys_set_tid_address +412 common restart_syscall sys_restart_syscall +413 common fadvise64 sys_fadvise64 +414 common timer_create sys_timer_create +415 common timer_settime sys_timer_settime +416 common timer_gettime sys_timer_gettime +417 common timer_getoverrun sys_timer_getoverrun +418 common timer_delete sys_timer_delete +419 common clock_settime sys_clock_settime +420 common clock_gettime sys_clock_gettime +421 common clock_getres sys_clock_getres +422 common clock_nanosleep sys_clock_nanosleep +423 common semtimedop sys_semtimedop +424 common tgkill sys_tgkill +425 common stat64 sys_stat64 +426 common lstat64 sys_lstat64 +427 common fstat64 sys_fstat64 +428 common vserver sys_ni_syscall +429 common mbind sys_ni_syscall +430 common get_mempolicy sys_ni_syscall +431 common set_mempolicy sys_ni_syscall +432 common mq_open sys_mq_open +433 common mq_unlink sys_mq_unlink +434 common mq_timedsend sys_mq_timedsend +435 common mq_timedreceive sys_mq_timedreceive +436 common mq_notify sys_mq_notify +437 common mq_getsetattr sys_mq_getsetattr +438 common waitid sys_waitid +439 common add_key sys_add_key +440 common request_key sys_request_key +441 common keyctl sys_keyctl +442 common ioprio_set sys_ioprio_set +443 common ioprio_get sys_ioprio_get +444 common inotify_init sys_inotify_init +445 common inotify_add_watch sys_inotify_add_watch +446 common inotify_rm_watch sys_inotify_rm_watch +447 common fdatasync sys_fdatasync +448 common kexec_load sys_kexec_load +449 common migrate_pages sys_migrate_pages +450 common openat sys_openat +451 common mkdirat sys_mkdirat +452 common mknodat sys_mknodat +453 common fchownat sys_fchownat +454 common futimesat sys_futimesat +455 common fstatat64 sys_fstatat64 +456 common unlinkat sys_unlinkat +457 common renameat sys_renameat +458 common linkat sys_linkat +459 common symlinkat sys_symlinkat +460 common readlinkat sys_readlinkat +461 common fchmodat sys_fchmodat +462 common faccessat sys_faccessat +463 common pselect6 sys_pselect6 +464 common ppoll sys_ppoll +465 common unshare sys_unshare +466 common set_robust_list sys_set_robust_list +467 common get_robust_list sys_get_robust_list +468 common splice sys_splice +469 common sync_file_range sys_sync_file_range +470 common tee sys_tee +471 common vmsplice sys_vmsplice +472 common move_pages sys_move_pages +473 common getcpu sys_getcpu +474 common epoll_pwait sys_epoll_pwait +475 common utimensat sys_utimensat +476 common signalfd sys_signalfd +477 common timerfd sys_ni_syscall +478 common eventfd sys_eventfd +479 common recvmmsg sys_recvmmsg +480 common fallocate sys_fallocate +481 common timerfd_create sys_timerfd_create +482 common timerfd_settime sys_timerfd_settime +483 common timerfd_gettime sys_timerfd_gettime +484 common signalfd4 sys_signalfd4 +485 common eventfd2 sys_eventfd2 +486 common epoll_create1 sys_epoll_create1 +487 common dup3 sys_dup3 +488 common pipe2 sys_pipe2 +489 common inotify_init1 sys_inotify_init1 +490 common preadv sys_preadv +491 common pwritev sys_pwritev +492 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo +493 common perf_event_open sys_perf_event_open +494 common fanotify_init sys_fanotify_init +495 common fanotify_mark sys_fanotify_mark +496 
common prlimit64 sys_prlimit64 +497 common name_to_handle_at sys_name_to_handle_at +498 common open_by_handle_at sys_open_by_handle_at +499 common clock_adjtime sys_clock_adjtime +500 common syncfs sys_syncfs +501 common setns sys_setns +502 common accept4 sys_accept4 +503 common sendmmsg sys_sendmmsg +504 common process_vm_readv sys_process_vm_readv +505 common process_vm_writev sys_process_vm_writev +506 common kcmp sys_kcmp +507 common finit_module sys_finit_module +508 common sched_setattr sys_sched_setattr +509 common sched_getattr sys_sched_getattr +510 common renameat2 sys_renameat2 +511 common getrandom sys_getrandom +512 common memfd_create sys_memfd_create +513 common execveat sys_execveat +514 common seccomp sys_seccomp +515 common bpf sys_bpf +516 common userfaultfd sys_userfaultfd +517 common membarrier sys_membarrier +518 common mlock2 sys_mlock2 +519 common copy_file_range sys_copy_file_range +520 common preadv2 sys_preadv2 +521 common pwritev2 sys_pwritev2 +522 common statx sys_statx +523 common io_pgetevents sys_io_pgetevents +524 common pkey_mprotect sys_pkey_mprotect +525 common pkey_alloc sys_pkey_alloc +526 common pkey_free sys_pkey_free +527 common rseq sys_rseq +528 common statfs64 sys_statfs64 +529 common fstatfs64 sys_fstatfs64 +530 common getegid sys_getegid +531 common geteuid sys_geteuid +532 common getppid sys_getppid +# all other architectures have common numbers for new syscall, alpha +# is the exception. +534 common pidfd_send_signal sys_pidfd_send_signal +535 common io_uring_setup sys_io_uring_setup +536 common io_uring_enter sys_io_uring_enter +537 common io_uring_register sys_io_uring_register +538 common open_tree sys_open_tree +539 common move_mount sys_move_mount +540 common fsopen sys_fsopen +541 common fsconfig sys_fsconfig +542 common fsmount sys_fsmount +543 common fspick sys_fspick +544 common pidfd_open sys_pidfd_open +# 545 reserved for clone3 +546 common close_range sys_close_range +547 common openat2 sys_openat2 +548 common pidfd_getfd sys_pidfd_getfd +549 common faccessat2 sys_faccessat2 +550 common process_madvise sys_process_madvise +551 common epoll_pwait2 sys_epoll_pwait2 +552 common mount_setattr sys_mount_setattr +553 common quotactl_fd sys_quotactl_fd +554 common landlock_create_ruleset sys_landlock_create_ruleset +555 common landlock_add_rule sys_landlock_add_rule +556 common landlock_restrict_self sys_landlock_restrict_self +# 557 reserved for memfd_secret +558 common process_mrelease sys_process_mrelease +559 common futex_waitv sys_futex_waitv +560 common set_mempolicy_home_node sys_ni_syscall diff --git a/arch/alpha/kernel/syscalls/syscallhdr.sh b/arch/alpha/kernel/syscalls/syscallhdr.sh new file mode 100644 index 0000000000..1780e86149 --- /dev/null +++ b/arch/alpha/kernel/syscalls/syscallhdr.sh @@ -0,0 +1,36 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +in="$1" +out="$2" +my_abis=`echo "($3)" | tr ',' '|'` +prefix="$4" +offset="$5" + +fileguard=_UAPI_ASM_ALPHA_`basename "$out" | sed \ + -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \ + -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'` +grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | ( + printf "#ifndef %s\n" "${fileguard}" + printf "#define %s\n" "${fileguard}" + printf "\n" + + nxt=0 + while read nr abi name entry ; do + if [ -z "$offset" ]; then + printf "#define __NR_%s%s\t%s\n" \ + "${prefix}" "${name}" "${nr}" + else + printf "#define __NR_%s%s\t(%s + %s)\n" \ + "${prefix}" "${name}" "${offset}" "${nr}" + fi + nxt=$((nr+1)) + done + + printf "\n" 
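+	# Editor's illustration (not part of the upstream generator): with an
+	# empty prefix and no offset, a table row such as
+	#     559  common  futex_waitv  sys_futex_waitv
+	# is emitted above as "#define __NR_futex_waitv 559", and __NR_syscalls
+	# below ends up one greater than the highest assigned number.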
+ printf "#ifdef __KERNEL__\n" + printf "#define __NR_syscalls\t%s\n" "${nxt}" + printf "#endif\n" + printf "\n" + printf "#endif /* %s */\n" "${fileguard}" +) > "$out" diff --git a/arch/alpha/kernel/syscalls/syscalltbl.sh b/arch/alpha/kernel/syscalls/syscalltbl.sh new file mode 100644 index 0000000000..85d78d9309 --- /dev/null +++ b/arch/alpha/kernel/syscalls/syscalltbl.sh @@ -0,0 +1,32 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +in="$1" +out="$2" +my_abis=`echo "($3)" | tr ',' '|'` +my_abi="$4" +offset="$5" + +emit() { + t_nxt="$1" + t_nr="$2" + t_entry="$3" + + while [ $t_nxt -lt $t_nr ]; do + printf "__SYSCALL(%s, sys_ni_syscall, )\n" "${t_nxt}" + t_nxt=$((t_nxt+1)) + done + printf "__SYSCALL(%s, %s, )\n" "${t_nxt}" "${t_entry}" +} + +grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | ( + nxt=0 + if [ -z "$offset" ]; then + offset=0 + fi + + while read nr abi name entry ; do + emit $((nxt+offset)) $((nr+offset)) $entry + nxt=$((nr+1)) + done +) > "$out" diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S new file mode 100644 index 0000000000..68f3e4f329 --- /dev/null +++ b/arch/alpha/kernel/systbls.S @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/kernel/systbls.S + * + * The system call table. + */ + +#include + +#define __SYSCALL(nr, entry) .quad entry + .data + .align 3 + .globl sys_call_table +sys_call_table: +#include diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c new file mode 100644 index 0000000000..4d01c392ab --- /dev/null +++ b/arch/alpha/kernel/time.c @@ -0,0 +1,458 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/kernel/time.c + * + * Copyright (C) 1991, 1992, 1995, 1999, 2000 Linus Torvalds + * + * This file contains the clocksource time handling. + * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 + * "A Kernel Model for Precision Timekeeping" by Dave Mills + * 1997-01-09 Adrian Sun + * use interval timer if CONFIG_RTC=y + * 1997-10-29 John Bowman (bowman@math.ualberta.ca) + * fixed tick loss calculation in timer_interrupt + * (round system clock to nearest tick instead of truncating) + * fixed algorithm in time_init for getting time from CMOS clock + * 1999-04-16 Thorsten Kranzkowski (dl8bcu@gmx.net) + * fixed algorithm in do_gettimeofday() for calculating the precise time + * from processor cycle counter (now taking lost_ticks into account) + * 2003-06-03 R. Scott Bailey + * Tighten sanity in time_init from 1% (10,000 PPM) to 250 PPM + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "proto.h" +#include "irq_impl.h" + +DEFINE_SPINLOCK(rtc_lock); +EXPORT_SYMBOL(rtc_lock); + +unsigned long est_cycle_freq; + +#ifdef CONFIG_IRQ_WORK + +DEFINE_PER_CPU(u8, irq_work_pending); + +#define set_irq_work_pending_flag() __this_cpu_write(irq_work_pending, 1) +#define test_irq_work_pending() __this_cpu_read(irq_work_pending) +#define clear_irq_work_pending() __this_cpu_write(irq_work_pending, 0) + +void arch_irq_work_raise(void) +{ + set_irq_work_pending_flag(); +} + +#else /* CONFIG_IRQ_WORK */ + +#define test_irq_work_pending() 0 +#define clear_irq_work_pending() + +#endif /* CONFIG_IRQ_WORK */ + + +static inline __u32 rpcc(void) +{ + return __builtin_alpha_rpcc(); +} + + + +/* + * The RTC as a clock_event_device primitive. 
+ */ + +static DEFINE_PER_CPU(struct clock_event_device, cpu_ce); + +irqreturn_t +rtc_timer_interrupt(int irq, void *dev) +{ + int cpu = smp_processor_id(); + struct clock_event_device *ce = &per_cpu(cpu_ce, cpu); + + /* Don't run the hook for UNUSED or SHUTDOWN. */ + if (likely(clockevent_state_periodic(ce))) + ce->event_handler(ce); + + if (test_irq_work_pending()) { + clear_irq_work_pending(); + irq_work_run(); + } + + return IRQ_HANDLED; +} + +static int +rtc_ce_set_next_event(unsigned long evt, struct clock_event_device *ce) +{ + /* This hook is for oneshot mode, which we don't support. */ + return -EINVAL; +} + +static void __init +init_rtc_clockevent(void) +{ + int cpu = smp_processor_id(); + struct clock_event_device *ce = &per_cpu(cpu_ce, cpu); + + *ce = (struct clock_event_device){ + .name = "rtc", + .features = CLOCK_EVT_FEAT_PERIODIC, + .rating = 100, + .cpumask = cpumask_of(cpu), + .set_next_event = rtc_ce_set_next_event, + }; + + clockevents_config_and_register(ce, CONFIG_HZ, 0, 0); +} + + +/* + * The QEMU clock as a clocksource primitive. + */ + +static u64 +qemu_cs_read(struct clocksource *cs) +{ + return qemu_get_vmtime(); +} + +static struct clocksource qemu_cs = { + .name = "qemu", + .rating = 400, + .read = qemu_cs_read, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .max_idle_ns = LONG_MAX +}; + + +/* + * The QEMU alarm as a clock_event_device primitive. + */ + +static int qemu_ce_shutdown(struct clock_event_device *ce) +{ + /* The mode member of CE is updated for us in generic code. + Just make sure that the event is disabled. */ + qemu_set_alarm_abs(0); + return 0; +} + +static int +qemu_ce_set_next_event(unsigned long evt, struct clock_event_device *ce) +{ + qemu_set_alarm_rel(evt); + return 0; +} + +static irqreturn_t +qemu_timer_interrupt(int irq, void *dev) +{ + int cpu = smp_processor_id(); + struct clock_event_device *ce = &per_cpu(cpu_ce, cpu); + + ce->event_handler(ce); + return IRQ_HANDLED; +} + +static void __init +init_qemu_clockevent(void) +{ + int cpu = smp_processor_id(); + struct clock_event_device *ce = &per_cpu(cpu_ce, cpu); + + *ce = (struct clock_event_device){ + .name = "qemu", + .features = CLOCK_EVT_FEAT_ONESHOT, + .rating = 400, + .cpumask = cpumask_of(cpu), + .set_state_shutdown = qemu_ce_shutdown, + .set_state_oneshot = qemu_ce_shutdown, + .tick_resume = qemu_ce_shutdown, + .set_next_event = qemu_ce_set_next_event, + }; + + clockevents_config_and_register(ce, NSEC_PER_SEC, 1000, LONG_MAX); +} + + +void __init +common_init_rtc(void) +{ + unsigned char x, sel = 0; + + /* Reset periodic interrupt frequency. */ +#if CONFIG_HZ == 1024 || CONFIG_HZ == 1200 + x = CMOS_READ(RTC_FREQ_SELECT) & 0x3f; + /* Test includes known working values on various platforms + where 0x26 is wrong; we refuse to change those. */ + if (x != 0x26 && x != 0x25 && x != 0x19 && x != 0x06) { + sel = RTC_REF_CLCK_32KHZ + 6; + } +#elif CONFIG_HZ == 256 || CONFIG_HZ == 128 || CONFIG_HZ == 64 || CONFIG_HZ == 32 + sel = RTC_REF_CLCK_32KHZ + __builtin_ffs(32768 / CONFIG_HZ); +#else +# error "Unknown HZ from arch/alpha/Kconfig" +#endif + if (sel) { + printk(KERN_INFO "Setting RTC_FREQ to %d Hz (%x)\n", + CONFIG_HZ, sel); + CMOS_WRITE(sel, RTC_FREQ_SELECT); + } + + /* Turn on periodic interrupts. 
*/ + x = CMOS_READ(RTC_CONTROL); + if (!(x & RTC_PIE)) { + printk("Turning on RTC interrupts.\n"); + x |= RTC_PIE; + x &= ~(RTC_AIE | RTC_UIE); + CMOS_WRITE(x, RTC_CONTROL); + } + (void) CMOS_READ(RTC_INTR_FLAGS); + + outb(0x36, 0x43); /* pit counter 0: system timer */ + outb(0x00, 0x40); + outb(0x00, 0x40); + + outb(0xb6, 0x43); /* pit counter 2: speaker */ + outb(0x31, 0x42); + outb(0x13, 0x42); + + init_rtc_irq(NULL); +} + + +#ifndef CONFIG_ALPHA_WTINT +/* + * The RPCC as a clocksource primitive. + * + * While we have free-running timecounters running on all CPUs, and we make + * a half-hearted attempt in init_rtc_rpcc_info to sync the timecounter + * with the wall clock, that initialization isn't kept up-to-date across + * different time counters in SMP mode. Therefore we can only use this + * method when there's only one CPU enabled. + * + * When using the WTINT PALcall, the RPCC may shift to a lower frequency, + * or stop altogether, while waiting for the interrupt. Therefore we cannot + * use this method when WTINT is in use. + */ + +static u64 read_rpcc(struct clocksource *cs) +{ + return rpcc(); +} + +static struct clocksource clocksource_rpcc = { + .name = "rpcc", + .rating = 300, + .read = read_rpcc, + .mask = CLOCKSOURCE_MASK(32), + .flags = CLOCK_SOURCE_IS_CONTINUOUS +}; +#endif /* ALPHA_WTINT */ + + +/* Validate a computed cycle counter result against the known bounds for + the given processor core. There's too much brokenness in the way of + timing hardware for any one method to work everywhere. :-( + + Return 0 if the result cannot be trusted, otherwise return the argument. */ + +static unsigned long __init +validate_cc_value(unsigned long cc) +{ + static struct bounds { + unsigned int min, max; + } cpu_hz[] __initdata = { + [EV3_CPU] = { 50000000, 200000000 }, /* guess */ + [EV4_CPU] = { 100000000, 300000000 }, + [LCA4_CPU] = { 100000000, 300000000 }, /* guess */ + [EV45_CPU] = { 200000000, 300000000 }, + [EV5_CPU] = { 250000000, 433000000 }, + [EV56_CPU] = { 333000000, 667000000 }, + [PCA56_CPU] = { 400000000, 600000000 }, /* guess */ + [PCA57_CPU] = { 500000000, 600000000 }, /* guess */ + [EV6_CPU] = { 466000000, 600000000 }, + [EV67_CPU] = { 600000000, 750000000 }, + [EV68AL_CPU] = { 750000000, 940000000 }, + [EV68CB_CPU] = { 1000000000, 1333333333 }, + /* None of the following are shipping as of 2001-11-01. */ + [EV68CX_CPU] = { 1000000000, 1700000000 }, /* guess */ + [EV69_CPU] = { 1000000000, 1700000000 }, /* guess */ + [EV7_CPU] = { 800000000, 1400000000 }, /* guess */ + [EV79_CPU] = { 1000000000, 2000000000 }, /* guess */ + }; + + /* Allow for some drift in the crystal. 10MHz is more than enough. */ + const unsigned int deviation = 10000000; + + struct percpu_struct *cpu; + unsigned int index; + + cpu = (struct percpu_struct *)((char*)hwrpb + hwrpb->processor_offset); + index = cpu->type & 0xffffffff; + + /* If index out of bounds, no way to validate. */ + if (index >= ARRAY_SIZE(cpu_hz)) + return cc; + + /* If index contains no data, no way to validate. */ + if (cpu_hz[index].max == 0) + return cc; + + if (cc < cpu_hz[index].min - deviation + || cc > cpu_hz[index].max + deviation) + return 0; + + return cc; +} + + +/* + * Calibrate CPU clock using legacy 8254 timer/counter. Stolen from + * arch/i386/time.c. 
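+ *
+ * Editor's worked example (illustrative figures, not from the source):
+ * channel 2 is loaded with CALIBRATE_LATCH = 0xffff, so the gate window is
+ * roughly 65536 / PIT_TICK_RATE (1193182 Hz) ~= 54.9 ms.  A 500 MHz EV56
+ * would see rpcc() advance by about 27.5 million cycles in that window,
+ * and cc * PIT_TICK_RATE / (CALIBRATE_LATCH + 1) recovers ~500 MHz.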
+ */ + +#define CALIBRATE_LATCH 0xffff +#define TIMEOUT_COUNT 0x100000 + +static unsigned long __init +calibrate_cc_with_pit(void) +{ + int cc, count = 0; + + /* Set the Gate high, disable speaker */ + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + /* + * Now let's take care of CTC channel 2 + * + * Set the Gate high, program CTC channel 2 for mode 0, + * (interrupt on terminal count mode), binary count, + * load 5 * LATCH count, (LSB and MSB) to begin countdown. + */ + outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */ + outb(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */ + outb(CALIBRATE_LATCH >> 8, 0x42); /* MSB of count */ + + cc = rpcc(); + do { + count++; + } while ((inb(0x61) & 0x20) == 0 && count < TIMEOUT_COUNT); + cc = rpcc() - cc; + + /* Error: ECTCNEVERSET or ECPUTOOFAST. */ + if (count <= 1 || count == TIMEOUT_COUNT) + return 0; + + return ((long)cc * PIT_TICK_RATE) / (CALIBRATE_LATCH + 1); +} + +/* The Linux interpretation of the CMOS clock register contents: + When the Update-In-Progress (UIP) flag goes from 1 to 0, the + RTC registers show the second which has precisely just started. + Let's hope other operating systems interpret the RTC the same way. */ + +static unsigned long __init +rpcc_after_update_in_progress(void) +{ + do { } while (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)); + do { } while (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP); + + return rpcc(); +} + +void __init +time_init(void) +{ + unsigned int cc1, cc2; + unsigned long cycle_freq, tolerance; + long diff; + + if (alpha_using_qemu) { + clocksource_register_hz(&qemu_cs, NSEC_PER_SEC); + init_qemu_clockevent(); + init_rtc_irq(qemu_timer_interrupt); + return; + } + + /* Calibrate CPU clock -- attempt #1. */ + if (!est_cycle_freq) + est_cycle_freq = validate_cc_value(calibrate_cc_with_pit()); + + cc1 = rpcc(); + + /* Calibrate CPU clock -- attempt #2. */ + if (!est_cycle_freq) { + cc1 = rpcc_after_update_in_progress(); + cc2 = rpcc_after_update_in_progress(); + est_cycle_freq = validate_cc_value(cc2 - cc1); + cc1 = cc2; + } + + cycle_freq = hwrpb->cycle_freq; + if (est_cycle_freq) { + /* If the given value is within 250 PPM of what we calculated, + accept it. Otherwise, use what we found. */ + tolerance = cycle_freq / 4000; + diff = cycle_freq - est_cycle_freq; + if (diff < 0) + diff = -diff; + if ((unsigned long)diff > tolerance) { + cycle_freq = est_cycle_freq; + printk("HWRPB cycle frequency bogus. " + "Estimated %lu Hz\n", cycle_freq); + } else { + est_cycle_freq = 0; + } + } else if (! validate_cc_value (cycle_freq)) { + printk("HWRPB cycle frequency bogus, " + "and unable to estimate a proper value!\n"); + } + + /* See above for restrictions on using clocksource_rpcc. */ +#ifndef CONFIG_ALPHA_WTINT + if (hwrpb->nr_processors == 1) + clocksource_register_hz(&clocksource_rpcc, cycle_freq); +#endif + + /* Startup the timer source. */ + alpha_mv.init_rtc(); + init_rtc_clockevent(); +} + +/* Initialize the clock_event_device for secondary cpus. 
*/ +#ifdef CONFIG_SMP +void __init +init_clockevent(void) +{ + if (alpha_using_qemu) + init_qemu_clockevent(); + else + init_rtc_clockevent(); +} +#endif diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c new file mode 100644 index 0000000000..8a66fe544c --- /dev/null +++ b/arch/alpha/kernel/traps.c @@ -0,0 +1,992 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/alpha/kernel/traps.c + * + * (C) Copyright 1994 Linus Torvalds + */ + +/* + * This file initializes the trap entry points + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "proto.h" + +/* Work-around for some SRMs which mishandle opDEC faults. */ + +static int opDEC_fix; + +static void +opDEC_check(void) +{ + __asm__ __volatile__ ( + /* Load the address of... */ + " br $16, 1f\n" + /* A stub instruction fault handler. Just add 4 to the + pc and continue. */ + " ldq $16, 8($sp)\n" + " addq $16, 4, $16\n" + " stq $16, 8($sp)\n" + " call_pal %[rti]\n" + /* Install the instruction fault handler. */ + "1: lda $17, 3\n" + " call_pal %[wrent]\n" + /* With that in place, the fault from the round-to-minf fp + insn will arrive either at the "lda 4" insn (bad) or one + past that (good). This places the correct fixup in %0. */ + " lda %[fix], 0\n" + " cvttq/svm $f31,$f31\n" + " lda %[fix], 4" + : [fix] "=r" (opDEC_fix) + : [rti] "n" (PAL_rti), [wrent] "n" (PAL_wrent) + : "$0", "$1", "$16", "$17", "$22", "$23", "$24", "$25"); + + if (opDEC_fix) + printk("opDEC fixup enabled.\n"); +} + +void +dik_show_regs(struct pt_regs *regs, unsigned long *r9_15) +{ + printk("pc = [<%016lx>] ra = [<%016lx>] ps = %04lx %s\n", + regs->pc, regs->r26, regs->ps, print_tainted()); + printk("pc is at %pSR\n", (void *)regs->pc); + printk("ra is at %pSR\n", (void *)regs->r26); + printk("v0 = %016lx t0 = %016lx t1 = %016lx\n", + regs->r0, regs->r1, regs->r2); + printk("t2 = %016lx t3 = %016lx t4 = %016lx\n", + regs->r3, regs->r4, regs->r5); + printk("t5 = %016lx t6 = %016lx t7 = %016lx\n", + regs->r6, regs->r7, regs->r8); + + if (r9_15) { + printk("s0 = %016lx s1 = %016lx s2 = %016lx\n", + r9_15[9], r9_15[10], r9_15[11]); + printk("s3 = %016lx s4 = %016lx s5 = %016lx\n", + r9_15[12], r9_15[13], r9_15[14]); + printk("s6 = %016lx\n", r9_15[15]); + } + + printk("a0 = %016lx a1 = %016lx a2 = %016lx\n", + regs->r16, regs->r17, regs->r18); + printk("a3 = %016lx a4 = %016lx a5 = %016lx\n", + regs->r19, regs->r20, regs->r21); + printk("t8 = %016lx t9 = %016lx t10= %016lx\n", + regs->r22, regs->r23, regs->r24); + printk("t11= %016lx pv = %016lx at = %016lx\n", + regs->r25, regs->r27, regs->r28); + printk("gp = %016lx sp = %p\n", regs->gp, regs+1); +#if 0 +__halt(); +#endif +} + +#if 0 +static char * ireg_name[] = {"v0", "t0", "t1", "t2", "t3", "t4", "t5", "t6", + "t7", "s0", "s1", "s2", "s3", "s4", "s5", "s6", + "a0", "a1", "a2", "a3", "a4", "a5", "t8", "t9", + "t10", "t11", "ra", "pv", "at", "gp", "sp", "zero"}; +#endif + +static void +dik_show_code(unsigned int *pc) +{ + long i; + + printk("Code:"); + for (i = -6; i < 2; i++) { + unsigned int insn; + if (__get_user(insn, (unsigned int __user *)pc + i)) + break; + printk("%c%08x%c", i ? ' ' : '<', insn, i ? 
' ' : '>'); + } + printk("\n"); +} + +static void +dik_show_trace(unsigned long *sp, const char *loglvl) +{ + long i = 0; + printk("%sTrace:\n", loglvl); + while (0x1ff8 & (unsigned long) sp) { + extern char _stext[], _etext[]; + unsigned long tmp = *sp; + sp++; + if (!is_kernel_text(tmp)) + continue; + printk("%s[<%lx>] %pSR\n", loglvl, tmp, (void *)tmp); + if (i > 40) { + printk("%s ...", loglvl); + break; + } + } + printk("%s\n", loglvl); +} + +static int kstack_depth_to_print = 24; + +void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) +{ + unsigned long *stack; + int i; + + /* + * debugging aid: "show_stack(NULL, NULL, KERN_EMERG);" prints the + * back trace for this cpu. + */ + if(sp==NULL) + sp=(unsigned long*)&sp; + + stack = sp; + for(i=0; i < kstack_depth_to_print; i++) { + if (((long) stack & (THREAD_SIZE-1)) == 0) + break; + if ((i % 4) == 0) { + if (i) + pr_cont("\n"); + printk("%s ", loglvl); + } else { + pr_cont(" "); + } + pr_cont("%016lx", *stack++); + } + pr_cont("\n"); + dik_show_trace(sp, loglvl); +} + +void +die_if_kernel(char * str, struct pt_regs *regs, long err, unsigned long *r9_15) +{ + if (regs->ps & 8) + return; +#ifdef CONFIG_SMP + printk("CPU %d ", hard_smp_processor_id()); +#endif + printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err); + dik_show_regs(regs, r9_15); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); + dik_show_trace((unsigned long *)(regs+1), KERN_DEFAULT); + dik_show_code((unsigned int *)regs->pc); + + if (test_and_set_thread_flag (TIF_DIE_IF_KERNEL)) { + printk("die_if_kernel recursion detected.\n"); + local_irq_enable(); + while (1); + } + make_task_dead(SIGSEGV); +} + +#ifndef CONFIG_MATHEMU +static long dummy_emul(void) { return 0; } +long (*alpha_fp_emul_imprecise)(struct pt_regs *regs, unsigned long writemask) + = (void *)dummy_emul; +EXPORT_SYMBOL_GPL(alpha_fp_emul_imprecise); +long (*alpha_fp_emul) (unsigned long pc) + = (void *)dummy_emul; +EXPORT_SYMBOL_GPL(alpha_fp_emul); +#else +long alpha_fp_emul_imprecise(struct pt_regs *regs, unsigned long writemask); +long alpha_fp_emul (unsigned long pc); +#endif + +asmlinkage void +do_entArith(unsigned long summary, unsigned long write_mask, + struct pt_regs *regs) +{ + long si_code = FPE_FLTINV; + + if (summary & 1) { + /* Software-completion summary bit is set, so try to + emulate the instruction. If the processor supports + precise exceptions, we don't have to search. */ + if (!amask(AMASK_PRECISE_TRAP)) + si_code = alpha_fp_emul(regs->pc - 4); + else + si_code = alpha_fp_emul_imprecise(regs, write_mask); + if (si_code == 0) + return; + } + die_if_kernel("Arithmetic fault", regs, 0, NULL); + + send_sig_fault_trapno(SIGFPE, si_code, (void __user *) regs->pc, 0, current); +} + +asmlinkage void +do_entIF(unsigned long type, struct pt_regs *regs) +{ + int signo, code; + + if ((regs->ps & ~IPL_MAX) == 0) { + if (type == 1) { + const unsigned int *data + = (const unsigned int *) regs->pc; + printk("Kernel bug at %s:%d\n", + (const char *)(data[1] | (long)data[2] << 32), + data[0]); + } +#ifdef CONFIG_ALPHA_WTINT + if (type == 4) { + /* If CALL_PAL WTINT is totally unsupported by the + PALcode, e.g. MILO, "emulate" it by overwriting + the insn. */ + unsigned int *pinsn + = (unsigned int *) regs->pc - 1; + if (*pinsn == PAL_wtint) { + *pinsn = 0x47e01400; /* mov 0,$0 */ + imb(); + regs->r0 = 0; + return; + } + } +#endif /* ALPHA_WTINT */ + die_if_kernel((type == 1 ? 
"Kernel Bug" : "Instruction fault"), + regs, type, NULL); + } + + switch (type) { + case 0: /* breakpoint */ + if (ptrace_cancel_bpt(current)) { + regs->pc -= 4; /* make pc point to former bpt */ + } + + send_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc, + current); + return; + + case 1: /* bugcheck */ + send_sig_fault_trapno(SIGTRAP, TRAP_UNK, + (void __user *) regs->pc, 0, current); + return; + + case 2: /* gentrap */ + switch ((long) regs->r16) { + case GEN_INTOVF: + signo = SIGFPE; + code = FPE_INTOVF; + break; + case GEN_INTDIV: + signo = SIGFPE; + code = FPE_INTDIV; + break; + case GEN_FLTOVF: + signo = SIGFPE; + code = FPE_FLTOVF; + break; + case GEN_FLTDIV: + signo = SIGFPE; + code = FPE_FLTDIV; + break; + case GEN_FLTUND: + signo = SIGFPE; + code = FPE_FLTUND; + break; + case GEN_FLTINV: + signo = SIGFPE; + code = FPE_FLTINV; + break; + case GEN_FLTINE: + signo = SIGFPE; + code = FPE_FLTRES; + break; + case GEN_ROPRAND: + signo = SIGFPE; + code = FPE_FLTUNK; + break; + + case GEN_DECOVF: + case GEN_DECDIV: + case GEN_DECINV: + case GEN_ASSERTERR: + case GEN_NULPTRERR: + case GEN_STKOVF: + case GEN_STRLENERR: + case GEN_SUBSTRERR: + case GEN_RANGERR: + case GEN_SUBRNG: + case GEN_SUBRNG1: + case GEN_SUBRNG2: + case GEN_SUBRNG3: + case GEN_SUBRNG4: + case GEN_SUBRNG5: + case GEN_SUBRNG6: + case GEN_SUBRNG7: + default: + signo = SIGTRAP; + code = TRAP_UNK; + break; + } + + send_sig_fault_trapno(signo, code, (void __user *) regs->pc, + regs->r16, current); + return; + + case 4: /* opDEC */ + if (implver() == IMPLVER_EV4) { + long si_code; + + /* The some versions of SRM do not handle + the opDEC properly - they return the PC of the + opDEC fault, not the instruction after as the + Alpha architecture requires. Here we fix it up. + We do this by intentionally causing an opDEC + fault during the boot sequence and testing if + we get the correct PC. If not, we set a flag + to correct it every time through. */ + regs->pc += opDEC_fix; + + /* EV4 does not implement anything except normal + rounding. Everything else will come here as + an illegal instruction. Emulate them. */ + si_code = alpha_fp_emul(regs->pc - 4); + if (si_code == 0) + return; + if (si_code > 0) { + send_sig_fault_trapno(SIGFPE, si_code, + (void __user *) regs->pc, + 0, current); + return; + } + } + break; + + case 3: /* FEN fault */ + /* Irritating users can call PAL_clrfen to disable the + FPU for the process. The kernel will then trap in + do_switch_stack and undo_switch_stack when we try + to save and restore the FP registers. + + Given that GCC by default generates code that uses the + FP registers, PAL_clrfen is not useful except for DoS + attacks. So turn the bleeding FPU back on and be done + with it. */ + current_thread_info()->pcb.flags |= 1; + __reload_thread(¤t_thread_info()->pcb); + return; + + case 5: /* illoc */ + default: /* unexpected instruction-fault type */ + ; + } + + send_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)regs->pc, current); +} + +/* There is an ifdef in the PALcode in MILO that enables a + "kernel debugging entry point" as an unprivileged call_pal. + + We don't want to have anything to do with it, but unfortunately + several versions of MILO included in distributions have it enabled, + and if we don't put something on the entry point we'll oops. 
*/ + +asmlinkage void +do_entDbg(struct pt_regs *regs) +{ + die_if_kernel("Instruction fault", regs, 0, NULL); + + force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)regs->pc); +} + + +/* + * entUna has a different register layout to be reasonably simple. It + * needs access to all the integer registers (the kernel doesn't use + * fp-regs), and it needs to have them in order for simpler access. + * + * Due to the non-standard register layout (and because we don't want + * to handle floating-point regs), user-mode unaligned accesses are + * handled separately by do_entUnaUser below. + * + * Oh, btw, we don't handle the "gp" register correctly, but if we fault + * on a gp-register unaligned load/store, something is _very_ wrong + * in the kernel anyway.. + */ +struct allregs { + unsigned long regs[32]; + unsigned long ps, pc, gp, a0, a1, a2; +}; + +struct unaligned_stat { + unsigned long count, va, pc; +} unaligned[2]; + + +/* Macro for exception fixup code to access integer registers. */ +#define una_reg(r) (_regs[(r) >= 16 && (r) <= 18 ? (r)+19 : (r)]) + + +asmlinkage void +do_entUna(void * va, unsigned long opcode, unsigned long reg, + struct allregs *regs) +{ + long error, tmp1, tmp2, tmp3, tmp4; + unsigned long pc = regs->pc - 4; + unsigned long *_regs = regs->regs; + const struct exception_table_entry *fixup; + + unaligned[0].count++; + unaligned[0].va = (unsigned long) va; + unaligned[0].pc = pc; + + /* We don't want to use the generic get/put unaligned macros as + we want to trap exceptions. Only if we actually get an + exception will we decide whether we should have caught it. */ + + switch (opcode) { + case 0x0c: /* ldwu */ + __asm__ __volatile__( + "1: ldq_u %1,0(%3)\n" + "2: ldq_u %2,1(%3)\n" + " extwl %1,%3,%1\n" + " extwh %2,%3,%2\n" + "3:\n" + EXC(1b,3b,%1,%0) + EXC(2b,3b,%2,%0) + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + if (error) + goto got_exception; + una_reg(reg) = tmp1|tmp2; + return; + + case 0x28: /* ldl */ + __asm__ __volatile__( + "1: ldq_u %1,0(%3)\n" + "2: ldq_u %2,3(%3)\n" + " extll %1,%3,%1\n" + " extlh %2,%3,%2\n" + "3:\n" + EXC(1b,3b,%1,%0) + EXC(2b,3b,%2,%0) + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + if (error) + goto got_exception; + una_reg(reg) = (int)(tmp1|tmp2); + return; + + case 0x29: /* ldq */ + __asm__ __volatile__( + "1: ldq_u %1,0(%3)\n" + "2: ldq_u %2,7(%3)\n" + " extql %1,%3,%1\n" + " extqh %2,%3,%2\n" + "3:\n" + EXC(1b,3b,%1,%0) + EXC(2b,3b,%2,%0) + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + if (error) + goto got_exception; + una_reg(reg) = tmp1|tmp2; + return; + + /* Note that the store sequences do not indicate that they change + memory because it _should_ be affecting nothing in this context. + (Otherwise we have other, much larger, problems.) 
*/ + case 0x0d: /* stw */ + __asm__ __volatile__( + "1: ldq_u %2,1(%5)\n" + "2: ldq_u %1,0(%5)\n" + " inswh %6,%5,%4\n" + " inswl %6,%5,%3\n" + " mskwh %2,%5,%2\n" + " mskwl %1,%5,%1\n" + " or %2,%4,%2\n" + " or %1,%3,%1\n" + "3: stq_u %2,1(%5)\n" + "4: stq_u %1,0(%5)\n" + "5:\n" + EXC(1b,5b,%2,%0) + EXC(2b,5b,%1,%0) + EXC(3b,5b,$31,%0) + EXC(4b,5b,$31,%0) + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), + "=&r"(tmp3), "=&r"(tmp4) + : "r"(va), "r"(una_reg(reg)), "0"(0)); + if (error) + goto got_exception; + return; + + case 0x2c: /* stl */ + __asm__ __volatile__( + "1: ldq_u %2,3(%5)\n" + "2: ldq_u %1,0(%5)\n" + " inslh %6,%5,%4\n" + " insll %6,%5,%3\n" + " msklh %2,%5,%2\n" + " mskll %1,%5,%1\n" + " or %2,%4,%2\n" + " or %1,%3,%1\n" + "3: stq_u %2,3(%5)\n" + "4: stq_u %1,0(%5)\n" + "5:\n" + EXC(1b,5b,%2,%0) + EXC(2b,5b,%1,%0) + EXC(3b,5b,$31,%0) + EXC(4b,5b,$31,%0) + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), + "=&r"(tmp3), "=&r"(tmp4) + : "r"(va), "r"(una_reg(reg)), "0"(0)); + if (error) + goto got_exception; + return; + + case 0x2d: /* stq */ + __asm__ __volatile__( + "1: ldq_u %2,7(%5)\n" + "2: ldq_u %1,0(%5)\n" + " insqh %6,%5,%4\n" + " insql %6,%5,%3\n" + " mskqh %2,%5,%2\n" + " mskql %1,%5,%1\n" + " or %2,%4,%2\n" + " or %1,%3,%1\n" + "3: stq_u %2,7(%5)\n" + "4: stq_u %1,0(%5)\n" + "5:\n" + EXC(1b,5b,%2,%0) + EXC(2b,5b,%1,%0) + EXC(3b,5b,$31,%0) + EXC(4b,5b,$31,%0) + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), + "=&r"(tmp3), "=&r"(tmp4) + : "r"(va), "r"(una_reg(reg)), "0"(0)); + if (error) + goto got_exception; + return; + } + + printk("Bad unaligned kernel access at %016lx: %p %lx %lu\n", + pc, va, opcode, reg); + make_task_dead(SIGSEGV); + +got_exception: + /* Ok, we caught the exception, but we don't want it. Is there + someone to pass it along to? */ + if ((fixup = search_exception_tables(pc)) != 0) { + unsigned long newpc; + newpc = fixup_exception(una_reg, fixup, pc); + + printk("Forwarding unaligned exception at %lx (%lx)\n", + pc, newpc); + + regs->pc = newpc; + return; + } + + /* + * Yikes! No one to forward the exception to. + * Since the registers are in a weird format, dump them ourselves. 
+ */ + + printk("%s(%d): unhandled unaligned exception\n", + current->comm, task_pid_nr(current)); + + printk("pc = [<%016lx>] ra = [<%016lx>] ps = %04lx\n", + pc, una_reg(26), regs->ps); + printk("r0 = %016lx r1 = %016lx r2 = %016lx\n", + una_reg(0), una_reg(1), una_reg(2)); + printk("r3 = %016lx r4 = %016lx r5 = %016lx\n", + una_reg(3), una_reg(4), una_reg(5)); + printk("r6 = %016lx r7 = %016lx r8 = %016lx\n", + una_reg(6), una_reg(7), una_reg(8)); + printk("r9 = %016lx r10= %016lx r11= %016lx\n", + una_reg(9), una_reg(10), una_reg(11)); + printk("r12= %016lx r13= %016lx r14= %016lx\n", + una_reg(12), una_reg(13), una_reg(14)); + printk("r15= %016lx\n", una_reg(15)); + printk("r16= %016lx r17= %016lx r18= %016lx\n", + una_reg(16), una_reg(17), una_reg(18)); + printk("r19= %016lx r20= %016lx r21= %016lx\n", + una_reg(19), una_reg(20), una_reg(21)); + printk("r22= %016lx r23= %016lx r24= %016lx\n", + una_reg(22), una_reg(23), una_reg(24)); + printk("r25= %016lx r27= %016lx r28= %016lx\n", + una_reg(25), una_reg(27), una_reg(28)); + printk("gp = %016lx sp = %p\n", regs->gp, regs+1); + + dik_show_code((unsigned int *)pc); + dik_show_trace((unsigned long *)(regs+1), KERN_DEFAULT); + + if (test_and_set_thread_flag (TIF_DIE_IF_KERNEL)) { + printk("die_if_kernel recursion detected.\n"); + local_irq_enable(); + while (1); + } + make_task_dead(SIGSEGV); +} + +/* + * Convert an s-floating point value in memory format to the + * corresponding value in register format. The exponent + * needs to be remapped to preserve non-finite values + * (infinities, not-a-numbers, denormals). + */ +static inline unsigned long +s_mem_to_reg (unsigned long s_mem) +{ + unsigned long frac = (s_mem >> 0) & 0x7fffff; + unsigned long sign = (s_mem >> 31) & 0x1; + unsigned long exp_msb = (s_mem >> 30) & 0x1; + unsigned long exp_low = (s_mem >> 23) & 0x7f; + unsigned long exp; + + exp = (exp_msb << 10) | exp_low; /* common case */ + if (exp_msb) { + if (exp_low == 0x7f) { + exp = 0x7ff; + } + } else { + if (exp_low == 0x00) { + exp = 0x000; + } else { + exp |= (0x7 << 7); + } + } + return (sign << 63) | (exp << 52) | (frac << 29); +} + +/* + * Convert an s-floating point value in register format to the + * corresponding value in memory format. + */ +static inline unsigned long +s_reg_to_mem (unsigned long s_reg) +{ + return ((s_reg >> 62) << 30) | ((s_reg << 5) >> 34); +} + +/* + * Handle user-level unaligned fault. Handling user-level unaligned + * faults is *extremely* slow and produces nasty messages. A user + * program *should* fix unaligned faults ASAP. + * + * Notice that we have (almost) the regular kernel stack layout here, + * so finding the appropriate registers is a little more difficult + * than in the kernel case. + * + * Finally, we handle regular integer load/stores only. In + * particular, load-linked/store-conditionally and floating point + * load/stores are not supported. The former make no sense with + * unaligned faults (they are guaranteed to fail) and I don't think + * the latter will occur in any decent program. + * + * Sigh. We *do* have to handle some FP operations, because GCC will + * uses them as temporary storage for integer memory to memory copies. + * However, we need to deal with stt/ldt and sts/lds only. 
+ */ + +#define OP_INT_MASK ( 1L << 0x28 | 1L << 0x2c /* ldl stl */ \ + | 1L << 0x29 | 1L << 0x2d /* ldq stq */ \ + | 1L << 0x0c | 1L << 0x0d /* ldwu stw */ \ + | 1L << 0x0a | 1L << 0x0e ) /* ldbu stb */ + +#define OP_WRITE_MASK ( 1L << 0x26 | 1L << 0x27 /* sts stt */ \ + | 1L << 0x2c | 1L << 0x2d /* stl stq */ \ + | 1L << 0x0d | 1L << 0x0e ) /* stw stb */ + +#define R(x) ((size_t) &((struct pt_regs *)0)->x) + +static int unauser_reg_offsets[32] = { + R(r0), R(r1), R(r2), R(r3), R(r4), R(r5), R(r6), R(r7), R(r8), + /* r9 ... r15 are stored in front of regs. */ + -56, -48, -40, -32, -24, -16, -8, + R(r16), R(r17), R(r18), + R(r19), R(r20), R(r21), R(r22), R(r23), R(r24), R(r25), R(r26), + R(r27), R(r28), R(gp), + 0, 0 +}; + +#undef R + +asmlinkage void +do_entUnaUser(void __user * va, unsigned long opcode, + unsigned long reg, struct pt_regs *regs) +{ + static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5); + + unsigned long tmp1, tmp2, tmp3, tmp4; + unsigned long fake_reg, *reg_addr = &fake_reg; + int si_code; + long error; + + /* Check the UAC bits to decide what the user wants us to do + with the unaligned access. */ + + if (!(current_thread_info()->status & TS_UAC_NOPRINT)) { + if (__ratelimit(&ratelimit)) { + printk("%s(%d): unaligned trap at %016lx: %p %lx %ld\n", + current->comm, task_pid_nr(current), + regs->pc - 4, va, opcode, reg); + } + } + if ((current_thread_info()->status & TS_UAC_SIGBUS)) + goto give_sigbus; + /* Not sure why you'd want to use this, but... */ + if ((current_thread_info()->status & TS_UAC_NOFIX)) + return; + + /* Don't bother reading ds in the access check since we already + know that this came from the user. Also rely on the fact that + the page at TASK_SIZE is unmapped and so can't be touched anyway. */ + if ((unsigned long)va >= TASK_SIZE) + goto give_sigsegv; + + ++unaligned[1].count; + unaligned[1].va = (unsigned long)va; + unaligned[1].pc = regs->pc - 4; + + if ((1L << opcode) & OP_INT_MASK) { + /* it's an integer load/store */ + if (reg < 30) { + reg_addr = (unsigned long *) + ((char *)regs + unauser_reg_offsets[reg]); + } else if (reg == 30) { + /* usp in PAL regs */ + fake_reg = rdusp(); + } else { + /* zero "register" */ + fake_reg = 0; + } + } + + /* We don't want to use the generic get/put unaligned macros as + we want to trap exceptions. Only if we actually get an + exception will we decide whether we should have caught it. 
*/ + + switch (opcode) { + case 0x0c: /* ldwu */ + __asm__ __volatile__( + "1: ldq_u %1,0(%3)\n" + "2: ldq_u %2,1(%3)\n" + " extwl %1,%3,%1\n" + " extwh %2,%3,%2\n" + "3:\n" + EXC(1b,3b,%1,%0) + EXC(2b,3b,%2,%0) + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + if (error) + goto give_sigsegv; + *reg_addr = tmp1|tmp2; + break; + + case 0x22: /* lds */ + __asm__ __volatile__( + "1: ldq_u %1,0(%3)\n" + "2: ldq_u %2,3(%3)\n" + " extll %1,%3,%1\n" + " extlh %2,%3,%2\n" + "3:\n" + EXC(1b,3b,%1,%0) + EXC(2b,3b,%2,%0) + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + if (error) + goto give_sigsegv; + alpha_write_fp_reg(reg, s_mem_to_reg((int)(tmp1|tmp2))); + return; + + case 0x23: /* ldt */ + __asm__ __volatile__( + "1: ldq_u %1,0(%3)\n" + "2: ldq_u %2,7(%3)\n" + " extql %1,%3,%1\n" + " extqh %2,%3,%2\n" + "3:\n" + EXC(1b,3b,%1,%0) + EXC(2b,3b,%2,%0) + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + if (error) + goto give_sigsegv; + alpha_write_fp_reg(reg, tmp1|tmp2); + return; + + case 0x28: /* ldl */ + __asm__ __volatile__( + "1: ldq_u %1,0(%3)\n" + "2: ldq_u %2,3(%3)\n" + " extll %1,%3,%1\n" + " extlh %2,%3,%2\n" + "3:\n" + EXC(1b,3b,%1,%0) + EXC(2b,3b,%2,%0) + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + if (error) + goto give_sigsegv; + *reg_addr = (int)(tmp1|tmp2); + break; + + case 0x29: /* ldq */ + __asm__ __volatile__( + "1: ldq_u %1,0(%3)\n" + "2: ldq_u %2,7(%3)\n" + " extql %1,%3,%1\n" + " extqh %2,%3,%2\n" + "3:\n" + EXC(1b,3b,%1,%0) + EXC(2b,3b,%2,%0) + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2) + : "r"(va), "0"(0)); + if (error) + goto give_sigsegv; + *reg_addr = tmp1|tmp2; + break; + + /* Note that the store sequences do not indicate that they change + memory because it _should_ be affecting nothing in this context. + (Otherwise we have other, much larger, problems.) 
*/ + case 0x0d: /* stw */ + __asm__ __volatile__( + "1: ldq_u %2,1(%5)\n" + "2: ldq_u %1,0(%5)\n" + " inswh %6,%5,%4\n" + " inswl %6,%5,%3\n" + " mskwh %2,%5,%2\n" + " mskwl %1,%5,%1\n" + " or %2,%4,%2\n" + " or %1,%3,%1\n" + "3: stq_u %2,1(%5)\n" + "4: stq_u %1,0(%5)\n" + "5:\n" + EXC(1b,5b,%2,%0) + EXC(2b,5b,%1,%0) + EXC(3b,5b,$31,%0) + EXC(4b,5b,$31,%0) + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), + "=&r"(tmp3), "=&r"(tmp4) + : "r"(va), "r"(*reg_addr), "0"(0)); + if (error) + goto give_sigsegv; + return; + + case 0x26: /* sts */ + fake_reg = s_reg_to_mem(alpha_read_fp_reg(reg)); + fallthrough; + + case 0x2c: /* stl */ + __asm__ __volatile__( + "1: ldq_u %2,3(%5)\n" + "2: ldq_u %1,0(%5)\n" + " inslh %6,%5,%4\n" + " insll %6,%5,%3\n" + " msklh %2,%5,%2\n" + " mskll %1,%5,%1\n" + " or %2,%4,%2\n" + " or %1,%3,%1\n" + "3: stq_u %2,3(%5)\n" + "4: stq_u %1,0(%5)\n" + "5:\n" + EXC(1b,5b,%2,%0) + EXC(2b,5b,%1,%0) + EXC(3b,5b,$31,%0) + EXC(4b,5b,$31,%0) + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), + "=&r"(tmp3), "=&r"(tmp4) + : "r"(va), "r"(*reg_addr), "0"(0)); + if (error) + goto give_sigsegv; + return; + + case 0x27: /* stt */ + fake_reg = alpha_read_fp_reg(reg); + fallthrough; + + case 0x2d: /* stq */ + __asm__ __volatile__( + "1: ldq_u %2,7(%5)\n" + "2: ldq_u %1,0(%5)\n" + " insqh %6,%5,%4\n" + " insql %6,%5,%3\n" + " mskqh %2,%5,%2\n" + " mskql %1,%5,%1\n" + " or %2,%4,%2\n" + " or %1,%3,%1\n" + "3: stq_u %2,7(%5)\n" + "4: stq_u %1,0(%5)\n" + "5:\n" + EXC(1b,5b,%2,%0) + EXC(2b,5b,%1,%0) + EXC(3b,5b,$31,%0) + EXC(4b,5b,$31,%0) + : "=r"(error), "=&r"(tmp1), "=&r"(tmp2), + "=&r"(tmp3), "=&r"(tmp4) + : "r"(va), "r"(*reg_addr), "0"(0)); + if (error) + goto give_sigsegv; + return; + + default: + /* What instruction were you trying to use, exactly? */ + goto give_sigbus; + } + + /* Only integer loads should get here; everyone else returns early. */ + if (reg == 30) + wrusp(fake_reg); + return; + +give_sigsegv: + regs->pc -= 4; /* make pc point to faulting insn */ + + /* We need to replicate some of the logic in mm/fault.c, + since we don't have access to the fault code in the + exception handling return path. */ + if ((unsigned long)va >= TASK_SIZE) + si_code = SEGV_ACCERR; + else { + struct mm_struct *mm = current->mm; + mmap_read_lock(mm); + if (find_vma(mm, (unsigned long)va)) + si_code = SEGV_ACCERR; + else + si_code = SEGV_MAPERR; + mmap_read_unlock(mm); + } + send_sig_fault(SIGSEGV, si_code, va, current); + return; + +give_sigbus: + regs->pc -= 4; + send_sig_fault(SIGBUS, BUS_ADRALN, va, current); + return; +} + +void +trap_init(void) +{ + /* Tell PAL-code what global pointer we want in the kernel. */ + register unsigned long gptr __asm__("$29"); + wrkgp(gptr); + + /* Hack for Multia (UDB) and JENSEN: some of their SRMs have + a bug in the handling of the opDEC fault. Fix it up if so. */ + if (implver() == IMPLVER_EV4) + opDEC_check(); + + wrent(entArith, 1); + wrent(entMM, 2); + wrent(entIF, 3); + wrent(entUna, 4); + wrent(entSys, 5); + wrent(entDbg, 6); +} diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S new file mode 100644 index 0000000000..5b78d64072 --- /dev/null +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define EMITS_PT_NOTE +#define RO_EXCEPTION_TABLE_ALIGN 16 + +#include +#include +#include +#include +#include + +OUTPUT_FORMAT("elf64-alpha") +OUTPUT_ARCH(alpha) +ENTRY(__start) +PHDRS { text PT_LOAD; note PT_NOTE; } +jiffies = jiffies_64; +SECTIONS +{ +#ifdef CONFIG_ALPHA_LEGACY_START_ADDRESS + . 
= 0xfffffc0000310000; +#else + . = 0xfffffc0001010000; +#endif + + _text = .; /* Text and read-only data */ + .text : { + HEAD_TEXT + TEXT_TEXT + SCHED_TEXT + CPUIDLE_TEXT + LOCK_TEXT + *(.fixup) + *(.gnu.warning) + } :text + swapper_pg_dir = SWAPPER_PGD; + _etext = .; /* End of text section */ + + RO_DATA(4096) + + /* Will be freed after init */ + __init_begin = ALIGN(PAGE_SIZE); + INIT_TEXT_SECTION(PAGE_SIZE) + INIT_DATA_SECTION(16) + PERCPU_SECTION(L1_CACHE_BYTES) + /* Align to THREAD_SIZE rather than PAGE_SIZE here so any padding page + needed for the THREAD_SIZE aligned init_task gets freed after init */ + . = ALIGN(THREAD_SIZE); + __init_end = .; + /* Freed after init ends here */ + + _sdata = .; /* Start of rw data section */ + _data = .; + RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + + .got : { + *(.got) + } + .sdata : { + *(.sdata) + } + _edata = .; /* End of data section */ + + BSS_SECTION(0, 0, 0) + _end = .; + + .mdebug 0 : { + *(.mdebug) + } + .note 0 : { + *(.note) + } + + STABS_DEBUG + DWARF_DEBUG + ELF_DETAILS + + DISCARDS +} diff --git a/arch/alpha/lib/Makefile b/arch/alpha/lib/Makefile new file mode 100644 index 0000000000..1cc74f7b50 --- /dev/null +++ b/arch/alpha/lib/Makefile @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for alpha-specific library files.. +# + +asflags-y := $(KBUILD_CFLAGS) +ccflags-y := -Werror + +# Many of these routines have implementations tuned for ev6. +# Choose them iff we're targeting ev6 specifically. +ev6-$(CONFIG_ALPHA_EV6) := ev6- + +# Several make use of the cttz instruction introduced in ev67. +ev67-$(CONFIG_ALPHA_EV67) := ev67- + +lib-y = __divqu.o __remqu.o __divlu.o __remlu.o \ + udiv-qrnnd.o \ + udelay.o \ + $(ev6-y)memset.o \ + $(ev6-y)memcpy.o \ + memmove.o \ + checksum.o \ + csum_partial_copy.o \ + $(ev67-y)strlen.o \ + stycpy.o \ + styncpy.o \ + $(ev67-y)strchr.o \ + $(ev67-y)strrchr.o \ + $(ev6-y)memchr.o \ + $(ev6-y)copy_user.o \ + $(ev6-y)clear_user.o \ + $(ev6-y)csum_ipv6_magic.o \ + $(ev6-y)clear_page.o \ + $(ev6-y)copy_page.o \ + fpreg.o \ + callback_srm.o srm_puts.o srm_printk.o \ + fls.o + +# The division routines are built from single source, with different defines. +AFLAGS___divqu.o = -DDIV +AFLAGS___remqu.o = -DREM +AFLAGS___divlu.o = -DDIV -DINTSIZE +AFLAGS___remlu.o = -DREM -DINTSIZE + +$(addprefix $(obj)/,__divqu.o __remqu.o __divlu.o __remlu.o): \ + $(src)/$(ev6-y)divide.S FORCE + $(call if_changed_rule,as_o_S) + +# There are direct branches between {str*cpy,str*cat} and stx*cpy. +# Ensure the branches are within range by merging these objects. + +LDFLAGS_stycpy.o := -r +LDFLAGS_styncpy.o := -r + +$(obj)/stycpy.o: $(obj)/strcpy.o $(obj)/$(ev67-y)strcat.o \ + $(obj)/$(ev6-y)stxcpy.o FORCE + $(call if_changed,ld) + +$(obj)/styncpy.o: $(obj)/strncpy.o $(obj)/$(ev67-y)strncat.o \ + $(obj)/$(ev6-y)stxncpy.o FORCE + $(call if_changed,ld) diff --git a/arch/alpha/lib/callback_srm.S b/arch/alpha/lib/callback_srm.S new file mode 100644 index 0000000000..b13c4a231f --- /dev/null +++ b/arch/alpha/lib/callback_srm.S @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/callback_srm.S + */ + +#include +#include + +.text +#define HWRPB_CRB_OFFSET 0xc0 + +#if defined(CONFIG_ALPHA_SRM) || defined(CONFIG_ALPHA_GENERIC) +.align 4 +srm_dispatch: +#if defined(CONFIG_ALPHA_GENERIC) + ldl $4,alpha_using_srm + beq $4,nosrm +#endif + ldq $0,hwrpb # gp is set up by CALLBACK macro. + ldl $25,0($25) # Pick up the wrapper data. + mov $20,$21 # Shift arguments right. 
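+	# (Editor's note: the incoming arguments slide one register to the
+	#  right so that the SRM function code, extracted from the wrapper
+	#  data below, can be passed as the first argument in $16.)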
+ mov $19,$20 + ldq $1,HWRPB_CRB_OFFSET($0) + mov $18,$19 + mov $17,$18 + mov $16,$17 + addq $0,$1,$2 # CRB address + ldq $27,0($2) # DISPATCH procedure descriptor (VMS call std) + extwl $25,0,$16 # SRM callback function code + ldq $3,8($27) # call address + extwl $25,2,$25 # argument information (VMS calling std) + jmp ($3) # Return directly to caller of wrapper. + +.align 4 +.globl srm_fixup +.ent srm_fixup +srm_fixup: + ldgp $29,0($27) +#if defined(CONFIG_ALPHA_GENERIC) + ldl $4,alpha_using_srm + beq $4,nosrm +#endif + ldq $0,hwrpb + ldq $1,HWRPB_CRB_OFFSET($0) + addq $0,$1,$2 # CRB address + ldq $27,16($2) # VA of FIXUP procedure descriptor + ldq $3,8($27) # call address + lda $25,2($31) # two integer arguments + jmp ($3) # Return directly to caller of srm_fixup. +.end srm_fixup + +#if defined(CONFIG_ALPHA_GENERIC) +.align 3 +nosrm: + lda $0,-1($31) + ret +#endif + +#define CALLBACK(NAME, CODE, ARG_CNT) \ +.align 4; .globl callback_##NAME; .ent callback_##NAME; callback_##NAME##: \ +ldgp $29,0($27); br $25,srm_dispatch; .word CODE, ARG_CNT; .end callback_##NAME + +#else /* defined(CONFIG_ALPHA_SRM) || defined(CONFIG_ALPHA_GENERIC) */ + +#define CALLBACK(NAME, CODE, ARG_CNT) \ +.align 3; .globl callback_##NAME; .ent callback_##NAME; callback_##NAME##: \ +lda $0,-1($31); ret; .end callback_##NAME + +.align 3 +.globl srm_fixup +.ent srm_fixup +srm_fixup: + lda $0,-1($31) + ret +.end srm_fixup +#endif /* defined(CONFIG_ALPHA_SRM) || defined(CONFIG_ALPHA_GENERIC) */ + +CALLBACK(puts, CCB_PUTS, 4) +CALLBACK(open, CCB_OPEN, 3) +CALLBACK(close, CCB_CLOSE, 2) +CALLBACK(read, CCB_READ, 5) +CALLBACK(open_console, CCB_OPEN_CONSOLE, 1) +CALLBACK(close_console, CCB_CLOSE_CONSOLE, 1) +CALLBACK(getenv, CCB_GET_ENV, 4) +CALLBACK(setenv, CCB_SET_ENV, 4) +CALLBACK(getc, CCB_GETC, 2) +CALLBACK(reset_term, CCB_RESET_TERM, 2) +CALLBACK(term_int, CCB_SET_TERM_INT, 3) +CALLBACK(term_ctl, CCB_SET_TERM_CTL, 3) +CALLBACK(process_keycode, CCB_PROCESS_KEYCODE, 3) +CALLBACK(ioctl, CCB_IOCTL, 6) +CALLBACK(write, CCB_WRITE, 5) +CALLBACK(reset_env, CCB_RESET_ENV, 4) +CALLBACK(save_env, CCB_SAVE_ENV, 1) +CALLBACK(pswitch, CCB_PSWITCH, 3) +CALLBACK(bios_emul, CCB_BIOS_EMUL, 5) + +EXPORT_SYMBOL(callback_getenv) +EXPORT_SYMBOL(callback_setenv) +EXPORT_SYMBOL(callback_save_env) + +.data +__alpha_using_srm: # For use by bootpheader + .long 7 # value is not 1 for link debugging + .weak alpha_using_srm; alpha_using_srm = __alpha_using_srm +__callback_init_done: # For use by bootpheader + .long 7 # value is not 1 for link debugging + .weak callback_init_done; callback_init_done = __callback_init_done + diff --git a/arch/alpha/lib/checksum.c b/arch/alpha/lib/checksum.c new file mode 100644 index 0000000000..3f35c3ed69 --- /dev/null +++ b/arch/alpha/lib/checksum.c @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/alpha/lib/checksum.c + * + * This file contains network checksum routines that are better done + * in an architecture-specific manner due to speed.. + * Comments in other versions indicate that the algorithms are from RFC1071 + * + * accelerated versions (and 21264 assembly versions ) contributed by + * Rick Gorton + */ + +#include +#include + +#include + +static inline unsigned short from64to16(unsigned long x) +{ + /* Using extract instructions is a bit more efficient + than the original shift/bitmask version. 
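+	   (Editor's note, describing the code below: the 64-bit sum is folded
+	   by adding its two 32-bit halves, then the three low 16-bit words of
+	   that partial sum, then the two low 16-bit words of the result.)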
*/ + + union { + unsigned long ul; + unsigned int ui[2]; + unsigned short us[4]; + } in_v, tmp_v, out_v; + + in_v.ul = x; + tmp_v.ul = (unsigned long) in_v.ui[0] + (unsigned long) in_v.ui[1]; + + /* Since the bits of tmp_v.sh[3] are going to always be zero, + we don't have to bother to add that in. */ + out_v.ul = (unsigned long) tmp_v.us[0] + (unsigned long) tmp_v.us[1] + + (unsigned long) tmp_v.us[2]; + + /* Similarly, out_v.us[2] is always zero for the final add. */ + return out_v.us[0] + out_v.us[1]; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented. + */ +__sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, __wsum sum) +{ + return (__force __sum16)~from64to16( + (__force u64)saddr + (__force u64)daddr + + (__force u64)sum + ((len + proto) << 8)); +} +EXPORT_SYMBOL(csum_tcpudp_magic); + +__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, __wsum sum) +{ + unsigned long result; + + result = (__force u64)saddr + (__force u64)daddr + + (__force u64)sum + ((len + proto) << 8); + + /* Fold down to 32-bits so we don't lose in the typedef-less + network stack. */ + /* 64 to 33 */ + result = (result & 0xffffffff) + (result >> 32); + /* 33 to 32 */ + result = (result & 0xffffffff) + (result >> 32); + return (__force __wsum)result; +} +EXPORT_SYMBOL(csum_tcpudp_nofold); + +/* + * Do a 64-bit checksum on an arbitrary memory area.. + * + * This isn't a great routine, but it's not _horrible_ either. The + * inner loop could be unrolled a bit further, and there are better + * ways to do the carry, but this is reasonable. + */ +static inline unsigned long do_csum(const unsigned char * buff, int len) +{ + int odd, count; + unsigned long result = 0; + + if (len <= 0) + goto out; + odd = 1 & (unsigned long) buff; + if (odd) { + result = *buff << 8; + len--; + buff++; + } + count = len >> 1; /* nr of 16-bit words.. */ + if (count) { + if (2 & (unsigned long) buff) { + result += *(unsigned short *) buff; + count--; + len -= 2; + buff += 2; + } + count >>= 1; /* nr of 32-bit words.. */ + if (count) { + if (4 & (unsigned long) buff) { + result += *(unsigned int *) buff; + count--; + len -= 4; + buff += 4; + } + count >>= 1; /* nr of 64-bit words.. */ + if (count) { + unsigned long carry = 0; + do { + unsigned long w = *(unsigned long *) buff; + count--; + buff += 8; + result += carry; + result += w; + carry = (w > result); + } while (count); + result += carry; + result = (result & 0xffffffff) + (result >> 32); + } + if (len & 4) { + result += *(unsigned int *) buff; + buff += 4; + } + } + if (len & 2) { + result += *(unsigned short *) buff; + buff += 2; + } + } + if (len & 1) + result += *buff; + result = from64to16(result); + if (odd) + result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); +out: + return result; +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. 
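+ * (Editor's note: ihl is the IP header length in 32-bit words, so the call
+ * below sums ihl * 4 bytes -- 20 bytes for the common ihl == 5 case.)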
+ */ +__sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + return (__force __sum16)~do_csum(iph,ihl*4); +} +EXPORT_SYMBOL(ip_fast_csum); + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +__wsum csum_partial(const void *buff, int len, __wsum sum) +{ + unsigned long result = do_csum(buff, len); + + /* add in old sum, and carry.. */ + result += (__force u32)sum; + /* 32+c bits -> 32 bits */ + result = (result & 0xffffffff) + (result >> 32); + return (__force __wsum)result; +} + +EXPORT_SYMBOL(csum_partial); + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ +__sum16 ip_compute_csum(const void *buff, int len) +{ + return (__force __sum16)~from64to16(do_csum(buff,len)); +} +EXPORT_SYMBOL(ip_compute_csum); diff --git a/arch/alpha/lib/clear_page.S b/arch/alpha/lib/clear_page.S new file mode 100644 index 0000000000..ce02de7b04 --- /dev/null +++ b/arch/alpha/lib/clear_page.S @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/clear_page.S + * + * Zero an entire page. + */ +#include + .text + .align 4 + .global clear_page + .ent clear_page +clear_page: + .prologue 0 + + lda $0,128 + nop + unop + nop + +1: stq $31,0($16) + stq $31,8($16) + stq $31,16($16) + stq $31,24($16) + + stq $31,32($16) + stq $31,40($16) + stq $31,48($16) + subq $0,1,$0 + + stq $31,56($16) + addq $16,64,$16 + unop + bne $0,1b + + ret + nop + unop + nop + + .end clear_page + EXPORT_SYMBOL(clear_page) diff --git a/arch/alpha/lib/clear_user.S b/arch/alpha/lib/clear_user.S new file mode 100644 index 0000000000..db6c6ca458 --- /dev/null +++ b/arch/alpha/lib/clear_user.S @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/clear_user.S + * Contributed by Richard Henderson + * + * Zero user space, handling exceptions as we go. + * + * We have to make sure that $0 is always up-to-date and contains the + * right "bytes left to zero" value (and that it is updated only _after_ + * a successful copy). There is also some rather minor exception setup + * stuff. + */ +#include + +/* Allow an exception for an insn; exit if we get one. */ +#define EX(x,y...) \ + 99: x,##y; \ + .section __ex_table,"a"; \ + .long 99b - .; \ + lda $31, $exception-99b($31); \ + .previous + + .set noat + .set noreorder + .align 4 + + .globl __clear_user + .ent __clear_user + .frame $30, 0, $26 + .prologue 0 + +$loop: + and $1, 3, $4 # e0 : + beq $4, 1f # .. e1 : + +0: EX( stq_u $31, 0($16) ) # e0 : zero one word + subq $0, 8, $0 # .. e1 : + subq $4, 1, $4 # e0 : + addq $16, 8, $16 # .. e1 : + bne $4, 0b # e1 : + unop # : + +1: bic $1, 3, $1 # e0 : + beq $1, $tail # .. e1 : + +2: EX( stq_u $31, 0($16) ) # e0 : zero four words + subq $0, 8, $0 # .. e1 : + EX( stq_u $31, 8($16) ) # e0 : + subq $0, 8, $0 # .. e1 : + EX( stq_u $31, 16($16) ) # e0 : + subq $0, 8, $0 # .. e1 : + EX( stq_u $31, 24($16) ) # e0 : + subq $0, 8, $0 # .. e1 : + subq $1, 4, $1 # e0 : + addq $16, 32, $16 # .. e1 : + bne $1, 2b # e1 : + +$tail: + bne $2, 1f # e1 : is there a tail to do? + ret $31, ($26), 1 # .. e1 : + +1: EX( ldq_u $5, 0($16) ) # e0 : + clr $0 # .. e1 : + nop # e1 : + mskqh $5, $0, $5 # e0 : + EX( stq_u $5, 0($16) ) # e0 : + ret $31, ($26), 1 # .. 
e1 : + +__clear_user: + and $17, $17, $0 + and $16, 7, $4 # e0 : find dest misalignment + beq $0, $zerolength # .. e1 : + addq $0, $4, $1 # e0 : bias counter + and $1, 7, $2 # e1 : number of bytes in tail + srl $1, 3, $1 # e0 : + beq $4, $loop # .. e1 : + + EX( ldq_u $5, 0($16) ) # e0 : load dst word to mask back in + beq $1, $oneword # .. e1 : sub-word store? + + mskql $5, $16, $5 # e0 : take care of misaligned head + addq $16, 8, $16 # .. e1 : + EX( stq_u $5, -8($16) ) # e0 : + addq $0, $4, $0 # .. e1 : bytes left -= 8 - misalignment + subq $1, 1, $1 # e0 : + subq $0, 8, $0 # .. e1 : + br $loop # e1 : + unop # : + +$oneword: + mskql $5, $16, $4 # e0 : + mskqh $5, $2, $5 # e0 : + or $5, $4, $5 # e1 : + EX( stq_u $5, 0($16) ) # e0 : + clr $0 # .. e1 : + +$zerolength: +$exception: + ret $31, ($26), 1 # .. e1 : + + .end __clear_user + EXPORT_SYMBOL(__clear_user) diff --git a/arch/alpha/lib/copy_page.S b/arch/alpha/lib/copy_page.S new file mode 100644 index 0000000000..5439a30c77 --- /dev/null +++ b/arch/alpha/lib/copy_page.S @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/copy_page.S + * + * Copy an entire page. + */ +#include + .text + .align 4 + .global copy_page + .ent copy_page +copy_page: + .prologue 0 + + lda $18,128 + nop + unop + nop + +1: ldq $0,0($17) + ldq $1,8($17) + ldq $2,16($17) + ldq $3,24($17) + + ldq $4,32($17) + ldq $5,40($17) + ldq $6,48($17) + ldq $7,56($17) + + stq $0,0($16) + subq $18,1,$18 + stq $1,8($16) + addq $17,64,$17 + + stq $2,16($16) + stq $3,24($16) + stq $4,32($16) + stq $5,40($16) + + stq $6,48($16) + stq $7,56($16) + addq $16,64,$16 + bne $18, 1b + + ret + nop + unop + nop + + .end copy_page + EXPORT_SYMBOL(copy_page) diff --git a/arch/alpha/lib/copy_user.S b/arch/alpha/lib/copy_user.S new file mode 100644 index 0000000000..32ab0344b1 --- /dev/null +++ b/arch/alpha/lib/copy_user.S @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/copy_user.S + * + * Copy to/from user space, handling exceptions as we go.. This + * isn't exactly pretty. + * + * This is essentially the same as "memcpy()", but with a few twists. + * Notably, we have to make sure that $0 is always up-to-date and + * contains the right "bytes left to copy" value (and that it is updated + * only _after_ a successful copy). There is also some rather minor + * exception setup stuff.. + */ + +#include + +/* Allow an exception for an insn; exit if we get one. */ +#define EXI(x,y...) \ + 99: x,##y; \ + .section __ex_table,"a"; \ + .long 99b - .; \ + lda $31, $exitin-99b($31); \ + .previous + +#define EXO(x,y...) 
\ + 99: x,##y; \ + .section __ex_table,"a"; \ + .long 99b - .; \ + lda $31, $exitout-99b($31); \ + .previous + + .set noat + .align 4 + .globl __copy_user + .ent __copy_user +__copy_user: + .prologue 0 + mov $18,$0 + and $16,7,$3 + beq $0,$35 + beq $3,$36 + subq $3,8,$3 + .align 4 +$37: + EXI( ldq_u $1,0($17) ) + EXO( ldq_u $2,0($16) ) + extbl $1,$17,$1 + mskbl $2,$16,$2 + insbl $1,$16,$1 + addq $3,1,$3 + bis $1,$2,$1 + EXO( stq_u $1,0($16) ) + subq $0,1,$0 + addq $16,1,$16 + addq $17,1,$17 + beq $0,$41 + bne $3,$37 +$36: + and $17,7,$1 + bic $0,7,$4 + beq $1,$43 + beq $4,$48 + EXI( ldq_u $3,0($17) ) + .align 4 +$50: + EXI( ldq_u $2,8($17) ) + subq $4,8,$4 + extql $3,$17,$3 + extqh $2,$17,$1 + bis $3,$1,$1 + EXO( stq $1,0($16) ) + addq $17,8,$17 + subq $0,8,$0 + addq $16,8,$16 + bis $2,$2,$3 + bne $4,$50 +$48: + beq $0,$41 + .align 4 +$57: + EXI( ldq_u $1,0($17) ) + EXO( ldq_u $2,0($16) ) + extbl $1,$17,$1 + mskbl $2,$16,$2 + insbl $1,$16,$1 + bis $1,$2,$1 + EXO( stq_u $1,0($16) ) + subq $0,1,$0 + addq $16,1,$16 + addq $17,1,$17 + bne $0,$57 + br $31,$41 + .align 4 +$43: + beq $4,$65 + .align 4 +$66: + EXI( ldq $1,0($17) ) + subq $4,8,$4 + EXO( stq $1,0($16) ) + addq $17,8,$17 + subq $0,8,$0 + addq $16,8,$16 + bne $4,$66 +$65: + beq $0,$41 + EXI( ldq $2,0($17) ) + EXO( ldq $1,0($16) ) + mskql $2,$0,$2 + mskqh $1,$0,$1 + bis $2,$1,$2 + EXO( stq $2,0($16) ) + bis $31,$31,$0 +$41: +$35: +$exitin: +$exitout: + ret $31,($26),1 + + .end __copy_user +EXPORT_SYMBOL(__copy_user) diff --git a/arch/alpha/lib/csum_ipv6_magic.S b/arch/alpha/lib/csum_ipv6_magic.S new file mode 100644 index 0000000000..c7b213ab01 --- /dev/null +++ b/arch/alpha/lib/csum_ipv6_magic.S @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/csum_ipv6_magic.S + * Contributed by Richard Henderson + * + * unsigned short csum_ipv6_magic(struct in6_addr *saddr, + * struct in6_addr *daddr, + * __u32 len, + * unsigned short proto, + * unsigned int csum); + * + * Misalignment handling (which costs 16 instructions / 8 cycles) + * added by Ivan Kokshaysky + */ + +#include + .globl csum_ipv6_magic + .align 4 + .ent csum_ipv6_magic + .frame $30,0,$26,0 +csum_ipv6_magic: + .prologue 0 + + ldq_u $0,0($16) # e0 : load src & dst addr words + zapnot $20,15,$20 # .. e1 : zero extend incoming csum + extqh $18,1,$4 # e0 : byte swap len & proto while we wait + ldq_u $21,7($16) # .. e1 : handle misalignment + + extbl $18,1,$5 # e0 : + ldq_u $1,8($16) # .. e1 : + extbl $18,2,$6 # e0 : + ldq_u $22,15($16) # .. e1 : + + extbl $18,3,$18 # e0 : + ldq_u $2,0($17) # .. e1 : + sra $4,32,$4 # e0 : + ldq_u $23,7($17) # .. e1 : + + extql $0,$16,$0 # e0 : + ldq_u $3,8($17) # .. e1 : + extqh $21,$16,$21 # e0 : + ldq_u $24,15($17) # .. e1 : + + sll $5,16,$5 # e0 : + or $0,$21,$0 # .. e1 : 1st src word complete + extql $1,$16,$1 # e0 : + addq $20,$0,$20 # .. e1 : begin summing the words + + extqh $22,$16,$22 # e0 : + cmpult $20,$0,$0 # .. e1 : + sll $6,8,$6 # e0 : + or $1,$22,$1 # .. e1 : 2nd src word complete + + extql $2,$17,$2 # e0 : + or $4,$18,$18 # .. e1 : + extqh $23,$17,$23 # e0 : + or $5,$6,$5 # .. e1 : + + extql $3,$17,$3 # e0 : + or $2,$23,$2 # .. e1 : 1st dst word complete + extqh $24,$17,$24 # e0 : + or $18,$5,$18 # .. e1 : len complete + + extwh $19,7,$7 # e0 : + or $3,$24,$3 # .. e1 : 2nd dst word complete + extbl $19,1,$19 # e0 : + addq $20,$1,$20 # .. e1 : + + or $19,$7,$19 # e0 : + cmpult $20,$1,$1 # .. e1 : + sll $19,48,$19 # e0 : + nop # .. e0 : + + sra $19,32,$19 # e0 : proto complete + addq $20,$2,$20 # .. 
e1 : + cmpult $20,$2,$2 # e0 : + addq $20,$3,$20 # .. e1 : + + cmpult $20,$3,$3 # e0 : + addq $20,$18,$20 # .. e1 : + cmpult $20,$18,$18 # e0 : + addq $20,$19,$20 # .. e1 : + + cmpult $20,$19,$19 # e0 : + addq $0,$1,$0 # .. e1 : merge the carries back into the csum + addq $2,$3,$2 # e0 : + addq $18,$19,$18 # .. e1 : + + addq $0,$2,$0 # e0 : + addq $20,$18,$20 # .. e1 : + addq $0,$20,$0 # e0 : + unop # : + + extwl $0,2,$2 # e0 : begin folding the 64-bit value + zapnot $0,3,$3 # .. e1 : + extwl $0,4,$1 # e0 : + addq $2,$3,$3 # .. e1 : + + extwl $0,6,$0 # e0 : + addq $3,$1,$3 # .. e1 : + addq $0,$3,$0 # e0 : + unop # : + + extwl $0,2,$1 # e0 : fold 18-bit value + zapnot $0,3,$0 # .. e1 : + addq $0,$1,$0 # e0 : + unop # : + + extwl $0,2,$1 # e0 : fold 17-bit value + zapnot $0,3,$0 # .. e1 : + addq $0,$1,$0 # e0 : + not $0,$0 # .. e1 : and complement. + + zapnot $0,3,$0 # e0 : + ret # .. e1 : + + .end csum_ipv6_magic + EXPORT_SYMBOL(csum_ipv6_magic) diff --git a/arch/alpha/lib/csum_partial_copy.c b/arch/alpha/lib/csum_partial_copy.c new file mode 100644 index 0000000000..1931a04af8 --- /dev/null +++ b/arch/alpha/lib/csum_partial_copy.c @@ -0,0 +1,364 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * csum_partial_copy - do IP checksumming and copy + * + * (C) Copyright 1996 Linus Torvalds + * accelerated versions (and 21264 assembly versions ) contributed by + * Rick Gorton + * + * Don't look at this too closely - you'll go mad. The things + * we do for performance.. + */ + +#include +#include +#include +#include + + +#define ldq_u(x,y) \ +__asm__ __volatile__("ldq_u %0,%1":"=r" (x):"m" (*(const unsigned long *)(y))) + +#define stq_u(x,y) \ +__asm__ __volatile__("stq_u %1,%0":"=m" (*(unsigned long *)(y)):"r" (x)) + +#define extql(x,y,z) \ +__asm__ __volatile__("extql %1,%2,%0":"=r" (z):"r" (x),"r" (y)) + +#define extqh(x,y,z) \ +__asm__ __volatile__("extqh %1,%2,%0":"=r" (z):"r" (x),"r" (y)) + +#define mskql(x,y,z) \ +__asm__ __volatile__("mskql %1,%2,%0":"=r" (z):"r" (x),"r" (y)) + +#define mskqh(x,y,z) \ +__asm__ __volatile__("mskqh %1,%2,%0":"=r" (z):"r" (x),"r" (y)) + +#define insql(x,y,z) \ +__asm__ __volatile__("insql %1,%2,%0":"=r" (z):"r" (x),"r" (y)) + +#define insqh(x,y,z) \ +__asm__ __volatile__("insqh %1,%2,%0":"=r" (z):"r" (x),"r" (y)) + +#define __get_word(insn,x,ptr) \ +({ \ + long __guu_err; \ + __asm__ __volatile__( \ + "1: "#insn" %0,%2\n" \ + "2:\n" \ + EXC(1b,2b,%0,%1) \ + : "=r"(x), "=r"(__guu_err) \ + : "m"(__m(ptr)), "1"(0)); \ + __guu_err; \ +}) + +static inline unsigned short from64to16(unsigned long x) +{ + /* Using extract instructions is a bit more efficient + than the original shift/bitmask version. */ + + union { + unsigned long ul; + unsigned int ui[2]; + unsigned short us[4]; + } in_v, tmp_v, out_v; + + in_v.ul = x; + tmp_v.ul = (unsigned long) in_v.ui[0] + (unsigned long) in_v.ui[1]; + + /* Since the bits of tmp_v.sh[3] are going to always be zero, + we don't have to bother to add that in. */ + out_v.ul = (unsigned long) tmp_v.us[0] + (unsigned long) tmp_v.us[1] + + (unsigned long) tmp_v.us[2]; + + /* Similarly, out_v.us[2] is always zero for the final add. */ + return out_v.us[0] + out_v.us[1]; +} + + + +/* + * Ok. This isn't fun, but this is the EASY case. 
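As a plain-C reference for the folding above: the union tricks in from64to16(), like the "32+c bits -> 32 bits" step in csum_partial(), are ones-complement carry folds. A minimal standalone sketch (fold64to16 is an illustrative name, not a kernel helper):

#include <stdint.h>
#include <stdio.h>

/* Fold a 64-bit ones-complement accumulator down to 16 bits:
 * add the two 32-bit halves, then keep folding the carries
 * out of the top 16 bits until none remain. */
static uint16_t fold64to16(uint64_t sum)
{
	sum = (sum & 0xffffffffULL) + (sum >> 32);	/* 64 -> at most 33 bits */
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

int main(void)
{
	uint64_t sum = 0xffffULL + 0x0001 + 0x0002;	/* three 16-bit words */
	printf("%#x\n", fold64to16(sum));		/* prints 0x3 */
	return 0;
}

The IP-style entry points (ip_fast_csum, ip_compute_csum) then return the bitwise complement of this folded value.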
+ */ +static inline unsigned long +csum_partial_cfu_aligned(const unsigned long __user *src, unsigned long *dst, + long len) +{ + unsigned long checksum = ~0U; + unsigned long carry = 0; + + while (len >= 0) { + unsigned long word; + if (__get_word(ldq, word, src)) + return 0; + checksum += carry; + src++; + checksum += word; + len -= 8; + carry = checksum < word; + *dst = word; + dst++; + } + len += 8; + checksum += carry; + if (len) { + unsigned long word, tmp; + if (__get_word(ldq, word, src)) + return 0; + tmp = *dst; + mskql(word, len, word); + checksum += word; + mskqh(tmp, len, tmp); + carry = checksum < word; + *dst = word | tmp; + checksum += carry; + } + return checksum; +} + +/* + * This is even less fun, but this is still reasonably + * easy. + */ +static inline unsigned long +csum_partial_cfu_dest_aligned(const unsigned long __user *src, + unsigned long *dst, + unsigned long soff, + long len) +{ + unsigned long first; + unsigned long word, carry; + unsigned long lastsrc = 7+len+(unsigned long)src; + unsigned long checksum = ~0U; + + if (__get_word(ldq_u, first,src)) + return 0; + carry = 0; + while (len >= 0) { + unsigned long second; + + if (__get_word(ldq_u, second, src+1)) + return 0; + extql(first, soff, word); + len -= 8; + src++; + extqh(second, soff, first); + checksum += carry; + word |= first; + first = second; + checksum += word; + *dst = word; + dst++; + carry = checksum < word; + } + len += 8; + checksum += carry; + if (len) { + unsigned long tmp; + unsigned long second; + if (__get_word(ldq_u, second, lastsrc)) + return 0; + tmp = *dst; + extql(first, soff, word); + extqh(second, soff, first); + word |= first; + mskql(word, len, word); + checksum += word; + mskqh(tmp, len, tmp); + carry = checksum < word; + *dst = word | tmp; + checksum += carry; + } + return checksum; +} + +/* + * This is slightly less fun than the above.. + */ +static inline unsigned long +csum_partial_cfu_src_aligned(const unsigned long __user *src, + unsigned long *dst, + unsigned long doff, + long len, + unsigned long partial_dest) +{ + unsigned long carry = 0; + unsigned long word; + unsigned long second_dest; + unsigned long checksum = ~0U; + + mskql(partial_dest, doff, partial_dest); + while (len >= 0) { + if (__get_word(ldq, word, src)) + return 0; + len -= 8; + insql(word, doff, second_dest); + checksum += carry; + stq_u(partial_dest | second_dest, dst); + src++; + checksum += word; + insqh(word, doff, partial_dest); + carry = checksum < word; + dst++; + } + len += 8; + if (len) { + checksum += carry; + if (__get_word(ldq, word, src)) + return 0; + mskql(word, len, word); + len -= 8; + checksum += word; + insql(word, doff, second_dest); + len += doff; + carry = checksum < word; + partial_dest |= second_dest; + if (len >= 0) { + stq_u(partial_dest, dst); + if (!len) goto out; + dst++; + insqh(word, doff, partial_dest); + } + doff = len; + } + ldq_u(second_dest, dst); + mskqh(second_dest, doff, second_dest); + stq_u(partial_dest | second_dest, dst); +out: + checksum += carry; + return checksum; +} + +/* + * This is so totally un-fun that it's frightening. Don't + * look at this too closely, you'll go blind. 
+ */ +static inline unsigned long +csum_partial_cfu_unaligned(const unsigned long __user * src, + unsigned long * dst, + unsigned long soff, unsigned long doff, + long len, unsigned long partial_dest) +{ + unsigned long carry = 0; + unsigned long first; + unsigned long lastsrc; + unsigned long checksum = ~0U; + + if (__get_word(ldq_u, first, src)) + return 0; + lastsrc = 7+len+(unsigned long)src; + mskql(partial_dest, doff, partial_dest); + while (len >= 0) { + unsigned long second, word; + unsigned long second_dest; + + if (__get_word(ldq_u, second, src+1)) + return 0; + extql(first, soff, word); + checksum += carry; + len -= 8; + extqh(second, soff, first); + src++; + word |= first; + first = second; + insql(word, doff, second_dest); + checksum += word; + stq_u(partial_dest | second_dest, dst); + carry = checksum < word; + insqh(word, doff, partial_dest); + dst++; + } + len += doff; + checksum += carry; + if (len >= 0) { + unsigned long second, word; + unsigned long second_dest; + + if (__get_word(ldq_u, second, lastsrc)) + return 0; + extql(first, soff, word); + extqh(second, soff, first); + word |= first; + first = second; + mskql(word, len-doff, word); + checksum += word; + insql(word, doff, second_dest); + carry = checksum < word; + stq_u(partial_dest | second_dest, dst); + if (len) { + ldq_u(second_dest, dst+1); + insqh(word, doff, partial_dest); + mskqh(second_dest, len, second_dest); + stq_u(partial_dest | second_dest, dst+1); + } + checksum += carry; + } else { + unsigned long second, word; + unsigned long second_dest; + + if (__get_word(ldq_u, second, lastsrc)) + return 0; + extql(first, soff, word); + extqh(second, soff, first); + word |= first; + ldq_u(second_dest, dst); + mskql(word, len-doff, word); + checksum += word; + mskqh(second_dest, len, second_dest); + carry = checksum < word; + insql(word, doff, word); + stq_u(partial_dest | word | second_dest, dst); + checksum += carry; + } + return checksum; +} + +static __wsum __csum_and_copy(const void __user *src, void *dst, int len) +{ + unsigned long soff = 7 & (unsigned long) src; + unsigned long doff = 7 & (unsigned long) dst; + unsigned long checksum; + + if (!doff) { + if (!soff) + checksum = csum_partial_cfu_aligned( + (const unsigned long __user *) src, + (unsigned long *) dst, len-8); + else + checksum = csum_partial_cfu_dest_aligned( + (const unsigned long __user *) src, + (unsigned long *) dst, + soff, len-8); + } else { + unsigned long partial_dest; + ldq_u(partial_dest, dst); + if (!soff) + checksum = csum_partial_cfu_src_aligned( + (const unsigned long __user *) src, + (unsigned long *) dst, + doff, len-8, partial_dest); + else + checksum = csum_partial_cfu_unaligned( + (const unsigned long __user *) src, + (unsigned long *) dst, + soff, doff, len-8, partial_dest); + } + return (__force __wsum)from64to16 (checksum); +} + +__wsum +csum_and_copy_from_user(const void __user *src, void *dst, int len) +{ + if (!access_ok(src, len)) + return 0; + return __csum_and_copy(src, dst, len); +} +EXPORT_SYMBOL(csum_and_copy_from_user); + +__wsum +csum_partial_copy_nocheck(const void *src, void *dst, int len) +{ + return __csum_and_copy((__force const void __user *)src, + dst, len); +} +EXPORT_SYMBOL(csum_partial_copy_nocheck); diff --git a/arch/alpha/lib/dbg_current.S b/arch/alpha/lib/dbg_current.S new file mode 100644 index 0000000000..be66121312 --- /dev/null +++ b/arch/alpha/lib/dbg_current.S @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/dbg_current.S + * Contributed by Richard Henderson 
(rth@cygnus.com) + * + * Trap if we find current not correct. + */ + +#include + + .text + .set noat + + .globl _mcount + .ent _mcount +_mcount: + .frame $30, 0, $28, 0 + .prologue 0 + + lda $0, -0x4000($30) + cmpult $8, $30, $1 + cmpule $0, $30, $2 + and $1, $2, $3 + bne $3, 1f + + call_pal PAL_bugchk + +1: ret $31, ($28), 1 + + .end _mcount diff --git a/arch/alpha/lib/dbg_stackcheck.S b/arch/alpha/lib/dbg_stackcheck.S new file mode 100644 index 0000000000..b3b6fc94f7 --- /dev/null +++ b/arch/alpha/lib/dbg_stackcheck.S @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/dbg_stackcheck.S + * Contributed by Richard Henderson (rth@tamu.edu) + * + * Verify that we have not overflowed the stack. Oops if we have. + */ + +#include + + .text + .set noat + + .align 3 + .globl _mcount + .ent _mcount +_mcount: + .frame $30, 0, $28, 0 + .prologue 0 + + lda $0, TASK_SIZE($8) + cmpult $30, $0, $0 + bne $0, 1f + ret ($28) +1: stq $31, -8($31) # oops me, damn it. + br 1b + + .end _mcount diff --git a/arch/alpha/lib/dbg_stackkill.S b/arch/alpha/lib/dbg_stackkill.S new file mode 100644 index 0000000000..6d9197e52a --- /dev/null +++ b/arch/alpha/lib/dbg_stackkill.S @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/dbg_stackkill.S + * Contributed by Richard Henderson (rth@cygnus.com) + * + * Clobber the balance of the kernel stack, hoping to catch + * uninitialized local variables in the act. + */ + +#include + + .text + .set noat + + .align 5 + .globl _mcount + .ent _mcount +_mcount: + .frame $30, 0, $28, 0 + .prologue 0 + + ldi $0, 0xdeadbeef + lda $2, -STACK_SIZE + sll $0, 32, $1 + and $30, $2, $2 + or $0, $1, $0 + lda $2, TASK_SIZE($2) + cmpult $2, $30, $1 + beq $1, 2f +1: stq $0, 0($2) + addq $2, 8, $2 + cmpult $2, $30, $1 + bne $1, 1b +2: ret ($28) + + .end _mcount diff --git a/arch/alpha/lib/divide.S b/arch/alpha/lib/divide.S new file mode 100644 index 0000000000..2b60eb45e5 --- /dev/null +++ b/arch/alpha/lib/divide.S @@ -0,0 +1,199 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/divide.S + * + * (C) 1995 Linus Torvalds + * + * Alpha division.. + */ + +/* + * The alpha chip doesn't provide hardware division, so we have to do it + * by hand. The compiler expects the functions + * + * __divqu: 64-bit unsigned long divide + * __remqu: 64-bit unsigned long remainder + * __divqs/__remqs: signed 64-bit + * __divlu/__remlu: unsigned 32-bit + * __divls/__remls: signed 32-bit + * + * These are not normal C functions: instead of the normal + * calling sequence, these expect their arguments in registers + * $24 and $25, and return the result in $27. Register $28 may + * be clobbered (assembly temporary), anything else must be saved. + * + * In short: painful. + * + * This is a rather simple bit-at-a-time algorithm: it's very good + * at dividing random 64-bit numbers, but the more usual case where + * the divisor is small is handled better by the DEC algorithm + * using lookup tables. This uses much less memory, though, and is + * nicer on the cache.. Besides, I don't know the copyright status + * of the DEC code. 
+ */ + +/* + * My temporaries: + * $0 - current bit + * $1 - shifted divisor + * $2 - modulus/quotient + * + * $23 - return address + * $24 - dividend + * $25 - divisor + * + * $27 - quotient/modulus + * $28 - compare status + */ + +#include +#define halt .long 0 + +/* + * Select function type and registers + */ +#define mask $0 +#define divisor $1 +#define compare $28 +#define tmp1 $3 +#define tmp2 $4 + +#ifdef DIV +#define DIV_ONLY(x,y...) x,##y +#define MOD_ONLY(x,y...) +#define func(x) __div##x +#define modulus $2 +#define quotient $27 +#define GETSIGN(x) xor $24,$25,x +#define STACK 48 +#else +#define DIV_ONLY(x,y...) +#define MOD_ONLY(x,y...) x,##y +#define func(x) __rem##x +#define modulus $27 +#define quotient $2 +#define GETSIGN(x) bis $24,$24,x +#define STACK 32 +#endif + +/* + * For 32-bit operations, we need to extend to 64-bit + */ +#ifdef INTSIZE +#define ufunction func(lu) +#define sfunction func(l) +#define LONGIFY(x) zapnot x,15,x +#define SLONGIFY(x) addl x,0,x +#else +#define ufunction func(qu) +#define sfunction func(q) +#define LONGIFY(x) +#define SLONGIFY(x) +#endif + +.set noat +.align 3 +.globl ufunction +.ent ufunction +ufunction: + subq $30,STACK,$30 + .frame $30,STACK,$23 + .prologue 0 + +7: stq $1, 0($30) + bis $25,$25,divisor + stq $2, 8($30) + bis $24,$24,modulus + stq $0,16($30) + bis $31,$31,quotient + LONGIFY(divisor) + stq tmp1,24($30) + LONGIFY(modulus) + bis $31,1,mask + DIV_ONLY(stq tmp2,32($30)) + beq divisor, 9f /* div by zero */ + +#ifdef INTSIZE + /* + * shift divisor left, using 3-bit shifts for + * 32-bit divides as we can't overflow. Three-bit + * shifts will result in looping three times less + * here, but can result in two loops more later. + * Thus using a large shift isn't worth it (and + * s8add pairs better than a sll..) + */ +1: cmpult divisor,modulus,compare + s8addq divisor,$31,divisor + s8addq mask,$31,mask + bne compare,1b +#else +1: cmpult divisor,modulus,compare + blt divisor, 2f + addq divisor,divisor,divisor + addq mask,mask,mask + bne compare,1b + unop +#endif + + /* ok, start to go right again.. */ +2: DIV_ONLY(addq quotient,mask,tmp2) + srl mask,1,mask + cmpule divisor,modulus,compare + subq modulus,divisor,tmp1 + DIV_ONLY(cmovne compare,tmp2,quotient) + srl divisor,1,divisor + cmovne compare,tmp1,modulus + bne mask,2b + +9: ldq $1, 0($30) + ldq $2, 8($30) + ldq $0,16($30) + ldq tmp1,24($30) + DIV_ONLY(ldq tmp2,32($30)) + addq $30,STACK,$30 + ret $31,($23),1 + .end ufunction +EXPORT_SYMBOL(ufunction) + +/* + * Uhh.. Ugly signed division. I'd rather not have it at all, but + * it's needed in some circumstances. There are different ways to + * handle this, really. This does: + * -a / b = a / -b = -(a / b) + * -a % b = -(a % b) + * a % -b = a % b + * which is probably not the best solution, but at least should + * have the property that (x/y)*y + (x%y) = x. 
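Setting the scheduling aside, the loop above is ordinary restoring (shift-and-subtract) division. A standalone C sketch of the same idea (divqu_sketch is an illustrative name; divide-by-zero is left to the caller, as it is here):

#include <stdint.h>
#include <stdio.h>

/* Bit-at-a-time unsigned division: shift the divisor and a mask bit
 * left until the divisor no longer fits below the dividend, then walk
 * back down, subtracting wherever the shifted divisor still fits. */
static uint64_t divqu_sketch(uint64_t n, uint64_t d, uint64_t *rem)
{
	uint64_t mask = 1, q = 0;

	while (d < n && !(d >> 63)) {	/* grow divisor and mask together */
		d <<= 1;
		mask <<= 1;
	}
	while (mask) {			/* ...then shrink them back down */
		if (d <= n) {
			n -= d;
			q |= mask;
		}
		d >>= 1;
		mask >>= 1;
	}
	*rem = n;
	return q;
}

int main(void)
{
	uint64_t r, q = divqu_sketch(1000003, 97, &r);
	printf("%llu r %llu\n", (unsigned long long)q,
	       (unsigned long long)r);	/* 10309 r 30 */
	return 0;
}

The signed entry points below reduce to this: take absolute values, divide, then give the quotient the XOR of the operand signs (or give the remainder the sign of the dividend, for the __rem variants), per the identities listed above.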
+ */ +.align 3 +.globl sfunction +.ent sfunction +sfunction: + subq $30,STACK,$30 + .frame $30,STACK,$23 + .prologue 0 + bis $24,$25,$28 + SLONGIFY($28) + bge $28,7b + stq $24,0($30) + subq $31,$24,$28 + stq $25,8($30) + cmovlt $24,$28,$24 /* abs($24) */ + stq $23,16($30) + subq $31,$25,$28 + stq tmp1,24($30) + cmovlt $25,$28,$25 /* abs($25) */ + unop + bsr $23,ufunction + ldq $24,0($30) + ldq $25,8($30) + GETSIGN($28) + subq $31,$27,tmp1 + SLONGIFY($28) + ldq $23,16($30) + cmovlt $28,tmp1,$27 + ldq tmp1,24($30) + addq $30,STACK,$30 + ret $31,($23),1 + .end sfunction +EXPORT_SYMBOL(sfunction) diff --git a/arch/alpha/lib/ev6-clear_page.S b/arch/alpha/lib/ev6-clear_page.S new file mode 100644 index 0000000000..325864c815 --- /dev/null +++ b/arch/alpha/lib/ev6-clear_page.S @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/ev6-clear_page.S + * + * Zero an entire page. + */ +#include + .text + .align 4 + .global clear_page + .ent clear_page +clear_page: + .prologue 0 + + lda $0,128 + lda $1,125 + addq $16,64,$2 + addq $16,128,$3 + + addq $16,192,$17 + wh64 ($16) + wh64 ($2) + wh64 ($3) + +1: wh64 ($17) + stq $31,0($16) + subq $0,1,$0 + subq $1,1,$1 + + stq $31,8($16) + stq $31,16($16) + addq $17,64,$2 + nop + + stq $31,24($16) + stq $31,32($16) + cmovgt $1,$2,$17 + nop + + stq $31,40($16) + stq $31,48($16) + nop + nop + + stq $31,56($16) + addq $16,64,$16 + nop + bne $0,1b + + ret + nop + nop + nop + + .end clear_page + EXPORT_SYMBOL(clear_page) diff --git a/arch/alpha/lib/ev6-clear_user.S b/arch/alpha/lib/ev6-clear_user.S new file mode 100644 index 0000000000..7e644f83cd --- /dev/null +++ b/arch/alpha/lib/ev6-clear_user.S @@ -0,0 +1,213 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/ev6-clear_user.S + * 21264 version contributed by Rick Gorton + * + * Zero user space, handling exceptions as we go. + * + * We have to make sure that $0 is always up-to-date and contains the + * right "bytes left to zero" value (and that it is updated only _after_ + * a successful copy). There is also some rather minor exception setup + * stuff. + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * Try not to change the actual algorithm if possible for consistency. + * Determining actual stalls (other than slotting) doesn't appear to be easy to do. + * From perusing the source code context where this routine is called, it is + * a fair assumption that significant fractions of entire pages are zeroed, so + * it's going to be worth the effort to hand-unroll a big loop, and use wh64. + * ASSUMPTION: + * The believed purpose of only updating $0 after a store is that a signal + * may come along during the execution of this chunk of code, and we don't + * want to leave a hole (and we also want to avoid repeating lots of work) + */ + +#include +/* Allow an exception for an insn; exit if we get one. */ +#define EX(x,y...) 
\ + 99: x,##y; \ + .section __ex_table,"a"; \ + .long 99b - .; \ + lda $31, $exception-99b($31); \ + .previous + + .set noat + .set noreorder + .align 4 + + .globl __clear_user + .ent __clear_user + .frame $30, 0, $26 + .prologue 0 + + # Pipeline info : Slotting & Comments +__clear_user: + and $17, $17, $0 + and $16, 7, $4 # .. E .. .. : find dest head misalignment + beq $0, $zerolength # U .. .. .. : U L U L + + addq $0, $4, $1 # .. .. .. E : bias counter + and $1, 7, $2 # .. .. E .. : number of misaligned bytes in tail +# Note - we never actually use $2, so this is a moot computation +# and we can rewrite this later... + srl $1, 3, $1 # .. E .. .. : number of quadwords to clear + beq $4, $headalign # U .. .. .. : U L U L + +/* + * Head is not aligned. Write (8 - $4) bytes to head of destination + * This means $16 is known to be misaligned + */ + EX( ldq_u $5, 0($16) ) # .. .. .. L : load dst word to mask back in + beq $1, $onebyte # .. .. U .. : sub-word store? + mskql $5, $16, $5 # .. U .. .. : take care of misaligned head + addq $16, 8, $16 # E .. .. .. : L U U L + + EX( stq_u $5, -8($16) ) # .. .. .. L : + subq $1, 1, $1 # .. .. E .. : + addq $0, $4, $0 # .. E .. .. : bytes left -= 8 - misalignment + subq $0, 8, $0 # E .. .. .. : U L U L + + .align 4 +/* + * (The .align directive ought to be a moot point) + * values upon initial entry to the loop + * $1 is number of quadwords to clear (zero is a valid value) + * $2 is number of trailing bytes (0..7) ($2 never used...) + * $16 is known to be aligned 0mod8 + */ +$headalign: + subq $1, 16, $4 # .. .. .. E : If < 16, we can not use the huge loop + and $16, 0x3f, $2 # .. .. E .. : Forward work for huge loop + subq $2, 0x40, $3 # .. E .. .. : bias counter (huge loop) + blt $4, $trailquad # U .. .. .. : U L U L + +/* + * We know that we're going to do at least 16 quads, which means we are + * going to be able to use the large block clear loop at least once. + * Figure out how many quads we need to clear before we are 0mod64 aligned + * so we can use the wh64 instruction. + */ + + nop # .. .. .. E + nop # .. .. E .. + nop # .. E .. .. + beq $3, $bigalign # U .. .. .. : U L U L : Aligned 0mod64 + +$alignmod64: + EX( stq_u $31, 0($16) ) # .. .. .. L + addq $3, 8, $3 # .. .. E .. + subq $0, 8, $0 # .. E .. .. + nop # E .. .. .. : U L U L + + nop # .. .. .. E + subq $1, 1, $1 # .. .. E .. + addq $16, 8, $16 # .. E .. .. + blt $3, $alignmod64 # U .. .. .. : U L U L + +$bigalign: +/* + * $0 is the number of bytes left + * $1 is the number of quads left + * $16 is aligned 0mod64 + * we know that we'll be taking a minimum of one trip through + * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle + * We are _not_ going to update $0 after every single store. That + * would be silly, because there will be cross-cluster dependencies + * no matter how the code is scheduled. By doing it in slightly + * staggered fashion, we can still do this loop in 5 fetches + * The worse case will be doing two extra quads in some future execution, + * in the event of an interrupted clear. + * Assumes the wh64 needs to be for 2 trips through the loop in the future + * The wh64 is issued on for the starting destination address for trip +2 + * through the loop, and if there are less than two trips left, the target + * address will be for the current trip. + */ + nop # E : + nop # E : + nop # E : + bis $16,$16,$3 # E : U L U L : Initial wh64 address is dest + /* This might actually help for the current trip... */ + +$do_wh64: + wh64 ($3) # .. .. .. 
L1 : memory subsystem hint + subq $1, 16, $4 # .. .. E .. : Forward calculation - repeat the loop? + EX( stq_u $31, 0($16) ) # .. L .. .. + subq $0, 8, $0 # E .. .. .. : U L U L + + addq $16, 128, $3 # E : Target address of wh64 + EX( stq_u $31, 8($16) ) # L : + EX( stq_u $31, 16($16) ) # L : + subq $0, 16, $0 # E : U L L U + + nop # E : + EX( stq_u $31, 24($16) ) # L : + EX( stq_u $31, 32($16) ) # L : + subq $0, 168, $5 # E : U L L U : two trips through the loop left? + /* 168 = 192 - 24, since we've already completed some stores */ + + subq $0, 16, $0 # E : + EX( stq_u $31, 40($16) ) # L : + EX( stq_u $31, 48($16) ) # L : + cmovlt $5, $16, $3 # E : U L L U : Latency 2, extra mapping cycle + + subq $1, 8, $1 # E : + subq $0, 16, $0 # E : + EX( stq_u $31, 56($16) ) # L : + nop # E : U L U L + + nop # E : + subq $0, 8, $0 # E : + addq $16, 64, $16 # E : + bge $4, $do_wh64 # U : U L U L + +$trailquad: + # zero to 16 quadwords left to store, plus any trailing bytes + # $1 is the number of quadwords left to go. + # + nop # .. .. .. E + nop # .. .. E .. + nop # .. E .. .. + beq $1, $trailbytes # U .. .. .. : U L U L : Only 0..7 bytes to go + +$onequad: + EX( stq_u $31, 0($16) ) # .. .. .. L + subq $1, 1, $1 # .. .. E .. + subq $0, 8, $0 # .. E .. .. + nop # E .. .. .. : U L U L + + nop # .. .. .. E + nop # .. .. E .. + addq $16, 8, $16 # .. E .. .. + bgt $1, $onequad # U .. .. .. : U L U L + + # We have an unknown number of bytes left to go. +$trailbytes: + nop # .. .. .. E + nop # .. .. E .. + nop # .. E .. .. + beq $0, $zerolength # U .. .. .. : U L U L + + # $0 contains the number of bytes left to copy (0..31) + # so we will use $0 as the loop counter + # We know for a fact that $0 > 0 zero due to previous context +$onebyte: + EX( stb $31, 0($16) ) # .. .. .. L + subq $0, 1, $0 # .. .. E .. : + addq $16, 1, $16 # .. E .. .. : + bgt $0, $onebyte # U .. .. .. : U L U L + +$zerolength: +$exception: # Destination for exception recovery(?) + nop # .. .. .. E : + nop # .. .. E .. : + nop # .. E .. .. : + ret $31, ($26), 1 # L0 .. .. .. : L U L U + .end __clear_user + EXPORT_SYMBOL(__clear_user) diff --git a/arch/alpha/lib/ev6-copy_page.S b/arch/alpha/lib/ev6-copy_page.S new file mode 100644 index 0000000000..fd7212c8dc --- /dev/null +++ b/arch/alpha/lib/ev6-copy_page.S @@ -0,0 +1,205 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/ev6-copy_page.S + * + * Copy an entire page. + */ + +/* The following comparison of this routine vs the normal copy_page.S + was written by an unnamed ev6 hardware designer and forwarded to me + via Steven Hobbs . + + First Problem: STQ overflows. + ----------------------------- + + It would be nice if EV6 handled every resource overflow efficiently, + but for some it doesn't. Including store queue overflows. It causes + a trap and a restart of the pipe. + + To get around this we sometimes use (to borrow a term from a VSSAD + researcher) "aeration". The idea is to slow the rate at which the + processor receives valid instructions by inserting nops in the fetch + path. In doing so, you can prevent the overflow and actually make + the code run faster. You can, of course, take advantage of the fact + that the processor can fetch at most 4 aligned instructions per cycle. + + I inserted enough nops to force it to take 10 cycles to fetch the + loop code. 
In theory, EV6 should be able to execute this loop in + 9 cycles but I was not able to get it to run that fast -- the initial + conditions were such that I could not reach this optimum rate on + (chaotic) EV6. I wrote the code such that everything would issue + in order. + + Second Problem: Dcache index matches. + ------------------------------------- + + If you are going to use this routine on random aligned pages, there + is a 25% chance that the pages will be at the same dcache indices. + This results in many nasty memory traps without care. + + The solution is to schedule the prefetches to avoid the memory + conflicts. I schedule the wh64 prefetches farther ahead of the + read prefetches to avoid this problem. + + Third Problem: Needs more prefetching. + -------------------------------------- + + In order to improve the code I added deeper prefetching to take the + most advantage of EV6's bandwidth. + + I also prefetched the read stream. Note that adding the read prefetch + forced me to add another cycle to the inner-most kernel - up to 11 + from the original 8 cycles per iteration. We could improve performance + further by unrolling the loop and doing multiple prefetches per cycle. + + I think that the code below will be very robust and fast code for the + purposes of copying aligned pages. It is slower when both source and + destination pages are in the dcache, but it is my guess that this is + less important than the dcache miss case. */ + +#include + .text + .align 4 + .global copy_page + .ent copy_page +copy_page: + .prologue 0 + + /* Prefetch 5 read cachelines; write-hint 10 cache lines. */ + wh64 ($16) + ldl $31,0($17) + ldl $31,64($17) + lda $1,1*64($16) + + wh64 ($1) + ldl $31,128($17) + ldl $31,192($17) + lda $1,2*64($16) + + wh64 ($1) + ldl $31,256($17) + lda $18,118 + lda $1,3*64($16) + + wh64 ($1) + nop + lda $1,4*64($16) + lda $2,5*64($16) + + wh64 ($1) + wh64 ($2) + lda $1,6*64($16) + lda $2,7*64($16) + + wh64 ($1) + wh64 ($2) + lda $1,8*64($16) + lda $2,9*64($16) + + wh64 ($1) + wh64 ($2) + lda $19,10*64($16) + nop + + /* Main prefetching/write-hinting loop. */ +1: ldq $0,0($17) + ldq $1,8($17) + unop + unop + + unop + unop + ldq $2,16($17) + ldq $3,24($17) + + ldq $4,32($17) + ldq $5,40($17) + unop + unop + + unop + unop + ldq $6,48($17) + ldq $7,56($17) + + ldl $31,320($17) + unop + unop + unop + + /* This gives the extra cycle of aeration above the minimum. */ + unop + unop + unop + unop + + wh64 ($19) + unop + unop + unop + + stq $0,0($16) + subq $18,1,$18 + stq $1,8($16) + unop + + unop + stq $2,16($16) + addq $17,64,$17 + stq $3,24($16) + + stq $4,32($16) + stq $5,40($16) + addq $19,64,$19 + unop + + stq $6,48($16) + stq $7,56($16) + addq $16,64,$16 + bne $18, 1b + + /* Prefetch the final 5 cache lines of the read stream. */ + lda $18,10 + ldl $31,320($17) + ldl $31,384($17) + ldl $31,448($17) + + ldl $31,512($17) + ldl $31,576($17) + nop + nop + + /* Non-prefetching, non-write-hinting cleanup loop for the + final 10 cache lines. 
*/ +2: ldq $0,0($17) + ldq $1,8($17) + ldq $2,16($17) + ldq $3,24($17) + + ldq $4,32($17) + ldq $5,40($17) + ldq $6,48($17) + ldq $7,56($17) + + stq $0,0($16) + subq $18,1,$18 + stq $1,8($16) + addq $17,64,$17 + + stq $2,16($16) + stq $3,24($16) + stq $4,32($16) + stq $5,40($16) + + stq $6,48($16) + stq $7,56($16) + addq $16,64,$16 + bne $18, 2b + + ret + nop + unop + nop + + .end copy_page + EXPORT_SYMBOL(copy_page) diff --git a/arch/alpha/lib/ev6-copy_user.S b/arch/alpha/lib/ev6-copy_user.S new file mode 100644 index 0000000000..f3e4337543 --- /dev/null +++ b/arch/alpha/lib/ev6-copy_user.S @@ -0,0 +1,227 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/ev6-copy_user.S + * + * 21264 version contributed by Rick Gorton + * + * Copy to/from user space, handling exceptions as we go.. This + * isn't exactly pretty. + * + * This is essentially the same as "memcpy()", but with a few twists. + * Notably, we have to make sure that $0 is always up-to-date and + * contains the right "bytes left to copy" value (and that it is updated + * only _after_ a successful copy). There is also some rather minor + * exception setup stuff.. + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + */ + +#include +/* Allow an exception for an insn; exit if we get one. */ +#define EXI(x,y...) \ + 99: x,##y; \ + .section __ex_table,"a"; \ + .long 99b - .; \ + lda $31, $exitin-99b($31); \ + .previous + +#define EXO(x,y...) \ + 99: x,##y; \ + .section __ex_table,"a"; \ + .long 99b - .; \ + lda $31, $exitout-99b($31); \ + .previous + + .set noat + .align 4 + .globl __copy_user + .ent __copy_user + # Pipeline info: Slotting & Comments +__copy_user: + .prologue 0 + mov $18, $0 # .. .. .. E + subq $18, 32, $1 # .. .. E. .. : Is this going to be a small copy? + nop # .. E .. .. + beq $18, $zerolength # U .. .. .. : U L U L + + and $16,7,$3 # .. .. .. E : is leading dest misalignment + ble $1, $onebyteloop # .. .. U .. : 1st branch : small amount of data + beq $3, $destaligned # .. U .. .. : 2nd (one cycle fetcher stall) + subq $3, 8, $3 # E .. .. .. : L U U L : trip counter +/* + * The fetcher stall also hides the 1 cycle cross-cluster stall for $3 (L --> U) + * This loop aligns the destination a byte at a time + * We know we have at least one trip through this loop + */ +$aligndest: + EXI( ldbu $1,0($17) ) # .. .. .. L : Keep loads separate from stores + addq $16,1,$16 # .. .. E .. : Section 3.8 in the CWG + addq $3,1,$3 # .. E .. .. : + nop # E .. .. .. : U L U L + +/* + * the -1 is to compensate for the inc($16) done in a previous quadpack + * which allows us zero dependencies within either quadpack in the loop + */ + EXO( stb $1,-1($16) ) # .. .. .. L : + addq $17,1,$17 # .. .. E .. : Section 3.8 in the CWG + subq $0,1,$0 # .. E .. .. : + bne $3, $aligndest # U .. .. .. : U L U L + +/* + * If we fell through into here, we have a minimum of 33 - 7 bytes + * If we arrived via branch, we have a minimum of 32 bytes + */ +$destaligned: + and $17,7,$1 # .. .. .. E : Check _current_ source alignment + bic $0,7,$4 # .. .. E .. : number bytes as a quadword loop + EXI( ldq_u $3,0($17) ) # .. L .. .. 
: Forward fetch for fallthrough code + beq $1,$quadaligned # U .. .. .. : U L U L + +/* + * In the worst case, we've just executed an ldq_u here from 0($17) + * and we'll repeat it once if we take the branch + */ + +/* Misaligned quadword loop - not unrolled. Leave it that way. */ +$misquad: + EXI( ldq_u $2,8($17) ) # .. .. .. L : + subq $4,8,$4 # .. .. E .. : + extql $3,$17,$3 # .. U .. .. : + extqh $2,$17,$1 # U .. .. .. : U U L L + + bis $3,$1,$1 # .. .. .. E : + EXO( stq $1,0($16) ) # .. .. L .. : + addq $17,8,$17 # .. E .. .. : + subq $0,8,$0 # E .. .. .. : U L L U + + addq $16,8,$16 # .. .. .. E : + bis $2,$2,$3 # .. .. E .. : + nop # .. E .. .. : + bne $4,$misquad # U .. .. .. : U L U L + + nop # .. .. .. E + nop # .. .. E .. + nop # .. E .. .. + beq $0,$zerolength # U .. .. .. : U L U L + +/* We know we have at least one trip through the byte loop */ + EXI ( ldbu $2,0($17) ) # .. .. .. L : No loads in the same quad + addq $16,1,$16 # .. .. E .. : as the store (Section 3.8 in CWG) + nop # .. E .. .. : + br $31, $dirtyentry # L0 .. .. .. : L U U L +/* Do the trailing byte loop load, then hop into the store part of the loop */ + +/* + * A minimum of (33 - 7) bytes to do a quad at a time. + * Based upon the usage context, it's worth the effort to unroll this loop + * $0 - number of bytes to be moved + * $4 - number of bytes to move as quadwords + * $16 is current destination address + * $17 is current source address + */ +$quadaligned: + subq $4, 32, $2 # .. .. .. E : do not unroll for small stuff + nop # .. .. E .. + nop # .. E .. .. + blt $2, $onequad # U .. .. .. : U L U L + +/* + * There is a significant assumption here that the source and destination + * addresses differ by more than 32 bytes. In this particular case, a + * sparsity of registers further bounds this to be a minimum of 8 bytes. + * But if this isn't met, then the output result will be incorrect. + * Furthermore, due to a lack of available registers, we really can't + * unroll this to be an 8x loop (which would enable us to use the wh64 + * instruction memory hint instruction). + */ +$unroll4: + EXI( ldq $1,0($17) ) # .. .. .. L + EXI( ldq $2,8($17) ) # .. .. L .. + subq $4,32,$4 # .. E .. .. + nop # E .. .. .. : U U L L + + addq $17,16,$17 # .. .. .. E + EXO( stq $1,0($16) ) # .. .. L .. + EXO( stq $2,8($16) ) # .. L .. .. + subq $0,16,$0 # E .. .. .. : U L L U + + addq $16,16,$16 # .. .. .. E + EXI( ldq $1,0($17) ) # .. .. L .. + EXI( ldq $2,8($17) ) # .. L .. .. + subq $4, 32, $3 # E .. .. .. : U U L L : is there enough for another trip? + + EXO( stq $1,0($16) ) # .. .. .. L + EXO( stq $2,8($16) ) # .. .. L .. + subq $0,16,$0 # .. E .. .. + addq $17,16,$17 # E .. .. .. : U L L U + + nop # .. .. .. E + nop # .. .. E .. + addq $16,16,$16 # .. E .. .. + bgt $3,$unroll4 # U .. .. .. : U L U L + + nop + nop + nop + beq $4, $noquads + +$onequad: + EXI( ldq $1,0($17) ) + subq $4,8,$4 + addq $17,8,$17 + nop + + EXO( stq $1,0($16) ) + subq $0,8,$0 + addq $16,8,$16 + bne $4,$onequad + +$noquads: + nop + nop + nop + beq $0,$zerolength + +/* + * For small copies (or the tail of a larger copy), do a very simple byte loop. + * There's no point in doing a lot of complex alignment calculations to try to + * to quadword stuff for a small amount of data. + * $0 - remaining number of bytes left to copy + * $16 - current dest addr + * $17 - current source addr + */ + +$onebyteloop: + EXI ( ldbu $2,0($17) ) # .. .. .. L : No loads in the same quad + addq $16,1,$16 # .. .. E .. : as the store (Section 3.8 in CWG) + nop # .. E .. .. 
: + nop # E .. .. .. : U L U L + +$dirtyentry: +/* + * the -1 is to compensate for the inc($16) done in a previous quadpack + * which allows us zero dependencies within either quadpack in the loop + */ + EXO ( stb $2,-1($16) ) # .. .. .. L : + addq $17,1,$17 # .. .. E .. : quadpack as the load + subq $0,1,$0 # .. E .. .. : change count _after_ copy + bgt $0,$onebyteloop # U .. .. .. : U L U L + +$zerolength: +$exitin: +$exitout: # Destination for exception recovery(?) + nop # .. .. .. E + nop # .. .. E .. + nop # .. E .. .. + ret $31,($26),1 # L0 .. .. .. : L U L U + + .end __copy_user + EXPORT_SYMBOL(__copy_user) diff --git a/arch/alpha/lib/ev6-csum_ipv6_magic.S b/arch/alpha/lib/ev6-csum_ipv6_magic.S new file mode 100644 index 0000000000..9a73f90700 --- /dev/null +++ b/arch/alpha/lib/ev6-csum_ipv6_magic.S @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/ev6-csum_ipv6_magic.S + * 21264 version contributed by Rick Gorton + * + * unsigned short csum_ipv6_magic(struct in6_addr *saddr, + * struct in6_addr *daddr, + * __u32 len, + * unsigned short proto, + * unsigned int csum); + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * Try not to change the actual algorithm if possible for consistency. + * Determining actual stalls (other than slotting) doesn't appear to be easy to do. + * + * unsigned short csum_ipv6_magic(struct in6_addr *saddr, + * struct in6_addr *daddr, + * __u32 len, + * unsigned short proto, + * unsigned int csum); + * + * Swap (takes form 0xaabb) + * Then shift it left by 48, so result is: + * 0xbbaa0000 00000000 + * Then turn it back into a sign extended 32-bit item + * 0xbbaa0000 + * + * Swap (an unsigned int) using Mike Burrows' 7-instruction sequence + * (we can't hide the 3-cycle latency of the unpkbw in the 6-instruction sequence) + * Assume input takes form 0xAABBCCDD + * + * Finally, original 'folding' approach is to split the long into 4 unsigned shorts + * add 4 ushorts, resulting in ushort/carry + * add carry bits + ushort --> ushort + * add carry bits + ushort --> ushort (in case the carry results in an overflow) + * Truncate to a ushort. (took 13 instructions) + * From doing some testing, using the approach in checksum.c:from64to16() + * results in the same outcome: + * split into 2 uints, add those, generating a ulong + * add the 3 low ushorts together, generating a uint + * a final add of the 2 lower ushorts + * truncating the result. + * + * Misalignment handling added by Ivan Kokshaysky + * The cost is 16 instructions (~8 cycles), including two extra loads which + * may cause additional delay in rare cases (load-load replay traps). 
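Stripped of the lane-by-lane scheduling, the swaps described above amount to byte-swapping the host-order len and proto so they line up with the little-endian quadword loads of the addresses before everything is summed. A hedged C sketch of the 32-bit case (bswap32_sketch is an illustrative name; compilers typically expose the same operation as __builtin_bswap32):

#include <stdint.h>

/* 0xAABBCCDD -> 0xDDCCBBAA, the value the lane comments in the code
 * below show being assembled before it is added into the checksum. */
static inline uint32_t bswap32_sketch(uint32_t x)
{
	return  (x >> 24)
	      | ((x >> 8)  & 0x0000ff00)
	      | ((x << 8)  & 0x00ff0000)
	      |  (x << 24);
}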
+ */ + +#include + .globl csum_ipv6_magic + .align 4 + .ent csum_ipv6_magic + .frame $30,0,$26,0 +csum_ipv6_magic: + .prologue 0 + + ldq_u $0,0($16) # L : Latency: 3 + inslh $18,7,$4 # U : 0000000000AABBCC + ldq_u $1,8($16) # L : Latency: 3 + sll $19,8,$7 # U : U L U L : 0x00000000 00aabb00 + + and $16,7,$6 # E : src misalignment + ldq_u $5,15($16) # L : Latency: 3 + zapnot $20,15,$20 # U : zero extend incoming csum + ldq_u $2,0($17) # L : U L U L : Latency: 3 + + extql $0,$6,$0 # U : + extqh $1,$6,$22 # U : + ldq_u $3,8($17) # L : Latency: 3 + sll $19,24,$19 # U : U U L U : 0x000000aa bb000000 + + cmoveq $6,$31,$22 # E : src aligned? + ldq_u $23,15($17) # L : Latency: 3 + inswl $18,3,$18 # U : 000000CCDD000000 + addl $19,$7,$19 # E : U L U L : bbaabb00 + + or $0,$22,$0 # E : 1st src word complete + extql $1,$6,$1 # U : + or $18,$4,$18 # E : 000000CCDDAABBCC + extqh $5,$6,$5 # U : L U L U + + and $17,7,$6 # E : dst misalignment + extql $2,$6,$2 # U : + or $1,$5,$1 # E : 2nd src word complete + extqh $3,$6,$22 # U : L U L U : + + cmoveq $6,$31,$22 # E : dst aligned? + extql $3,$6,$3 # U : + addq $20,$0,$20 # E : begin summing the words + extqh $23,$6,$23 # U : L U L U : + + srl $18,16,$4 # U : 0000000000CCDDAA + or $2,$22,$2 # E : 1st dst word complete + zap $19,0x3,$19 # U : bbaa0000 + or $3,$23,$3 # E : U L U L : 2nd dst word complete + + cmpult $20,$0,$0 # E : + addq $20,$1,$20 # E : + zapnot $18,0xa,$18 # U : 00000000DD00BB00 + zap $4,0xa,$4 # U : U U L L : 0000000000CC00AA + + or $18,$4,$18 # E : 00000000DDCCBBAA + nop # E : + cmpult $20,$1,$1 # E : + addq $20,$2,$20 # E : U L U L + + cmpult $20,$2,$2 # E : + addq $20,$3,$20 # E : + cmpult $20,$3,$3 # E : (1 cycle stall on $20) + addq $20,$18,$20 # E : U L U L (1 cycle stall on $20) + + cmpult $20,$18,$18 # E : + addq $20,$19,$20 # E : (1 cycle stall on $20) + addq $0,$1,$0 # E : merge the carries back into the csum + addq $2,$3,$2 # E : + + cmpult $20,$19,$19 # E : + addq $18,$19,$18 # E : (1 cycle stall on $19) + addq $0,$2,$0 # E : + addq $20,$18,$20 # E : U L U L : + /* (1 cycle stall on $18, 2 cycles on $20) */ + + addq $0,$20,$0 # E : + zapnot $0,15,$1 # U : Start folding output (1 cycle stall on $0) + nop # E : + srl $0,32,$0 # U : U L U L : (1 cycle stall on $0) + + addq $1,$0,$1 # E : Finished generating ulong + extwl $1,2,$2 # U : ushort[1] (1 cycle stall on $1) + zapnot $1,3,$0 # U : ushort[0] (1 cycle stall on $1) + extwl $1,4,$1 # U : ushort[2] (1 cycle stall on $1) + + addq $0,$2,$0 # E + addq $0,$1,$3 # E : Finished generating uint + /* (1 cycle stall on $0) */ + extwl $3,2,$1 # U : ushort[1] (1 cycle stall on $3) + nop # E : L U L U + + addq $1,$3,$0 # E : Final carry + not $0,$4 # E : complement (1 cycle stall on $0) + zapnot $4,3,$0 # U : clear upper garbage bits + /* (1 cycle stall on $4) */ + ret # L0 : L U L U + + .end csum_ipv6_magic + EXPORT_SYMBOL(csum_ipv6_magic) diff --git a/arch/alpha/lib/ev6-divide.S b/arch/alpha/lib/ev6-divide.S new file mode 100644 index 0000000000..137ff1a073 --- /dev/null +++ b/arch/alpha/lib/ev6-divide.S @@ -0,0 +1,263 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/ev6-divide.S + * + * 21264 version contributed by Rick Gorton + * + * Alpha division.. + */ + +/* + * The alpha chip doesn't provide hardware division, so we have to do it + * by hand. 
The compiler expects the functions + * + * __divqu: 64-bit unsigned long divide + * __remqu: 64-bit unsigned long remainder + * __divqs/__remqs: signed 64-bit + * __divlu/__remlu: unsigned 32-bit + * __divls/__remls: signed 32-bit + * + * These are not normal C functions: instead of the normal + * calling sequence, these expect their arguments in registers + * $24 and $25, and return the result in $27. Register $28 may + * be clobbered (assembly temporary), anything else must be saved. + * + * In short: painful. + * + * This is a rather simple bit-at-a-time algorithm: it's very good + * at dividing random 64-bit numbers, but the more usual case where + * the divisor is small is handled better by the DEC algorithm + * using lookup tables. This uses much less memory, though, and is + * nicer on the cache.. Besides, I don't know the copyright status + * of the DEC code. + */ + +/* + * My temporaries: + * $0 - current bit + * $1 - shifted divisor + * $2 - modulus/quotient + * + * $23 - return address + * $24 - dividend + * $25 - divisor + * + * $27 - quotient/modulus + * $28 - compare status + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * Try not to change the actual algorithm if possible for consistency. + */ + +#include +#define halt .long 0 + +/* + * Select function type and registers + */ +#define mask $0 +#define divisor $1 +#define compare $28 +#define tmp1 $3 +#define tmp2 $4 + +#ifdef DIV +#define DIV_ONLY(x,y...) x,##y +#define MOD_ONLY(x,y...) +#define func(x) __div##x +#define modulus $2 +#define quotient $27 +#define GETSIGN(x) xor $24,$25,x +#define STACK 48 +#else +#define DIV_ONLY(x,y...) +#define MOD_ONLY(x,y...) x,##y +#define func(x) __rem##x +#define modulus $27 +#define quotient $2 +#define GETSIGN(x) bis $24,$24,x +#define STACK 32 +#endif + +/* + * For 32-bit operations, we need to extend to 64-bit + */ +#ifdef INTSIZE +#define ufunction func(lu) +#define sfunction func(l) +#define LONGIFY(x) zapnot x,15,x +#define SLONGIFY(x) addl x,0,x +#else +#define ufunction func(qu) +#define sfunction func(q) +#define LONGIFY(x) +#define SLONGIFY(x) +#endif + +.set noat +.align 4 +.globl ufunction +.ent ufunction +ufunction: + subq $30,STACK,$30 # E : + .frame $30,STACK,$23 + .prologue 0 + +7: stq $1, 0($30) # L : + bis $25,$25,divisor # E : + stq $2, 8($30) # L : L U L U + + bis $24,$24,modulus # E : + stq $0,16($30) # L : + bis $31,$31,quotient # E : + LONGIFY(divisor) # E : U L L U + + stq tmp1,24($30) # L : + LONGIFY(modulus) # E : + bis $31,1,mask # E : + DIV_ONLY(stq tmp2,32($30)) # L : L U U L + + beq divisor, 9f /* div by zero */ + /* + * In spite of the DIV_ONLY being either a non-instruction + * or an actual stq, the addition of the .align directive + * below ensures that label 1 is going to be nicely aligned + */ + + .align 4 +#ifdef INTSIZE + /* + * shift divisor left, using 3-bit shifts for + * 32-bit divides as we can't overflow. Three-bit + * shifts will result in looping three times less + * here, but can result in two loops more later. + * Thus using a large shift isn't worth it (and + * s8add pairs better than a sll..) 
+ */ +1: cmpult divisor,modulus,compare # E : + s8addq divisor,$31,divisor # E : + s8addq mask,$31,mask # E : + bne compare,1b # U : U L U L +#else +1: cmpult divisor,modulus,compare # E : + nop # E : + nop # E : + blt divisor, 2f # U : U L U L + + addq divisor,divisor,divisor # E : + addq mask,mask,mask # E : + unop # E : + bne compare,1b # U : U L U L +#endif + + /* ok, start to go right again.. */ +2: + /* + * Keep things nicely bundled... use a nop instead of not + * having an instruction for DIV_ONLY + */ +#ifdef DIV + DIV_ONLY(addq quotient,mask,tmp2) # E : +#else + nop # E : +#endif + srl mask,1,mask # U : + cmpule divisor,modulus,compare # E : + subq modulus,divisor,tmp1 # E : + +#ifdef DIV + DIV_ONLY(cmovne compare,tmp2,quotient) # E : Latency 2, extra map slot + nop # E : as part of the cmovne + srl divisor,1,divisor # U : + nop # E : L U L U + + nop # E : + cmovne compare,tmp1,modulus # E : Latency 2, extra map slot + nop # E : as part of the cmovne + bne mask,2b # U : U L U L +#else + srl divisor,1,divisor # U : + cmovne compare,tmp1,modulus # E : Latency 2, extra map slot + nop # E : as part of the cmovne + bne mask,2b # U : U L L U +#endif + +9: ldq $1, 0($30) # L : + ldq $2, 8($30) # L : + nop # E : + nop # E : U U L L + + ldq $0,16($30) # L : + ldq tmp1,24($30) # L : + nop # E : + nop # E : + +#ifdef DIV + DIV_ONLY(ldq tmp2,32($30)) # L : +#else + nop # E : +#endif + addq $30,STACK,$30 # E : + ret $31,($23),1 # L0 : L U U L + .end ufunction +EXPORT_SYMBOL(ufunction) + +/* + * Uhh.. Ugly signed division. I'd rather not have it at all, but + * it's needed in some circumstances. There are different ways to + * handle this, really. This does: + * -a / b = a / -b = -(a / b) + * -a % b = -(a % b) + * a % -b = a % b + * which is probably not the best solution, but at least should + * have the property that (x/y)*y + (x%y) = x. + */ +.align 4 +.globl sfunction +.ent sfunction +sfunction: + subq $30,STACK,$30 # E : + .frame $30,STACK,$23 + .prologue 0 + bis $24,$25,$28 # E : + SLONGIFY($28) # E : + bge $28,7b # U : + + stq $24,0($30) # L : + subq $31,$24,$28 # E : + stq $25,8($30) # L : + nop # E : U L U L + + cmovlt $24,$28,$24 /* abs($24) */ # E : Latency 2, extra map slot + nop # E : as part of the cmov + stq $23,16($30) # L : + subq $31,$25,$28 # E : U L U L + + stq tmp1,24($30) # L : + cmovlt $25,$28,$25 /* abs($25) */ # E : Latency 2, extra map slot + nop # E : + bsr $23,ufunction # L0: L U L U + + ldq $24,0($30) # L : + ldq $25,8($30) # L : + GETSIGN($28) # E : + subq $31,$27,tmp1 # E : U U L L + + SLONGIFY($28) # E : + ldq $23,16($30) # L : + cmovlt $28,tmp1,$27 # E : Latency 2, extra map slot + nop # E : U L L U : as part of the cmov + + ldq tmp1,24($30) # L : + nop # E : as part of the cmov + addq $30,STACK,$30 # E : + ret $31,($23),1 # L0 : L U U L + .end sfunction +EXPORT_SYMBOL(sfunction) diff --git a/arch/alpha/lib/ev6-memchr.S b/arch/alpha/lib/ev6-memchr.S new file mode 100644 index 0000000000..56bf9e14ee --- /dev/null +++ b/arch/alpha/lib/ev6-memchr.S @@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/ev6-memchr.S + * + * 21264 version contributed by Rick Gorton + * + * Finds characters in a memory area. Optimized for the Alpha: + * + * - memory accessed as aligned quadwords only + * - uses cmpbge to compare 8 bytes in parallel + * - does binary search to find 0 byte in last + * quadword (HAKMEM needed 12 instructions to + * do this instead of the 9 instructions that + * binary search needs). 
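(For reference, the binary search mentioned above, in C: cmpbge produces a byte mask with bit i set where byte i matched, and the non-EV67 fallback below isolates the lowest set bit and narrows its position in three steps. first_match_index is an illustrative name, not a kernel helper.)

#include <stdint.h>

/* Index (0..7) of the first matching byte in a non-zero cmpbge-style
 * mask, mirroring the cmoveq sequence in the non-EV67 path below. */
static int first_match_index(uint8_t mask)
{
	uint8_t low = mask & (uint8_t)-mask;	/* isolate the lowest set bit */
	int idx = 0;

	if (!(low & 0x0f))	/* not in the low nibble   -> +4 */
		idx += 4;
	if (!(low & 0x33))	/* not in the lower pair   -> +2 */
		idx += 2;
	if (!(low & 0x55))	/* not in an even position -> +1 */
		idx += 1;
	return idx;
}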
+ * + * For correctness consider that: + * + * - only minimum number of quadwords may be accessed + * - the third argument is an unsigned long + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * Try not to change the actual algorithm if possible for consistency. + */ +#include + .set noreorder + .set noat + + .align 4 + .globl memchr + .ent memchr +memchr: + .frame $30,0,$26,0 + .prologue 0 + + # Hack -- if someone passes in (size_t)-1, hoping to just + # search til the end of the address space, we will overflow + # below when we find the address of the last byte. Given + # that we will never have a 56-bit address space, cropping + # the length is the easiest way to avoid trouble. + zap $18, 0x80, $5 # U : Bound length + beq $18, $not_found # U : + ldq_u $1, 0($16) # L : load first quadword Latency=3 + and $17, 0xff, $17 # E : L L U U : 00000000000000ch + + insbl $17, 1, $2 # U : 000000000000ch00 + cmpult $18, 9, $4 # E : small (< 1 quad) string? + or $2, $17, $17 # E : 000000000000chch + lda $3, -1($31) # E : U L L U + + sll $17, 16, $2 # U : 00000000chch0000 + addq $16, $5, $5 # E : Max search address + or $2, $17, $17 # E : 00000000chchchch + sll $17, 32, $2 # U : U L L U : chchchch00000000 + + or $2, $17, $17 # E : chchchchchchchch + extql $1, $16, $7 # U : $7 is upper bits + beq $4, $first_quad # U : + ldq_u $6, -1($5) # L : L U U L : eight or less bytes to search Latency=3 + + extqh $6, $16, $6 # U : 2 cycle stall for $6 + mov $16, $0 # E : + nop # E : + or $7, $6, $1 # E : L U L U $1 = quadword starting at $16 + + # Deal with the case where at most 8 bytes remain to be searched + # in $1. E.g.: + # $18 = 6 + # $1 = ????c6c5c4c3c2c1 +$last_quad: + negq $18, $6 # E : + xor $17, $1, $1 # E : + srl $3, $6, $6 # U : $6 = mask of $18 bits set + cmpbge $31, $1, $2 # E : L U L U + + nop + nop + and $2, $6, $2 # E : + beq $2, $not_found # U : U L U L + +$found_it: +#ifdef CONFIG_ALPHA_EV67 + /* + * Since we are guaranteed to have set one of the bits, we don't + * have to worry about coming back with a 0x40 out of cttz... + */ + cttz $2, $3 # U0 : + addq $0, $3, $0 # E : All done + nop # E : + ret # L0 : L U L U +#else + /* + * Slow and clunky. It can probably be improved. + * An exercise left for others. + */ + negq $2, $3 # E : + and $2, $3, $2 # E : + and $2, 0x0f, $1 # E : + addq $0, 4, $3 # E : + + cmoveq $1, $3, $0 # E : Latency 2, extra map cycle + nop # E : keep with cmov + and $2, 0x33, $1 # E : + addq $0, 2, $3 # E : U L U L : 2 cycle stall on $0 + + cmoveq $1, $3, $0 # E : Latency 2, extra map cycle + nop # E : keep with cmov + and $2, 0x55, $1 # E : + addq $0, 1, $3 # E : U L U L : 2 cycle stall on $0 + + cmoveq $1, $3, $0 # E : Latency 2, extra map cycle + nop + nop + ret # L0 : L U L U +#endif + + # Deal with the case where $18 > 8 bytes remain to be + # searched. $16 may not be aligned. + .align 4 +$first_quad: + andnot $16, 0x7, $0 # E : + insqh $3, $16, $2 # U : $2 = 0000ffffffffffff ($16<0:2> ff) + xor $1, $17, $1 # E : + or $1, $2, $1 # E : U L U L $1 = ====ffffffffffff + + cmpbge $31, $1, $2 # E : + bne $2, $found_it # U : + # At least one byte left to process. 
+ ldq $1, 8($0) # L : + subq $5, 1, $18 # E : U L U L + + addq $0, 8, $0 # E : + # Make $18 point to last quad to be accessed (the + # last quad may or may not be partial). + andnot $18, 0x7, $18 # E : + cmpult $0, $18, $2 # E : + beq $2, $final # U : U L U L + + # At least two quads remain to be accessed. + + subq $18, $0, $4 # E : $4 <- nr quads to be processed + and $4, 8, $4 # E : odd number of quads? + bne $4, $odd_quad_count # U : + # At least three quads remain to be accessed + mov $1, $4 # E : L U L U : move prefetched value to correct reg + + .align 4 +$unrolled_loop: + ldq $1, 8($0) # L : prefetch $1 + xor $17, $4, $2 # E : + cmpbge $31, $2, $2 # E : + bne $2, $found_it # U : U L U L + + addq $0, 8, $0 # E : + nop # E : + nop # E : + nop # E : + +$odd_quad_count: + xor $17, $1, $2 # E : + ldq $4, 8($0) # L : prefetch $4 + cmpbge $31, $2, $2 # E : + addq $0, 8, $6 # E : + + bne $2, $found_it # U : + cmpult $6, $18, $6 # E : + addq $0, 8, $0 # E : + nop # E : + + bne $6, $unrolled_loop # U : + mov $4, $1 # E : move prefetched value into $1 + nop # E : + nop # E : + +$final: subq $5, $0, $18 # E : $18 <- number of bytes left to do + nop # E : + nop # E : + bne $18, $last_quad # U : + +$not_found: + mov $31, $0 # E : + nop # E : + nop # E : + ret # L0 : + + .end memchr + EXPORT_SYMBOL(memchr) diff --git a/arch/alpha/lib/ev6-memcpy.S b/arch/alpha/lib/ev6-memcpy.S new file mode 100644 index 0000000000..ffbd056b6e --- /dev/null +++ b/arch/alpha/lib/ev6-memcpy.S @@ -0,0 +1,250 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/ev6-memcpy.S + * 21264 version by Rick Gorton + * + * Reasonably optimized memcpy() routine for the Alpha 21264 + * + * - memory accessed as aligned quadwords only + * - uses bcmpge to compare 8 bytes in parallel + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * + * Temp usage notes: + * $1,$2, - scratch + */ +#include + .set noreorder + .set noat + + .align 4 + .globl memcpy + .ent memcpy +memcpy: + .frame $30,0,$26,0 + .prologue 0 + + mov $16, $0 # E : copy dest to return + ble $18, $nomoredata # U : done with the copy? + xor $16, $17, $1 # E : are source and dest alignments the same? + and $1, 7, $1 # E : are they the same mod 8? + + bne $1, $misaligned # U : Nope - gotta do this the slow way + /* source and dest are same mod 8 address */ + and $16, 7, $1 # E : Are both 0mod8? + beq $1, $both_0mod8 # U : Yes + nop # E : + + /* + * source and dest are same misalignment. move a byte at a time + * until a 0mod8 alignment for both is reached. + * At least one byte more to move + */ + +$head_align: + ldbu $1, 0($17) # L : grab a byte + subq $18, 1, $18 # E : count-- + addq $17, 1, $17 # E : src++ + stb $1, 0($16) # L : + addq $16, 1, $16 # E : dest++ + and $16, 7, $1 # E : Are we at 0mod8 yet? + ble $18, $nomoredata # U : done with the copy? + bne $1, $head_align # U : + +$both_0mod8: + cmple $18, 127, $1 # E : Can we unroll the loop? 
+ bne $1, $no_unroll # U : + and $16, 63, $1 # E : get mod64 alignment + beq $1, $do_unroll # U : no single quads to fiddle + +$single_head_quad: + ldq $1, 0($17) # L : get 8 bytes + subq $18, 8, $18 # E : count -= 8 + addq $17, 8, $17 # E : src += 8 + nop # E : + + stq $1, 0($16) # L : store + addq $16, 8, $16 # E : dest += 8 + and $16, 63, $1 # E : get mod64 alignment + bne $1, $single_head_quad # U : still not fully aligned + +$do_unroll: + addq $16, 64, $7 # E : Initial (+1 trip) wh64 address + cmple $18, 127, $1 # E : Can we go through the unrolled loop? + bne $1, $tail_quads # U : Nope + nop # E : + +$unroll_body: + wh64 ($7) # L1 : memory subsystem hint: 64 bytes at + # ($7) are about to be over-written + ldq $6, 0($17) # L0 : bytes 0..7 + nop # E : + nop # E : + + ldq $4, 8($17) # L : bytes 8..15 + ldq $5, 16($17) # L : bytes 16..23 + addq $7, 64, $7 # E : Update next wh64 address + nop # E : + + ldq $3, 24($17) # L : bytes 24..31 + addq $16, 64, $1 # E : fallback value for wh64 + nop # E : + nop # E : + + addq $17, 32, $17 # E : src += 32 bytes + stq $6, 0($16) # L : bytes 0..7 + nop # E : + nop # E : + + stq $4, 8($16) # L : bytes 8..15 + stq $5, 16($16) # L : bytes 16..23 + subq $18, 192, $2 # E : At least two more trips to go? + nop # E : + + stq $3, 24($16) # L : bytes 24..31 + addq $16, 32, $16 # E : dest += 32 bytes + nop # E : + nop # E : + + ldq $6, 0($17) # L : bytes 0..7 + ldq $4, 8($17) # L : bytes 8..15 + cmovlt $2, $1, $7 # E : Latency 2, extra map slot - Use + # fallback wh64 address if < 2 more trips + nop # E : + + ldq $5, 16($17) # L : bytes 16..23 + ldq $3, 24($17) # L : bytes 24..31 + addq $16, 32, $16 # E : dest += 32 + subq $18, 64, $18 # E : count -= 64 + + addq $17, 32, $17 # E : src += 32 + stq $6, -32($16) # L : bytes 0..7 + stq $4, -24($16) # L : bytes 8..15 + cmple $18, 63, $1 # E : At least one more trip? + + stq $5, -16($16) # L : bytes 16..23 + stq $3, -8($16) # L : bytes 24..31 + nop # E : + beq $1, $unroll_body + +$tail_quads: +$no_unroll: + .align 4 + subq $18, 8, $18 # E : At least a quad left? + blt $18, $less_than_8 # U : Nope + nop # E : + nop # E : + +$move_a_quad: + ldq $1, 0($17) # L : fetch 8 + subq $18, 8, $18 # E : count -= 8 + addq $17, 8, $17 # E : src += 8 + nop # E : + + stq $1, 0($16) # L : store 8 + addq $16, 8, $16 # E : dest += 8 + bge $18, $move_a_quad # U : + nop # E : + +$less_than_8: + .align 4 + addq $18, 8, $18 # E : add back for trailing bytes + ble $18, $nomoredata # U : All-done + nop # E : + nop # E : + + /* Trailing bytes */ +$tail_bytes: + subq $18, 1, $18 # E : count-- + ldbu $1, 0($17) # L : fetch a byte + addq $17, 1, $17 # E : src++ + nop # E : + + stb $1, 0($16) # L : store a byte + addq $16, 1, $16 # E : dest++ + bgt $18, $tail_bytes # U : more to be done? + nop # E : + + /* branching to exit takes 3 extra cycles, so replicate exit here */ + ret $31, ($26), 1 # L0 : + nop # E : + nop # E : + nop # E : + +$misaligned: + mov $0, $4 # E : dest temp + and $0, 7, $1 # E : dest alignment mod8 + beq $1, $dest_0mod8 # U : life doesnt totally suck + nop + +$aligndest: + ble $18, $nomoredata # U : + ldbu $1, 0($17) # L : fetch a byte + subq $18, 1, $18 # E : count-- + addq $17, 1, $17 # E : src++ + + stb $1, 0($4) # L : store it + addq $4, 1, $4 # E : dest++ + and $4, 7, $1 # E : dest 0mod8 yet? + bne $1, $aligndest # U : go until we are aligned. + + /* Source has unknown alignment, but dest is known to be 0mod8 */ +$dest_0mod8: + subq $18, 8, $18 # E : At least a quad left? 
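When the alignments differ, the $mis_quad loop just below keeps one aligned quadword of source data in hand and combines it with the next aligned load using extql/extqh, so every store is still a full aligned quadword. A C sketch of that shift-and-merge step (little-endian byte order, as on Alpha; the real loop "rotates" the previous load in a register rather than re-reading it as this sketch does):

#include <stdint.h>

/* Build the quadword that starts at unaligned address p from the two
   aligned quadwords straddling it. */
static uint64_t load_unaligned_quad(const unsigned char *p)
{
        uintptr_t a = (uintptr_t)p;
        const uint64_t *q = (const uint64_t *)(a & ~(uintptr_t)7);
        unsigned shift = (a & 7) * 8;

        if (shift == 0)
                return q[0];
        return (q[0] >> shift) | (q[1] << (64 - shift));
}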
+ blt $18, $misalign_tail # U : Nope + ldq_u $3, 0($17) # L : seed (rotating load) of 8 bytes + nop # E : + +$mis_quad: + ldq_u $16, 8($17) # L : Fetch next 8 + extql $3, $17, $3 # U : masking + extqh $16, $17, $1 # U : masking + bis $3, $1, $1 # E : merged bytes to store + + subq $18, 8, $18 # E : count -= 8 + addq $17, 8, $17 # E : src += 8 + stq $1, 0($4) # L : store 8 (aligned) + mov $16, $3 # E : "rotate" source data + + addq $4, 8, $4 # E : dest += 8 + bge $18, $mis_quad # U : More quads to move + nop + nop + +$misalign_tail: + addq $18, 8, $18 # E : account for tail stuff + ble $18, $nomoredata # U : + nop + nop + +$misalign_byte: + ldbu $1, 0($17) # L : fetch 1 + subq $18, 1, $18 # E : count-- + addq $17, 1, $17 # E : src++ + nop # E : + + stb $1, 0($4) # L : store + addq $4, 1, $4 # E : dest++ + bgt $18, $misalign_byte # U : more to go? + nop + + +$nomoredata: + ret $31, ($26), 1 # L0 : + nop # E : + nop # E : + nop # E : + + .end memcpy + EXPORT_SYMBOL(memcpy) + +/* For backwards module compatibility. */ +__memcpy = memcpy +.globl __memcpy diff --git a/arch/alpha/lib/ev6-memset.S b/arch/alpha/lib/ev6-memset.S new file mode 100644 index 0000000000..1cfcfbbea6 --- /dev/null +++ b/arch/alpha/lib/ev6-memset.S @@ -0,0 +1,605 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/ev6-memset.S + * + * This is an efficient (and relatively small) implementation of the C library + * "memset()" function for the 21264 implementation of Alpha. + * + * 21264 version contributed by Rick Gorton + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * The algorithm for the leading and trailing quadwords remains the same, + * however the loop has been unrolled to enable better memory throughput, + * and the code has been replicated for each of the entry points: __memset + * and __memset16 to permit better scheduling to eliminate the stalling + * encountered during the mask replication. + * A future enhancement might be to put in a byte store loop for really + * small (say < 32 bytes) memset()s. Whether or not that change would be + * a win in the kernel would depend upon the contextual usage. + * WARNING: Maintaining this is going to be more work than the above version, + * as fixes will need to be made in multiple places. The performance gain + * is worth it. + */ +#include + .set noat + .set noreorder +.text + .globl memset + .globl __memset + .globl ___memset + .globl __memset16 + .globl __constant_c_memset + + .ent ___memset +.align 5 +___memset: + .frame $30,0,$26,0 + .prologue 0 + + /* + * Serious stalling happens. The only way to mitigate this is to + * undertake a major re-write to interleave the constant materialization + * with other parts of the fall-through code. This is important, even + * though it makes maintenance tougher. + * Do this later. + */ + and $17,255,$1 # E : 00000000000000ch + insbl $17,1,$2 # U : 000000000000ch00 + bis $16,$16,$0 # E : return value + ble $18,end_b # U : zero length requested? 
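Before any stores, the fill byte has to be replicated into all eight byte lanes of a quadword; that is what the insbl/inswl/or sequence that follows builds, one lane pair at a time. The same value expressed in C (sketch, illustrative name):

#include <stdint.h>

static uint64_t replicate_byte(int c)
{
        uint64_t v = c & 0xff;     /* 00000000000000ch */
        v |= v << 8;               /* 000000000000chch */
        v |= v << 16;              /* 00000000chchchch */
        v |= v << 32;              /* chchchchchchchch */
        return v;                  /* equivalently (c & 0xff) * 0x0101010101010101ULL */
}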
+ + addq $18,$16,$6 # E : max address to write to + bis $1,$2,$17 # E : 000000000000chch + insbl $1,2,$3 # U : 0000000000ch0000 + insbl $1,3,$4 # U : 00000000ch000000 + + or $3,$4,$3 # E : 00000000chch0000 + inswl $17,4,$5 # U : 0000chch00000000 + xor $16,$6,$1 # E : will complete write be within one quadword? + inswl $17,6,$2 # U : chch000000000000 + + or $17,$3,$17 # E : 00000000chchchch + or $2,$5,$2 # E : chchchch00000000 + bic $1,7,$1 # E : fit within a single quadword? + and $16,7,$3 # E : Target addr misalignment + + or $17,$2,$17 # E : chchchchchchchch + beq $1,within_quad_b # U : + nop # E : + beq $3,aligned_b # U : target is 0mod8 + + /* + * Target address is misaligned, and won't fit within a quadword + */ + ldq_u $4,0($16) # L : Fetch first partial + bis $16,$16,$5 # E : Save the address + insql $17,$16,$2 # U : Insert new bytes + subq $3,8,$3 # E : Invert (for addressing uses) + + addq $18,$3,$18 # E : $18 is new count ($3 is negative) + mskql $4,$16,$4 # U : clear relevant parts of the quad + subq $16,$3,$16 # E : $16 is new aligned destination + bis $2,$4,$1 # E : Final bytes + + nop + stq_u $1,0($5) # L : Store result + nop + nop + +.align 4 +aligned_b: + /* + * We are now guaranteed to be quad aligned, with at least + * one partial quad to write. + */ + + sra $18,3,$3 # U : Number of remaining quads to write + and $18,7,$18 # E : Number of trailing bytes to write + bis $16,$16,$5 # E : Save dest address + beq $3,no_quad_b # U : tail stuff only + + /* + * it's worth the effort to unroll this and use wh64 if possible + * Lifted a bunch of code from clear_user.S + * At this point, entry values are: + * $16 Current destination address + * $5 A copy of $16 + * $6 The max quadword address to write to + * $18 Number trailer bytes + * $3 Number quads to write + */ + + and $16, 0x3f, $2 # E : Forward work (only useful for unrolled loop) + subq $3, 16, $4 # E : Only try to unroll if > 128 bytes + subq $2, 0x40, $1 # E : bias counter (aligning stuff 0mod64) + blt $4, loop_b # U : + + /* + * We know we've got at least 16 quads, minimum of one trip + * through unrolled loop. Do a quad at a time to get us 0mod64 + * aligned. + */ + + nop # E : + nop # E : + nop # E : + beq $1, $bigalign_b # U : + +$alignmod64_b: + stq $17, 0($5) # L : + subq $3, 1, $3 # E : For consistency later + addq $1, 8, $1 # E : Increment towards zero for alignment + addq $5, 8, $4 # E : Initial wh64 address (filler instruction) + + nop + nop + addq $5, 8, $5 # E : Inc address + blt $1, $alignmod64_b # U : + +$bigalign_b: + /* + * $3 - number quads left to go + * $5 - target address (aligned 0mod64) + * $17 - mask of stuff to store + * Scratch registers available: $7, $2, $4, $1 + * we know that we'll be taking a minimum of one trip through + * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle + * Assumes the wh64 needs to be for 2 trips through the loop in the future + * The wh64 is issued on for the starting destination address for trip +2 + * through the loop, and if there are less than two trips left, the target + * address will be for the current trip. 
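The $do_wh64 loop below stores 64 bytes per trip and, on each trip, chooses the address for the next write hint: two trips ahead when at least two more trips remain, otherwise the next trip itself (the cmovlt). A simplified C sketch of that structure, where prepare_line() stands in for the wh64 hint and the hint scheduling is collapsed into the same iteration (names illustrative, ordering simplified):

#include <stdint.h>

static void prepare_line(void *p) { (void)p; }     /* wh64 stand-in */

static void fill_by_cachelines(uint64_t *dst, uint64_t pattern, long quads)
{
        while (quads >= 8) {
                /* hint two trips ahead if possible, else the next line */
                prepare_line(quads >= 24 ? dst + 16 : dst + 8);
                for (int i = 0; i < 8; i++)
                        dst[i] = pattern;
                dst += 8;
                quads -= 8;
        }
        while (quads-- > 0)                        /* trailing quads (loop:) */
                *dst++ = pattern;
}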
+ */ + +$do_wh64_b: + wh64 ($4) # L1 : memory subsystem write hint + subq $3, 24, $2 # E : For determining future wh64 addresses + stq $17, 0($5) # L : + nop # E : + + addq $5, 128, $4 # E : speculative target of next wh64 + stq $17, 8($5) # L : + stq $17, 16($5) # L : + addq $5, 64, $7 # E : Fallback address for wh64 (== next trip addr) + + stq $17, 24($5) # L : + stq $17, 32($5) # L : + cmovlt $2, $7, $4 # E : Latency 2, extra mapping cycle + nop + + stq $17, 40($5) # L : + stq $17, 48($5) # L : + subq $3, 16, $2 # E : Repeat the loop at least once more? + nop + + stq $17, 56($5) # L : + addq $5, 64, $5 # E : + subq $3, 8, $3 # E : + bge $2, $do_wh64_b # U : + + nop + nop + nop + beq $3, no_quad_b # U : Might have finished already + +.align 4 + /* + * Simple loop for trailing quadwords, or for small amounts + * of data (where we can't use an unrolled loop and wh64) + */ +loop_b: + stq $17,0($5) # L : + subq $3,1,$3 # E : Decrement number quads left + addq $5,8,$5 # E : Inc address + bne $3,loop_b # U : more? + +no_quad_b: + /* + * Write 0..7 trailing bytes. + */ + nop # E : + beq $18,end_b # U : All done? + ldq $7,0($5) # L : + mskqh $7,$6,$2 # U : Mask final quad + + insqh $17,$6,$4 # U : New bits + bis $2,$4,$1 # E : Put it all together + stq $1,0($5) # L : And back to memory + ret $31,($26),1 # L0 : + +within_quad_b: + ldq_u $1,0($16) # L : + insql $17,$16,$2 # U : New bits + mskql $1,$16,$4 # U : Clear old + bis $2,$4,$2 # E : New result + + mskql $2,$6,$4 # U : + mskqh $1,$6,$2 # U : + bis $2,$4,$1 # E : + stq_u $1,0($16) # L : + +end_b: + nop + nop + nop + ret $31,($26),1 # L0 : + .end ___memset + EXPORT_SYMBOL(___memset) + + /* + * This is the original body of code, prior to replication and + * rescheduling. Leave it here, as there may be calls to this + * entry point. + */ +.align 4 + .ent __constant_c_memset +__constant_c_memset: + .frame $30,0,$26,0 + .prologue 0 + + addq $18,$16,$6 # E : max address to write to + bis $16,$16,$0 # E : return value + xor $16,$6,$1 # E : will complete write be within one quadword? + ble $18,end # U : zero length requested? + + bic $1,7,$1 # E : fit within a single quadword + beq $1,within_one_quad # U : + and $16,7,$3 # E : Target addr misalignment + beq $3,aligned # U : target is 0mod8 + + /* + * Target address is misaligned, and won't fit within a quadword + */ + ldq_u $4,0($16) # L : Fetch first partial + bis $16,$16,$5 # E : Save the address + insql $17,$16,$2 # U : Insert new bytes + subq $3,8,$3 # E : Invert (for addressing uses) + + addq $18,$3,$18 # E : $18 is new count ($3 is negative) + mskql $4,$16,$4 # U : clear relevant parts of the quad + subq $16,$3,$16 # E : $16 is new aligned destination + bis $2,$4,$1 # E : Final bytes + + nop + stq_u $1,0($5) # L : Store result + nop + nop + +.align 4 +aligned: + /* + * We are now guaranteed to be quad aligned, with at least + * one partial quad to write. 
+ */ + + sra $18,3,$3 # U : Number of remaining quads to write + and $18,7,$18 # E : Number of trailing bytes to write + bis $16,$16,$5 # E : Save dest address + beq $3,no_quad # U : tail stuff only + + /* + * it's worth the effort to unroll this and use wh64 if possible + * Lifted a bunch of code from clear_user.S + * At this point, entry values are: + * $16 Current destination address + * $5 A copy of $16 + * $6 The max quadword address to write to + * $18 Number trailer bytes + * $3 Number quads to write + */ + + and $16, 0x3f, $2 # E : Forward work (only useful for unrolled loop) + subq $3, 16, $4 # E : Only try to unroll if > 128 bytes + subq $2, 0x40, $1 # E : bias counter (aligning stuff 0mod64) + blt $4, loop # U : + + /* + * We know we've got at least 16 quads, minimum of one trip + * through unrolled loop. Do a quad at a time to get us 0mod64 + * aligned. + */ + + nop # E : + nop # E : + nop # E : + beq $1, $bigalign # U : + +$alignmod64: + stq $17, 0($5) # L : + subq $3, 1, $3 # E : For consistency later + addq $1, 8, $1 # E : Increment towards zero for alignment + addq $5, 8, $4 # E : Initial wh64 address (filler instruction) + + nop + nop + addq $5, 8, $5 # E : Inc address + blt $1, $alignmod64 # U : + +$bigalign: + /* + * $3 - number quads left to go + * $5 - target address (aligned 0mod64) + * $17 - mask of stuff to store + * Scratch registers available: $7, $2, $4, $1 + * we know that we'll be taking a minimum of one trip through + * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle + * Assumes the wh64 needs to be for 2 trips through the loop in the future + * The wh64 is issued on for the starting destination address for trip +2 + * through the loop, and if there are less than two trips left, the target + * address will be for the current trip. + */ + +$do_wh64: + wh64 ($4) # L1 : memory subsystem write hint + subq $3, 24, $2 # E : For determining future wh64 addresses + stq $17, 0($5) # L : + nop # E : + + addq $5, 128, $4 # E : speculative target of next wh64 + stq $17, 8($5) # L : + stq $17, 16($5) # L : + addq $5, 64, $7 # E : Fallback address for wh64 (== next trip addr) + + stq $17, 24($5) # L : + stq $17, 32($5) # L : + cmovlt $2, $7, $4 # E : Latency 2, extra mapping cycle + nop + + stq $17, 40($5) # L : + stq $17, 48($5) # L : + subq $3, 16, $2 # E : Repeat the loop at least once more? + nop + + stq $17, 56($5) # L : + addq $5, 64, $5 # E : + subq $3, 8, $3 # E : + bge $2, $do_wh64 # U : + + nop + nop + nop + beq $3, no_quad # U : Might have finished already + +.align 4 + /* + * Simple loop for trailing quadwords, or for small amounts + * of data (where we can't use an unrolled loop and wh64) + */ +loop: + stq $17,0($5) # L : + subq $3,1,$3 # E : Decrement number quads left + addq $5,8,$5 # E : Inc address + bne $3,loop # U : more? + +no_quad: + /* + * Write 0..7 trailing bytes. + */ + nop # E : + beq $18,end # U : All done? 
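Once the full quadwords are done, 0..7 trailing bytes remain, and the ldq/mskqh/insqh/stq sequence just below writes them with a single aligned read-modify-write: destination bytes beyond the fill region are preserved while the pattern is spliced into the lanes below the end address. In C terms (sketch; little-endian lanes, illustrative names):

#include <stdint.h>

/* tail = count & 7, guaranteed to be 1..7 when this path is reached */
static void store_tail(uint64_t *last_quad, uint64_t pattern, unsigned tail)
{
        uint64_t keep = ~(uint64_t)0 << (8 * tail);  /* bytes to preserve */

        *last_quad = (*last_quad & keep) | (pattern & ~keep);
}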
+ ldq $7,0($5) # L : + mskqh $7,$6,$2 # U : Mask final quad + + insqh $17,$6,$4 # U : New bits + bis $2,$4,$1 # E : Put it all together + stq $1,0($5) # L : And back to memory + ret $31,($26),1 # L0 : + +within_one_quad: + ldq_u $1,0($16) # L : + insql $17,$16,$2 # U : New bits + mskql $1,$16,$4 # U : Clear old + bis $2,$4,$2 # E : New result + + mskql $2,$6,$4 # U : + mskqh $1,$6,$2 # U : + bis $2,$4,$1 # E : + stq_u $1,0($16) # L : + +end: + nop + nop + nop + ret $31,($26),1 # L0 : + .end __constant_c_memset + EXPORT_SYMBOL(__constant_c_memset) + + /* + * This is a replicant of the __constant_c_memset code, rescheduled + * to mask stalls. Note that entry point names also had to change + */ + .align 5 + .ent __memset16 + +__memset16: + .frame $30,0,$26,0 + .prologue 0 + + inswl $17,0,$5 # U : 000000000000c1c2 + inswl $17,2,$2 # U : 00000000c1c20000 + bis $16,$16,$0 # E : return value + addq $18,$16,$6 # E : max address to write to + + ble $18, end_w # U : zero length requested? + inswl $17,4,$3 # U : 0000c1c200000000 + inswl $17,6,$4 # U : c1c2000000000000 + xor $16,$6,$1 # E : will complete write be within one quadword? + + or $2,$5,$2 # E : 00000000c1c2c1c2 + or $3,$4,$17 # E : c1c2c1c200000000 + bic $1,7,$1 # E : fit within a single quadword + and $16,7,$3 # E : Target addr misalignment + + or $17,$2,$17 # E : c1c2c1c2c1c2c1c2 + beq $1,within_quad_w # U : + nop + beq $3,aligned_w # U : target is 0mod8 + + /* + * Target address is misaligned, and won't fit within a quadword + */ + ldq_u $4,0($16) # L : Fetch first partial + bis $16,$16,$5 # E : Save the address + insql $17,$16,$2 # U : Insert new bytes + subq $3,8,$3 # E : Invert (for addressing uses) + + addq $18,$3,$18 # E : $18 is new count ($3 is negative) + mskql $4,$16,$4 # U : clear relevant parts of the quad + subq $16,$3,$16 # E : $16 is new aligned destination + bis $2,$4,$1 # E : Final bytes + + nop + stq_u $1,0($5) # L : Store result + nop + nop + +.align 4 +aligned_w: + /* + * We are now guaranteed to be quad aligned, with at least + * one partial quad to write. + */ + + sra $18,3,$3 # U : Number of remaining quads to write + and $18,7,$18 # E : Number of trailing bytes to write + bis $16,$16,$5 # E : Save dest address + beq $3,no_quad_w # U : tail stuff only + + /* + * it's worth the effort to unroll this and use wh64 if possible + * Lifted a bunch of code from clear_user.S + * At this point, entry values are: + * $16 Current destination address + * $5 A copy of $16 + * $6 The max quadword address to write to + * $18 Number trailer bytes + * $3 Number quads to write + */ + + and $16, 0x3f, $2 # E : Forward work (only useful for unrolled loop) + subq $3, 16, $4 # E : Only try to unroll if > 128 bytes + subq $2, 0x40, $1 # E : bias counter (aligning stuff 0mod64) + blt $4, loop_w # U : + + /* + * We know we've got at least 16 quads, minimum of one trip + * through unrolled loop. Do a quad at a time to get us 0mod64 + * aligned. 
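The __memset16 entry point above is the byte-fill routine specialized for a 16-bit value; apart from renamed labels, the real difference is the replication step at its entry, which builds the quadword pattern from a halfword with inswl/or instead of from a byte. The 16-bit analogue in C (sketch, illustrative name):

#include <stdint.h>

static uint64_t replicate_u16(unsigned v)
{
        uint64_t p = v & 0xffff;   /* 000000000000c1c2 */
        p |= p << 16;              /* 00000000c1c2c1c2 */
        p |= p << 32;              /* c1c2c1c2c1c2c1c2 */
        return p;
}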
+ */ + + nop # E : + nop # E : + nop # E : + beq $1, $bigalign_w # U : + +$alignmod64_w: + stq $17, 0($5) # L : + subq $3, 1, $3 # E : For consistency later + addq $1, 8, $1 # E : Increment towards zero for alignment + addq $5, 8, $4 # E : Initial wh64 address (filler instruction) + + nop + nop + addq $5, 8, $5 # E : Inc address + blt $1, $alignmod64_w # U : + +$bigalign_w: + /* + * $3 - number quads left to go + * $5 - target address (aligned 0mod64) + * $17 - mask of stuff to store + * Scratch registers available: $7, $2, $4, $1 + * we know that we'll be taking a minimum of one trip through + * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle + * Assumes the wh64 needs to be for 2 trips through the loop in the future + * The wh64 is issued on for the starting destination address for trip +2 + * through the loop, and if there are less than two trips left, the target + * address will be for the current trip. + */ + +$do_wh64_w: + wh64 ($4) # L1 : memory subsystem write hint + subq $3, 24, $2 # E : For determining future wh64 addresses + stq $17, 0($5) # L : + nop # E : + + addq $5, 128, $4 # E : speculative target of next wh64 + stq $17, 8($5) # L : + stq $17, 16($5) # L : + addq $5, 64, $7 # E : Fallback address for wh64 (== next trip addr) + + stq $17, 24($5) # L : + stq $17, 32($5) # L : + cmovlt $2, $7, $4 # E : Latency 2, extra mapping cycle + nop + + stq $17, 40($5) # L : + stq $17, 48($5) # L : + subq $3, 16, $2 # E : Repeat the loop at least once more? + nop + + stq $17, 56($5) # L : + addq $5, 64, $5 # E : + subq $3, 8, $3 # E : + bge $2, $do_wh64_w # U : + + nop + nop + nop + beq $3, no_quad_w # U : Might have finished already + +.align 4 + /* + * Simple loop for trailing quadwords, or for small amounts + * of data (where we can't use an unrolled loop and wh64) + */ +loop_w: + stq $17,0($5) # L : + subq $3,1,$3 # E : Decrement number quads left + addq $5,8,$5 # E : Inc address + bne $3,loop_w # U : more? + +no_quad_w: + /* + * Write 0..7 trailing bytes. + */ + nop # E : + beq $18,end_w # U : All done? + ldq $7,0($5) # L : + mskqh $7,$6,$2 # U : Mask final quad + + insqh $17,$6,$4 # U : New bits + bis $2,$4,$1 # E : Put it all together + stq $1,0($5) # L : And back to memory + ret $31,($26),1 # L0 : + +within_quad_w: + ldq_u $1,0($16) # L : + insql $17,$16,$2 # U : New bits + mskql $1,$16,$4 # U : Clear old + bis $2,$4,$2 # E : New result + + mskql $2,$6,$4 # U : + mskqh $1,$6,$2 # U : + bis $2,$4,$1 # E : + stq_u $1,0($16) # L : + +end_w: + nop + nop + nop + ret $31,($26),1 # L0 : + + .end __memset16 + EXPORT_SYMBOL(__memset16) + +memset = ___memset +__memset = ___memset + EXPORT_SYMBOL(memset) + EXPORT_SYMBOL(__memset) diff --git a/arch/alpha/lib/ev6-stxcpy.S b/arch/alpha/lib/ev6-stxcpy.S new file mode 100644 index 0000000000..65f5f7310d --- /dev/null +++ b/arch/alpha/lib/ev6-stxcpy.S @@ -0,0 +1,322 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/ev6-stxcpy.S + * 21264 version contributed by Rick Gorton + * + * Copy a null-terminated string from SRC to DST. + * + * This is an internal routine used by strcpy, stpcpy, and strcat. + * As such, it uses special linkage conventions to make implementation + * of these public functions more efficient. + * + * On input: + * t9 = return address + * a0 = DST + * a1 = SRC + * + * On output: + * t12 = bitmask (with one bit set) indicating the last byte written + * a0 = unaligned address of the last *word* written + * + * Furthermore, v0, a3-a5, t11, and t12 are untouched. 
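The copy loops below move the string a quadword at a time and leave the loop as soon as cmpbge reports a zero byte anywhere in the word, then mask the final partial quadword into the existing destination bytes. A C sketch of the aligned case using the equivalent portable zero-byte test (illustrative names; like the assembly, it assumes the source may be read in whole quadwords up to the terminator, and it finishes the last word byte-wise rather than with mskql/mskqh):

#include <stdint.h>

/* Equivalent of cmpbge(0, q) != 0: does any byte of q equal zero? */
static int quad_has_zero_byte(uint64_t q)
{
        return ((q - 0x0101010101010101ULL) & ~q & 0x8080808080808080ULL) != 0;
}

static void copy_aligned_quads(uint64_t *dst, const uint64_t *src)
{
        uint64_t q;

        while (!quad_has_zero_byte(q = *src++))
                *dst++ = q;

        /* q now holds the quadword containing the terminator */
        unsigned char *d = (unsigned char *)dst;
        const unsigned char *b = (const unsigned char *)&q;
        while ((*d++ = *b++) != '\0')
                ;
}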
+ * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * Try not to change the actual algorithm if possible for consistency. + */ + +#include + + .set noat + .set noreorder + + .text + +/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that + doesn't like putting the entry point for a procedure somewhere in the + middle of the procedure descriptor. Work around this by putting the + aligned copy in its own procedure descriptor */ + + + .ent stxcpy_aligned + .align 4 +stxcpy_aligned: + .frame sp, 0, t9 + .prologue 0 + + /* On entry to this basic block: + t0 == the first destination word for masking back in + t1 == the first source word. */ + + /* Create the 1st output word and detect 0's in the 1st input word. */ + lda t2, -1 # E : build a mask against false zero + mskqh t2, a1, t2 # U : detection in the src word (stall) + mskqh t1, a1, t3 # U : + ornot t1, t2, t2 # E : (stall) + + mskql t0, a1, t0 # U : assemble the first output word + cmpbge zero, t2, t8 # E : bits set iff null found + or t0, t3, t1 # E : (stall) + bne t8, $a_eos # U : (stall) + + /* On entry to this basic block: + t0 == the first destination word for masking back in + t1 == a source word not containing a null. */ + /* Nops here to separate store quads from load quads */ + +$a_loop: + stq_u t1, 0(a0) # L : + addq a0, 8, a0 # E : + nop + nop + + ldq_u t1, 0(a1) # L : Latency=3 + addq a1, 8, a1 # E : + cmpbge zero, t1, t8 # E : (3 cycle stall) + beq t8, $a_loop # U : (stall for t8) + + /* Take care of the final (partial) word store. + On entry to this basic block we have: + t1 == the source word containing the null + t8 == the cmpbge mask that found it. */ +$a_eos: + negq t8, t6 # E : find low bit set + and t8, t6, t12 # E : (stall) + /* For the sake of the cache, don't read a destination word + if we're not going to need it. */ + and t12, 0x80, t6 # E : (stall) + bne t6, 1f # U : (stall) + + /* We're doing a partial word store and so need to combine + our source and original destination words. */ + ldq_u t0, 0(a0) # L : Latency=3 + subq t12, 1, t6 # E : + zapnot t1, t6, t1 # U : clear src bytes >= null (stall) + or t12, t6, t8 # E : (stall) + + zap t0, t8, t0 # E : clear dst bytes <= null + or t0, t1, t1 # E : (stall) + nop + nop + +1: stq_u t1, 0(a0) # L : + ret (t9) # L0 : Latency=3 + nop + nop + + .end stxcpy_aligned + + .align 4 + .ent __stxcpy + .globl __stxcpy +__stxcpy: + .frame sp, 0, t9 + .prologue 0 + + /* Are source and destination co-aligned? */ + xor a0, a1, t0 # E : + unop # E : + and t0, 7, t0 # E : (stall) + bne t0, $unaligned # U : (stall) + + /* We are co-aligned; take care of a partial first word. */ + ldq_u t1, 0(a1) # L : load first src word + and a0, 7, t0 # E : take care not to load a word ... + addq a1, 8, a1 # E : + beq t0, stxcpy_aligned # U : ... if we wont need it (stall) + + ldq_u t0, 0(a0) # L : + br stxcpy_aligned # L0 : Latency=3 + nop + nop + + +/* The source and destination are not co-aligned. Align the destination + and cope. We have to be very careful about not reading too much and + causing a SEGV. 
*/ + + .align 4 +$u_head: + /* We know just enough now to be able to assemble the first + full source word. We can still find a zero at the end of it + that prevents us from outputting the whole thing. + + On entry to this basic block: + t0 == the first dest word, for masking back in, if needed else 0 + t1 == the low bits of the first source word + t6 == bytemask that is -1 in dest word bytes */ + + ldq_u t2, 8(a1) # L : + addq a1, 8, a1 # E : + extql t1, a1, t1 # U : (stall on a1) + extqh t2, a1, t4 # U : (stall on a1) + + mskql t0, a0, t0 # U : + or t1, t4, t1 # E : + mskqh t1, a0, t1 # U : (stall on t1) + or t0, t1, t1 # E : (stall on t1) + + or t1, t6, t6 # E : + cmpbge zero, t6, t8 # E : (stall) + lda t6, -1 # E : for masking just below + bne t8, $u_final # U : (stall) + + mskql t6, a1, t6 # U : mask out the bits we have + or t6, t2, t2 # E : already extracted before (stall) + cmpbge zero, t2, t8 # E : testing eos (stall) + bne t8, $u_late_head_exit # U : (stall) + + /* Finally, we've got all the stupid leading edge cases taken care + of and we can set up to enter the main loop. */ + + stq_u t1, 0(a0) # L : store first output word + addq a0, 8, a0 # E : + extql t2, a1, t0 # U : position ho-bits of lo word + ldq_u t2, 8(a1) # U : read next high-order source word + + addq a1, 8, a1 # E : + cmpbge zero, t2, t8 # E : (stall for t2) + nop # E : + bne t8, $u_eos # U : (stall) + + /* Unaligned copy main loop. In order to avoid reading too much, + the loop is structured to detect zeros in aligned source words. + This has, unfortunately, effectively pulled half of a loop + iteration out into the head and half into the tail, but it does + prevent nastiness from accumulating in the very thing we want + to run as fast as possible. + + On entry to this basic block: + t0 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word + + We further know that t2 does not contain a null terminator. */ + + .align 3 +$u_loop: + extqh t2, a1, t1 # U : extract high bits for current word + addq a1, 8, a1 # E : (stall) + extql t2, a1, t3 # U : extract low bits for next time (stall) + addq a0, 8, a0 # E : + + or t0, t1, t1 # E : current dst word now complete + ldq_u t2, 0(a1) # L : Latency=3 load high word for next time + stq_u t1, -8(a0) # L : save the current word (stall) + mov t3, t0 # E : + + cmpbge zero, t2, t8 # E : test new word for eos + beq t8, $u_loop # U : (stall) + nop + nop + + /* We've found a zero somewhere in the source word we just read. + If it resides in the lower half, we have one (probably partial) + word to write out, and if it resides in the upper half, we + have one full and one partial word left to write out. + + On entry to this basic block: + t0 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word. */ +$u_eos: + extqh t2, a1, t1 # U : + or t0, t1, t1 # E : first (partial) source word complete (stall) + cmpbge zero, t1, t8 # E : is the null in this first bit? (stall) + bne t8, $u_final # U : (stall) + +$u_late_head_exit: + stq_u t1, 0(a0) # L : the null was in the high-order bits + addq a0, 8, a0 # E : + extql t2, a1, t1 # U : + cmpbge zero, t1, t8 # E : (stall) + + /* Take care of a final (probably partial) result word. + On entry to this basic block: + t1 == assembled source word + t8 == cmpbge mask that found the null. 
*/ +$u_final: + negq t8, t6 # E : isolate low bit set + and t6, t8, t12 # E : (stall) + and t12, 0x80, t6 # E : avoid dest word load if we can (stall) + bne t6, 1f # U : (stall) + + ldq_u t0, 0(a0) # E : + subq t12, 1, t6 # E : + or t6, t12, t8 # E : (stall) + zapnot t1, t6, t1 # U : kill source bytes >= null (stall) + + zap t0, t8, t0 # U : kill dest bytes <= null (2 cycle data stall) + or t0, t1, t1 # E : (stall) + nop + nop + +1: stq_u t1, 0(a0) # L : + ret (t9) # L0 : Latency=3 + nop + nop + + /* Unaligned copy entry point. */ + .align 4 +$unaligned: + + ldq_u t1, 0(a1) # L : load first source word + and a0, 7, t4 # E : find dest misalignment + and a1, 7, t5 # E : find src misalignment + /* Conditionally load the first destination word and a bytemask + with 0xff indicating that the destination byte is sacrosanct. */ + mov zero, t0 # E : + + mov zero, t6 # E : + beq t4, 1f # U : + ldq_u t0, 0(a0) # L : + lda t6, -1 # E : + + mskql t6, a0, t6 # U : + nop + nop + nop +1: + subq a1, t4, a1 # E : sub dest misalignment from src addr + /* If source misalignment is larger than dest misalignment, we need + extra startup checks to avoid SEGV. */ + cmplt t4, t5, t12 # E : + beq t12, $u_head # U : + lda t2, -1 # E : mask out leading garbage in source + + mskqh t2, t5, t2 # U : + ornot t1, t2, t3 # E : (stall) + cmpbge zero, t3, t8 # E : is there a zero? (stall) + beq t8, $u_head # U : (stall) + + /* At this point we've found a zero in the first partial word of + the source. We need to isolate the valid source data and mask + it into the original destination data. (Incidentally, we know + that we'll need at least one byte of that original dest word.) */ + + ldq_u t0, 0(a0) # L : + negq t8, t6 # E : build bitmask of bytes <= zero + and t6, t8, t12 # E : (stall) + and a1, 7, t5 # E : + + subq t12, 1, t6 # E : + or t6, t12, t8 # E : (stall) + srl t12, t5, t12 # U : adjust final null return value + zapnot t2, t8, t2 # U : prepare source word; mirror changes (stall) + + and t1, t2, t1 # E : to source validity mask + extql t2, a1, t2 # U : + extql t1, a1, t1 # U : (stall) + andnot t0, t2, t0 # .. e1 : zero place for source to reside (stall) + + or t0, t1, t1 # e1 : and put it there + stq_u t1, 0(a0) # .. e0 : (stall) + ret (t9) # e1 : + nop + + .end __stxcpy + diff --git a/arch/alpha/lib/ev6-stxncpy.S b/arch/alpha/lib/ev6-stxncpy.S new file mode 100644 index 0000000000..76da205282 --- /dev/null +++ b/arch/alpha/lib/ev6-stxncpy.S @@ -0,0 +1,398 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/ev6-stxncpy.S + * 21264 version contributed by Rick Gorton + * + * Copy no more than COUNT bytes of the null-terminated string from + * SRC to DST. + * + * This is an internal routine used by strncpy, stpncpy, and strncat. + * As such, it uses special linkage conventions to make implementation + * of these public functions more efficient. + * + * On input: + * t9 = return address + * a0 = DST + * a1 = SRC + * a2 = COUNT + * + * Furthermore, COUNT may not be zero. + * + * On output: + * t0 = last word written + * t10 = bitmask (with one bit set) indicating the byte position of + * the end of the range specified by COUNT + * t12 = bitmask (with one bit set) indicating the last byte written + * a0 = unaligned address of the last *word* written + * a2 = the number of full words left in COUNT + * + * Furthermore, v0, a3-a5, t11, and $at are untouched. 
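Besides the zero-byte mask that cmpbge produces (t8), the count-limited copy below keeps a second per-byte mask, t10, with a single bit set at the last byte position COUNT permits; OR-ing the two lets one tail path handle "hit the terminator" and "ran out of count" uniformly. A small C rendering of how that bit is derived in the setup code (sketch, illustrative name; COUNT must be non-zero, as stated above):

#include <stdint.h>

/* t10 in the code below: the bit position, within its quadword, of the last
   byte the count allows, after biasing the count by the destination
   misalignment. */
static unsigned last_count_byte_bit(uintptr_t dst, unsigned long count)
{
        unsigned misalign = dst & 7;                  /* t0 */
        return 1u << ((count + misalign - 1) & 7);    /* t10 */
}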
+ * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * Try not to change the actual algorithm if possible for consistency. + */ + +#include + + .set noat + .set noreorder + + .text + +/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that + doesn't like putting the entry point for a procedure somewhere in the + middle of the procedure descriptor. Work around this by putting the + aligned copy in its own procedure descriptor */ + + + .ent stxncpy_aligned + .align 4 +stxncpy_aligned: + .frame sp, 0, t9, 0 + .prologue 0 + + /* On entry to this basic block: + t0 == the first destination word for masking back in + t1 == the first source word. */ + + /* Create the 1st output word and detect 0's in the 1st input word. */ + lda t2, -1 # E : build a mask against false zero + mskqh t2, a1, t2 # U : detection in the src word (stall) + mskqh t1, a1, t3 # U : + ornot t1, t2, t2 # E : (stall) + + mskql t0, a1, t0 # U : assemble the first output word + cmpbge zero, t2, t8 # E : bits set iff null found + or t0, t3, t0 # E : (stall) + beq a2, $a_eoc # U : + + bne t8, $a_eos # U : + nop + nop + nop + + /* On entry to this basic block: + t0 == a source word not containing a null. */ + + /* + * nops here to: + * separate store quads from load quads + * limit of 1 bcond/quad to permit training + */ +$a_loop: + stq_u t0, 0(a0) # L : + addq a0, 8, a0 # E : + subq a2, 1, a2 # E : + nop + + ldq_u t0, 0(a1) # L : + addq a1, 8, a1 # E : + cmpbge zero, t0, t8 # E : + beq a2, $a_eoc # U : + + beq t8, $a_loop # U : + nop + nop + nop + + /* Take care of the final (partial) word store. At this point + the end-of-count bit is set in t8 iff it applies. + + On entry to this basic block we have: + t0 == the source word containing the null + t8 == the cmpbge mask that found it. */ + +$a_eos: + negq t8, t12 # E : find low bit set + and t8, t12, t12 # E : (stall) + /* For the sake of the cache, don't read a destination word + if we're not going to need it. */ + and t12, 0x80, t6 # E : (stall) + bne t6, 1f # U : (stall) + + /* We're doing a partial word store and so need to combine + our source and original destination words. */ + ldq_u t1, 0(a0) # L : + subq t12, 1, t6 # E : + or t12, t6, t8 # E : (stall) + zapnot t0, t8, t0 # U : clear src bytes > null (stall) + + zap t1, t8, t1 # .. e1 : clear dst bytes <= null + or t0, t1, t0 # e1 : (stall) + nop + nop + +1: stq_u t0, 0(a0) # L : + ret (t9) # L0 : Latency=3 + nop + nop + + /* Add the end-of-count bit to the eos detection bitmask. */ +$a_eoc: + or t10, t8, t8 # E : + br $a_eos # L0 : Latency=3 + nop + nop + + .end stxncpy_aligned + + .align 4 + .ent __stxncpy + .globl __stxncpy +__stxncpy: + .frame sp, 0, t9, 0 + .prologue 0 + + /* Are source and destination co-aligned? 
*/ + xor a0, a1, t1 # E : + and a0, 7, t0 # E : find dest misalignment + and t1, 7, t1 # E : (stall) + addq a2, t0, a2 # E : bias count by dest misalignment (stall) + + subq a2, 1, a2 # E : + and a2, 7, t2 # E : (stall) + srl a2, 3, a2 # U : a2 = loop counter = (count - 1)/8 (stall) + addq zero, 1, t10 # E : + + sll t10, t2, t10 # U : t10 = bitmask of last count byte + bne t1, $unaligned # U : + /* We are co-aligned; take care of a partial first word. */ + ldq_u t1, 0(a1) # L : load first src word + addq a1, 8, a1 # E : + + beq t0, stxncpy_aligned # U : avoid loading dest word if not needed + ldq_u t0, 0(a0) # L : + nop + nop + + br stxncpy_aligned # .. e1 : + nop + nop + nop + + + +/* The source and destination are not co-aligned. Align the destination + and cope. We have to be very careful about not reading too much and + causing a SEGV. */ + + .align 4 +$u_head: + /* We know just enough now to be able to assemble the first + full source word. We can still find a zero at the end of it + that prevents us from outputting the whole thing. + + On entry to this basic block: + t0 == the first dest word, unmasked + t1 == the shifted low bits of the first source word + t6 == bytemask that is -1 in dest word bytes */ + + ldq_u t2, 8(a1) # L : Latency=3 load second src word + addq a1, 8, a1 # E : + mskql t0, a0, t0 # U : mask trailing garbage in dst + extqh t2, a1, t4 # U : (3 cycle stall on t2) + + or t1, t4, t1 # E : first aligned src word complete (stall) + mskqh t1, a0, t1 # U : mask leading garbage in src (stall) + or t0, t1, t0 # E : first output word complete (stall) + or t0, t6, t6 # E : mask original data for zero test (stall) + + cmpbge zero, t6, t8 # E : + beq a2, $u_eocfin # U : + lda t6, -1 # E : + nop + + bne t8, $u_final # U : + mskql t6, a1, t6 # U : mask out bits already seen + stq_u t0, 0(a0) # L : store first output word + or t6, t2, t2 # E : (stall) + + cmpbge zero, t2, t8 # E : find nulls in second partial + addq a0, 8, a0 # E : + subq a2, 1, a2 # E : + bne t8, $u_late_head_exit # U : + + /* Finally, we've got all the stupid leading edge cases taken care + of and we can set up to enter the main loop. */ + extql t2, a1, t1 # U : position hi-bits of lo word + beq a2, $u_eoc # U : + ldq_u t2, 8(a1) # L : read next high-order source word + addq a1, 8, a1 # E : + + extqh t2, a1, t0 # U : position lo-bits of hi word (stall) + cmpbge zero, t2, t8 # E : + nop + bne t8, $u_eos # U : + + /* Unaligned copy main loop. In order to avoid reading too much, + the loop is structured to detect zeros in aligned source words. + This has, unfortunately, effectively pulled half of a loop + iteration out into the head and half into the tail, but it does + prevent nastiness from accumulating in the very thing we want + to run as fast as possible. + + On entry to this basic block: + t0 == the shifted low-order bits from the current source word + t1 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word + + We further know that t2 does not contain a null terminator. 
*/ + + .align 4 +$u_loop: + or t0, t1, t0 # E : current dst word now complete + subq a2, 1, a2 # E : decrement word count + extql t2, a1, t1 # U : extract low bits for next time + addq a0, 8, a0 # E : + + stq_u t0, -8(a0) # U : save the current word + beq a2, $u_eoc # U : + ldq_u t2, 8(a1) # U : Latency=3 load high word for next time + addq a1, 8, a1 # E : + + extqh t2, a1, t0 # U : extract low bits (2 cycle stall) + cmpbge zero, t2, t8 # E : test new word for eos + nop + beq t8, $u_loop # U : + + /* We've found a zero somewhere in the source word we just read. + If it resides in the lower half, we have one (probably partial) + word to write out, and if it resides in the upper half, we + have one full and one partial word left to write out. + + On entry to this basic block: + t0 == the shifted low-order bits from the current source word + t1 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word. */ +$u_eos: + or t0, t1, t0 # E : first (partial) source word complete + nop + cmpbge zero, t0, t8 # E : is the null in this first bit? (stall) + bne t8, $u_final # U : (stall) + + stq_u t0, 0(a0) # L : the null was in the high-order bits + addq a0, 8, a0 # E : + subq a2, 1, a2 # E : + nop + +$u_late_head_exit: + extql t2, a1, t0 # U : + cmpbge zero, t0, t8 # E : + or t8, t10, t6 # E : (stall) + cmoveq a2, t6, t8 # E : Latency=2, extra map slot (stall) + + /* Take care of a final (probably partial) result word. + On entry to this basic block: + t0 == assembled source word + t8 == cmpbge mask that found the null. */ +$u_final: + negq t8, t6 # E : isolate low bit set + and t6, t8, t12 # E : (stall) + and t12, 0x80, t6 # E : avoid dest word load if we can (stall) + bne t6, 1f # U : (stall) + + ldq_u t1, 0(a0) # L : + subq t12, 1, t6 # E : + or t6, t12, t8 # E : (stall) + zapnot t0, t8, t0 # U : kill source bytes > null + + zap t1, t8, t1 # U : kill dest bytes <= null + or t0, t1, t0 # E : (stall) + nop + nop + +1: stq_u t0, 0(a0) # L : + ret (t9) # L0 : Latency=3 + + /* Got to end-of-count before end of string. + On entry to this basic block: + t1 == the shifted high-order bits from the previous source word */ +$u_eoc: + and a1, 7, t6 # E : avoid final load if possible + sll t10, t6, t6 # U : (stall) + and t6, 0xff, t6 # E : (stall) + bne t6, 1f # U : (stall) + + ldq_u t2, 8(a1) # L : load final src word + nop + extqh t2, a1, t0 # U : extract low bits for last word (stall) + or t1, t0, t1 # E : (stall) + +1: cmpbge zero, t1, t8 # E : + mov t1, t0 # E : + +$u_eocfin: # end-of-count, final word + or t10, t8, t8 # E : + br $u_final # L0 : Latency=3 + + /* Unaligned copy entry point. */ + .align 4 +$unaligned: + + ldq_u t1, 0(a1) # L : load first source word + and a0, 7, t4 # E : find dest misalignment + and a1, 7, t5 # E : find src misalignment + /* Conditionally load the first destination word and a bytemask + with 0xff indicating that the destination byte is sacrosanct. */ + mov zero, t0 # E : + + mov zero, t6 # E : + beq t4, 1f # U : + ldq_u t0, 0(a0) # L : + lda t6, -1 # E : + + mskql t6, a0, t6 # U : + nop + nop + subq a1, t4, a1 # E : sub dest misalignment from src addr + + /* If source misalignment is larger than dest misalignment, we need + extra startup checks to avoid SEGV. */ + +1: cmplt t4, t5, t12 # E : + extql t1, a1, t1 # U : shift src into place + lda t2, -1 # E : for creating masks later + beq t12, $u_head # U : (stall) + + extql t2, a1, t2 # U : + cmpbge zero, t1, t8 # E : is there a zero? 
+ andnot t2, t6, t2 # E : dest mask for a single word copy + or t8, t10, t5 # E : test for end-of-count too + + cmpbge zero, t2, t3 # E : + cmoveq a2, t5, t8 # E : Latency=2, extra map slot + nop # E : keep with cmoveq + andnot t8, t3, t8 # E : (stall) + + beq t8, $u_head # U : + /* At this point we've found a zero in the first partial word of + the source. We need to isolate the valid source data and mask + it into the original destination data. (Incidentally, we know + that we'll need at least one byte of that original dest word.) */ + ldq_u t0, 0(a0) # L : + negq t8, t6 # E : build bitmask of bytes <= zero + mskqh t1, t4, t1 # U : + + and t6, t8, t12 # E : + subq t12, 1, t6 # E : (stall) + or t6, t12, t8 # E : (stall) + zapnot t2, t8, t2 # U : prepare source word; mirror changes (stall) + + zapnot t1, t8, t1 # U : to source validity mask + andnot t0, t2, t0 # E : zero place for source to reside + or t0, t1, t0 # E : and put it there (stall both t0, t1) + stq_u t0, 0(a0) # L : (stall) + + ret (t9) # L0 : Latency=3 + nop + nop + nop + + .end __stxncpy diff --git a/arch/alpha/lib/ev67-strcat.S b/arch/alpha/lib/ev67-strcat.S new file mode 100644 index 0000000000..ec3096a9e8 --- /dev/null +++ b/arch/alpha/lib/ev67-strcat.S @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/ev67-strcat.S + * 21264 version contributed by Rick Gorton + * + * Append a null-terminated string from SRC to DST. + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * Try not to change the actual algorithm if possible for consistency. + * Commentary: It seems bogus to walk the input string twice - once + * to determine the length, and then again while doing the copy. + * A significant (future) enhancement would be to only read the input + * string once. + */ + +#include + .text + + .align 4 + .globl strcat + .ent strcat +strcat: + .frame $30, 0, $26 + .prologue 0 + + mov $16, $0 # E : set up return value + /* Find the end of the string. */ + ldq_u $1, 0($16) # L : load first quadword (a0 may be misaligned) + lda $2, -1 # E : + insqh $2, $16, $2 # U : + + andnot $16, 7, $16 # E : + or $2, $1, $1 # E : + cmpbge $31, $1, $2 # E : bits set iff byte == 0 + bne $2, $found # U : + +$loop: ldq $1, 8($16) # L : + addq $16, 8, $16 # E : + cmpbge $31, $1, $2 # E : + beq $2, $loop # U : + +$found: cttz $2, $3 # U0 : + addq $16, $3, $16 # E : + /* Now do the append. */ + mov $26, $23 # E : + br __stxcpy # L0 : + + .end strcat + EXPORT_SYMBOL(strcat) diff --git a/arch/alpha/lib/ev67-strchr.S b/arch/alpha/lib/ev67-strchr.S new file mode 100644 index 0000000000..fbf89e0b6d --- /dev/null +++ b/arch/alpha/lib/ev67-strchr.S @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/ev67-strchr.S + * 21264 version contributed by Rick Gorton + * + * Return the address of a given character within a null-terminated + * string, or null if it is not found. 
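The loop below tests each quadword for bytes equal to the target character and for the terminator in the same pass, combines the two cmpbge masks, and treats the hit as a match only if the first interesting byte is a character match rather than the NUL (the cttz/cmoveq pair at the end). That per-quadword decision expressed in C over the two masks (sketch; __builtin_ctz plays the role of cttz, names illustrative):

/* Returns the matching byte's index within the quadword, -1 if the
   terminator comes first (not found), or 8 if neither byte kind appears
   and the scan should continue. */
static int strchr_quad_result(unsigned zero_mask, unsigned match_mask)
{
        unsigned both = zero_mask | match_mask;

        if (!both)
                return 8;
        if ((both & -both) & match_mask)   /* earliest special byte is a match */
                return __builtin_ctz(both);
        return -1;
}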
+ * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * Try not to change the actual algorithm if possible for consistency. + */ +#include +#include + + .set noreorder + .set noat + + .align 4 + .globl strchr + .ent strchr +strchr: + .frame sp, 0, ra + .prologue 0 + + ldq_u t0, 0(a0) # L : load first quadword Latency=3 + and a1, 0xff, t3 # E : 00000000000000ch + insbl a1, 1, t5 # U : 000000000000ch00 + insbl a1, 7, a2 # U : ch00000000000000 + + insbl t3, 6, a3 # U : 00ch000000000000 + or t5, t3, a1 # E : 000000000000chch + andnot a0, 7, v0 # E : align our loop pointer + lda t4, -1 # E : build garbage mask + + mskqh t4, a0, t4 # U : only want relevant part of first quad + or a2, a3, a2 # E : chch000000000000 + inswl a1, 2, t5 # E : 00000000chch0000 + inswl a1, 4, a3 # E : 0000chch00000000 + + or a1, a2, a1 # E : chch00000000chch + or a3, t5, t5 # E : 0000chchchch0000 + cmpbge zero, t0, t2 # E : bits set iff byte == zero + cmpbge zero, t4, t4 # E : bits set iff byte is garbage + + /* This quad is _very_ serialized. Lots of stalling happens */ + or t5, a1, a1 # E : chchchchchchchch + xor t0, a1, t1 # E : make bytes == c zero + cmpbge zero, t1, t3 # E : bits set iff byte == c + or t2, t3, t0 # E : bits set iff char match or zero match + + andnot t0, t4, t0 # E : clear garbage bits + cttz t0, a2 # U0 : speculative (in case we get a match) + nop # E : + bne t0, $found # U : + + /* + * Yuk. This loop is going to stall like crazy waiting for the + * data to be loaded. Not much can be done about it unless it's + * unrolled multiple times - is that safe to do in kernel space? + * Or would exception handling recovery code do the trick here? + */ +$loop: ldq t0, 8(v0) # L : Latency=3 + addq v0, 8, v0 # E : + xor t0, a1, t1 # E : + cmpbge zero, t0, t2 # E : bits set iff byte == 0 + + cmpbge zero, t1, t3 # E : bits set iff byte == c + or t2, t3, t0 # E : + cttz t3, a2 # U0 : speculative (in case we get a match) + beq t0, $loop # U : + +$found: negq t0, t1 # E : clear all but least set bit + and t0, t1, t0 # E : + and t0, t3, t1 # E : bit set iff byte was the char + addq v0, a2, v0 # E : Add in the bit number from above + + cmoveq t1, $31, v0 # E : Two mapping slots, latency = 2 + nop + nop + ret # L0 : + + .end strchr + EXPORT_SYMBOL(strchr) diff --git a/arch/alpha/lib/ev67-strlen.S b/arch/alpha/lib/ev67-strlen.S new file mode 100644 index 0000000000..b73106ffbb --- /dev/null +++ b/arch/alpha/lib/ev67-strlen.S @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/ev67-strlen.S + * 21264 version by Rick Gorton + * + * Finds length of a 0-terminated string. 
Optimized for the + * Alpha architecture: + * + * - memory accessed as aligned quadwords only + * - uses bcmpge to compare 8 bytes in parallel + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + */ +#include + .set noreorder + .set noat + + .globl strlen + .ent strlen + .align 4 +strlen: + ldq_u $1, 0($16) # L : load first quadword ($16 may be misaligned) + lda $2, -1($31) # E : + insqh $2, $16, $2 # U : + andnot $16, 7, $0 # E : + + or $2, $1, $1 # E : + cmpbge $31, $1, $2 # E : $2 <- bitmask: bit i == 1 <==> i-th byte == 0 + nop # E : + bne $2, $found # U : + +$loop: ldq $1, 8($0) # L : + addq $0, 8, $0 # E : addr += 8 + cmpbge $31, $1, $2 # E : + beq $2, $loop # U : + +$found: + cttz $2, $3 # U0 : + addq $0, $3, $0 # E : + subq $0, $16, $0 # E : + ret $31, ($26) # L0 : + + .end strlen + EXPORT_SYMBOL(strlen) diff --git a/arch/alpha/lib/ev67-strncat.S b/arch/alpha/lib/ev67-strncat.S new file mode 100644 index 0000000000..ceb0ca5287 --- /dev/null +++ b/arch/alpha/lib/ev67-strncat.S @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/ev67-strncat.S + * 21264 version contributed by Rick Gorton + * + * Append no more than COUNT characters from the null-terminated string SRC + * to the null-terminated string DST. Always null-terminate the new DST. + * + * This differs slightly from the semantics in libc in that we never write + * past count, whereas libc may write to count+1. This follows the generic + * implementation in lib/string.c and is, IMHO, more sensible. + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + * Try not to change the actual algorithm if possible for consistency. + */ + +#include + .text + + .align 4 + .globl strncat + .ent strncat +strncat: + .frame $30, 0, $26 + .prologue 0 + + mov $16, $0 # set up return value + beq $18, $zerocount # U : + /* Find the end of the string. */ + ldq_u $1, 0($16) # L : load first quadword ($16 may be misaligned) + lda $2, -1($31) # E : + + insqh $2, $0, $2 # U : + andnot $16, 7, $16 # E : + nop # E : + or $2, $1, $1 # E : + + nop # E : + nop # E : + cmpbge $31, $1, $2 # E : bits set iff byte == 0 + bne $2, $found # U : + +$loop: ldq $1, 8($16) # L : + addq $16, 8, $16 # E : + cmpbge $31, $1, $2 # E : + beq $2, $loop # U : + +$found: cttz $2, $3 # U0 : + addq $16, $3, $16 # E : + nop # E : + bsr $23, __stxncpy # L0 :/* Now do the append. */ + + /* Worry about the null termination. */ + + zapnot $1, $27, $2 # U : was last byte a null? + cmplt $27, $24, $5 # E : did we fill the buffer completely? + bne $2, 0f # U : + ret # L0 : + +0: or $5, $18, $2 # E : + nop + bne $2, 2f # U : + and $24, 0x80, $3 # E : no zero next byte + + nop # E : + bne $3, 1f # U : + /* Here there are bytes left in the current word. Clear one. 
*/ + addq $24, $24, $24 # E : end-of-count bit <<= 1 + nop # E : + +2: zap $1, $24, $1 # U : + nop # E : + stq_u $1, 0($16) # L : + ret # L0 : + +1: /* Here we must clear the first byte of the next DST word */ + stb $31, 8($16) # L : + nop # E : + nop # E : + ret # L0 : + +$zerocount: + nop # E : + nop # E : + nop # E : + ret # L0 : + + .end strncat + EXPORT_SYMBOL(strncat) diff --git a/arch/alpha/lib/ev67-strrchr.S b/arch/alpha/lib/ev67-strrchr.S new file mode 100644 index 0000000000..7f80e39853 --- /dev/null +++ b/arch/alpha/lib/ev67-strrchr.S @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/ev67-strrchr.S + * 21264 version by Rick Gorton + * + * Finds length of a 0-terminated string. Optimized for the + * Alpha architecture: + * + * - memory accessed as aligned quadwords only + * - uses bcmpge to compare 8 bytes in parallel + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + */ + +#include +#include + + .set noreorder + .set noat + + .align 4 + .ent strrchr + .globl strrchr +strrchr: + .frame sp, 0, ra + .prologue 0 + + and a1, 0xff, t2 # E : 00000000000000ch + insbl a1, 1, t4 # U : 000000000000ch00 + insbl a1, 2, t5 # U : 0000000000ch0000 + ldq_u t0, 0(a0) # L : load first quadword Latency=3 + + mov zero, t6 # E : t6 is last match aligned addr + or t2, t4, a1 # E : 000000000000chch + sll t5, 8, t3 # U : 00000000ch000000 + mov zero, t8 # E : t8 is last match byte compare mask + + andnot a0, 7, v0 # E : align source addr + or t5, t3, t3 # E : 00000000chch0000 + sll a1, 32, t2 # U : 0000chch00000000 + sll a1, 48, t4 # U : chch000000000000 + + or t4, a1, a1 # E : chch00000000chch + or t2, t3, t2 # E : 0000chchchch0000 + or a1, t2, a1 # E : chchchchchchchch + lda t5, -1 # E : build garbage mask + + cmpbge zero, t0, t1 # E : bits set iff byte == zero + mskqh t5, a0, t4 # E : Complete garbage mask + xor t0, a1, t2 # E : make bytes == c zero + cmpbge zero, t4, t4 # E : bits set iff byte is garbage + + cmpbge zero, t2, t3 # E : bits set iff byte == c + andnot t1, t4, t1 # E : clear garbage from null test + andnot t3, t4, t3 # E : clear garbage from char test + bne t1, $eos # U : did we already hit the terminator? + + /* Character search main loop */ +$loop: + ldq t0, 8(v0) # L : load next quadword + cmovne t3, v0, t6 # E : save previous comparisons match + nop # : Latency=2, extra map slot (keep nop with cmov) + nop + + cmovne t3, t3, t8 # E : Latency=2, extra map slot + nop # : keep with cmovne + addq v0, 8, v0 # E : + xor t0, a1, t2 # E : + + cmpbge zero, t0, t1 # E : bits set iff byte == zero + cmpbge zero, t2, t3 # E : bits set iff byte == c + beq t1, $loop # U : if we havnt seen a null, loop + nop + + /* Mask out character matches after terminator */ +$eos: + negq t1, t4 # E : isolate first null byte match + and t1, t4, t4 # E : + subq t4, 1, t5 # E : build a mask of the bytes up to... + or t4, t5, t4 # E : ... 
and including the null + + and t3, t4, t3 # E : mask out char matches after null + cmovne t3, t3, t8 # E : save it, if match found Latency=2, extra map slot + nop # : Keep with cmovne + nop + + cmovne t3, v0, t6 # E : + nop # : Keep with cmovne + /* Locate the address of the last matched character */ + ctlz t8, t2 # U0 : Latency=3 (0x40 for t8=0) + nop + + cmoveq t8, 0x3f, t2 # E : Compensate for case when no match is seen + nop # E : hide the cmov latency (2) behind ctlz latency + lda t5, 0x3f($31) # E : + subq t5, t2, t5 # E : Normalize leading zero count + + addq t6, t5, v0 # E : and add to quadword address + ret # L0 : Latency=3 + nop + nop + + .end strrchr + EXPORT_SYMBOL(strrchr) diff --git a/arch/alpha/lib/fls.c b/arch/alpha/lib/fls.c new file mode 100644 index 0000000000..02e1567766 --- /dev/null +++ b/arch/alpha/lib/fls.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/alpha/lib/fls.c + */ + +#include +#include + +/* This is fls(x)-1, except zero is held to zero. This allows most + efficient input into extbl, plus it allows easy handling of fls(0)=0. */ + +const unsigned char __flsm1_tab[256] = +{ + 0, + 0, + 1, 1, + 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, +}; + +EXPORT_SYMBOL(__flsm1_tab); diff --git a/arch/alpha/lib/fpreg.c b/arch/alpha/lib/fpreg.c new file mode 100644 index 0000000000..34fea46564 --- /dev/null +++ b/arch/alpha/lib/fpreg.c @@ -0,0 +1,201 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/alpha/lib/fpreg.c + * + * (C) Copyright 1998 Linus Torvalds + */ + +#include +#include + +#if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67) +#define STT(reg,val) asm volatile ("ftoit $f"#reg",%0" : "=r"(val)); +#else +#define STT(reg,val) asm volatile ("stt $f"#reg",%0" : "=m"(val)); +#endif + +unsigned long +alpha_read_fp_reg (unsigned long reg) +{ + unsigned long val; + + switch (reg) { + case 0: STT( 0, val); break; + case 1: STT( 1, val); break; + case 2: STT( 2, val); break; + case 3: STT( 3, val); break; + case 4: STT( 4, val); break; + case 5: STT( 5, val); break; + case 6: STT( 6, val); break; + case 7: STT( 7, val); break; + case 8: STT( 8, val); break; + case 9: STT( 9, val); break; + case 10: STT(10, val); break; + case 11: STT(11, val); break; + case 12: STT(12, val); break; + case 13: STT(13, val); break; + case 14: STT(14, val); break; + case 15: STT(15, val); break; + case 16: STT(16, val); break; + case 17: STT(17, val); break; + case 18: STT(18, val); break; + case 19: STT(19, val); break; + case 20: STT(20, val); break; + case 21: STT(21, val); break; + case 22: STT(22, val); break; + case 23: STT(23, val); break; + case 24: STT(24, val); break; + case 25: STT(25, val); break; + case 26: STT(26, val); break; + case 27: STT(27, val); break; + case 28: STT(28, val); break; + case 29: STT(29, 
val); break; + case 30: STT(30, val); break; + case 31: STT(31, val); break; + default: return 0; + } + return val; +} +EXPORT_SYMBOL(alpha_read_fp_reg); + +#if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67) +#define LDT(reg,val) asm volatile ("itoft %0,$f"#reg : : "r"(val)); +#else +#define LDT(reg,val) asm volatile ("ldt $f"#reg",%0" : : "m"(val)); +#endif + +void +alpha_write_fp_reg (unsigned long reg, unsigned long val) +{ + switch (reg) { + case 0: LDT( 0, val); break; + case 1: LDT( 1, val); break; + case 2: LDT( 2, val); break; + case 3: LDT( 3, val); break; + case 4: LDT( 4, val); break; + case 5: LDT( 5, val); break; + case 6: LDT( 6, val); break; + case 7: LDT( 7, val); break; + case 8: LDT( 8, val); break; + case 9: LDT( 9, val); break; + case 10: LDT(10, val); break; + case 11: LDT(11, val); break; + case 12: LDT(12, val); break; + case 13: LDT(13, val); break; + case 14: LDT(14, val); break; + case 15: LDT(15, val); break; + case 16: LDT(16, val); break; + case 17: LDT(17, val); break; + case 18: LDT(18, val); break; + case 19: LDT(19, val); break; + case 20: LDT(20, val); break; + case 21: LDT(21, val); break; + case 22: LDT(22, val); break; + case 23: LDT(23, val); break; + case 24: LDT(24, val); break; + case 25: LDT(25, val); break; + case 26: LDT(26, val); break; + case 27: LDT(27, val); break; + case 28: LDT(28, val); break; + case 29: LDT(29, val); break; + case 30: LDT(30, val); break; + case 31: LDT(31, val); break; + } +} +EXPORT_SYMBOL(alpha_write_fp_reg); + +#if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67) +#define STS(reg,val) asm volatile ("ftois $f"#reg",%0" : "=r"(val)); +#else +#define STS(reg,val) asm volatile ("sts $f"#reg",%0" : "=m"(val)); +#endif + +unsigned long +alpha_read_fp_reg_s (unsigned long reg) +{ + unsigned long val; + + switch (reg) { + case 0: STS( 0, val); break; + case 1: STS( 1, val); break; + case 2: STS( 2, val); break; + case 3: STS( 3, val); break; + case 4: STS( 4, val); break; + case 5: STS( 5, val); break; + case 6: STS( 6, val); break; + case 7: STS( 7, val); break; + case 8: STS( 8, val); break; + case 9: STS( 9, val); break; + case 10: STS(10, val); break; + case 11: STS(11, val); break; + case 12: STS(12, val); break; + case 13: STS(13, val); break; + case 14: STS(14, val); break; + case 15: STS(15, val); break; + case 16: STS(16, val); break; + case 17: STS(17, val); break; + case 18: STS(18, val); break; + case 19: STS(19, val); break; + case 20: STS(20, val); break; + case 21: STS(21, val); break; + case 22: STS(22, val); break; + case 23: STS(23, val); break; + case 24: STS(24, val); break; + case 25: STS(25, val); break; + case 26: STS(26, val); break; + case 27: STS(27, val); break; + case 28: STS(28, val); break; + case 29: STS(29, val); break; + case 30: STS(30, val); break; + case 31: STS(31, val); break; + default: return 0; + } + return val; +} +EXPORT_SYMBOL(alpha_read_fp_reg_s); + +#if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67) +#define LDS(reg,val) asm volatile ("itofs %0,$f"#reg : : "r"(val)); +#else +#define LDS(reg,val) asm volatile ("lds $f"#reg",%0" : : "m"(val)); +#endif + +void +alpha_write_fp_reg_s (unsigned long reg, unsigned long val) +{ + switch (reg) { + case 0: LDS( 0, val); break; + case 1: LDS( 1, val); break; + case 2: LDS( 2, val); break; + case 3: LDS( 3, val); break; + case 4: LDS( 4, val); break; + case 5: LDS( 5, val); break; + case 6: LDS( 6, val); break; + case 7: LDS( 7, val); break; + case 8: LDS( 8, val); break; + case 9: LDS( 9, val); break; + case 10: 
LDS(10, val); break; + case 11: LDS(11, val); break; + case 12: LDS(12, val); break; + case 13: LDS(13, val); break; + case 14: LDS(14, val); break; + case 15: LDS(15, val); break; + case 16: LDS(16, val); break; + case 17: LDS(17, val); break; + case 18: LDS(18, val); break; + case 19: LDS(19, val); break; + case 20: LDS(20, val); break; + case 21: LDS(21, val); break; + case 22: LDS(22, val); break; + case 23: LDS(23, val); break; + case 24: LDS(24, val); break; + case 25: LDS(25, val); break; + case 26: LDS(26, val); break; + case 27: LDS(27, val); break; + case 28: LDS(28, val); break; + case 29: LDS(29, val); break; + case 30: LDS(30, val); break; + case 31: LDS(31, val); break; + } +} +EXPORT_SYMBOL(alpha_write_fp_reg_s); diff --git a/arch/alpha/lib/memchr.S b/arch/alpha/lib/memchr.S new file mode 100644 index 0000000000..c13d3eca2e --- /dev/null +++ b/arch/alpha/lib/memchr.S @@ -0,0 +1,165 @@ +/* Copyright (C) 1996 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David Mosberger (davidm@cs.arizona.edu). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* Finds characters in a memory area. Optimized for the Alpha: + + - memory accessed as aligned quadwords only + - uses cmpbge to compare 8 bytes in parallel + - does binary search to find 0 byte in last + quadword (HAKMEM needed 12 instructions to + do this instead of the 9 instructions that + binary search needs). + +For correctness consider that: + + - only minimum number of quadwords may be accessed + - the third argument is an unsigned long +*/ +#include + .set noreorder + .set noat + + .globl memchr + .ent memchr +memchr: + .frame $30,0,$26,0 + .prologue 0 + + # Hack -- if someone passes in (size_t)-1, hoping to just + # search til the end of the address space, we will overflow + # below when we find the address of the last byte. Given + # that we will never have a 56-bit address space, cropping + # the length is the easiest way to avoid trouble. + zap $18, 0x80, $5 #-e0 : + + beq $18, $not_found # .. e1 : + ldq_u $1, 0($16) # e1 : load first quadword + insbl $17, 1, $2 # .. e0 : $2 = 000000000000ch00 + and $17, 0xff, $17 #-e0 : $17 = 00000000000000ch + cmpult $18, 9, $4 # .. e1 : + or $2, $17, $17 # e0 : $17 = 000000000000chch + lda $3, -1($31) # .. e1 : + sll $17, 16, $2 #-e0 : $2 = 00000000chch0000 + addq $16, $5, $5 # .. e1 : + or $2, $17, $17 # e1 : $17 = 00000000chchchch + unop # : + sll $17, 32, $2 #-e0 : $2 = chchchch00000000 + or $2, $17, $17 # e1 : $17 = chchchchchchchch + extql $1, $16, $7 # e0 : + beq $4, $first_quad # .. e1 : + + ldq_u $6, -1($5) #-e1 : eight or less bytes to search + extqh $6, $16, $6 # .. e0 : + mov $16, $0 # e0 : + or $7, $6, $1 # .. 
e1 : $1 = quadword starting at $16 + + # Deal with the case where at most 8 bytes remain to be searched + # in $1. E.g.: + # $18 = 6 + # $1 = ????c6c5c4c3c2c1 +$last_quad: + negq $18, $6 #-e0 : + xor $17, $1, $1 # .. e1 : + srl $3, $6, $6 # e0 : $6 = mask of $18 bits set + cmpbge $31, $1, $2 # .. e1 : + and $2, $6, $2 #-e0 : + beq $2, $not_found # .. e1 : + +$found_it: + # Now, determine which byte matched: + negq $2, $3 # e0 : + and $2, $3, $2 # e1 : + + and $2, 0x0f, $1 #-e0 : + addq $0, 4, $3 # .. e1 : + cmoveq $1, $3, $0 # e0 : + + addq $0, 2, $3 # .. e1 : + and $2, 0x33, $1 #-e0 : + cmoveq $1, $3, $0 # .. e1 : + + and $2, 0x55, $1 # e0 : + addq $0, 1, $3 # .. e1 : + cmoveq $1, $3, $0 #-e0 : + +$done: ret # .. e1 : + + # Deal with the case where $18 > 8 bytes remain to be + # searched. $16 may not be aligned. + .align 4 +$first_quad: + andnot $16, 0x7, $0 #-e1 : + insqh $3, $16, $2 # .. e0 : $2 = 0000ffffffffffff ($16<0:2> ff) + xor $1, $17, $1 # e0 : + or $1, $2, $1 # e1 : $1 = ====ffffffffffff + cmpbge $31, $1, $2 #-e0 : + bne $2, $found_it # .. e1 : + + # At least one byte left to process. + + ldq $1, 8($0) # e0 : + subq $5, 1, $18 # .. e1 : + addq $0, 8, $0 #-e0 : + + # Make $18 point to last quad to be accessed (the + # last quad may or may not be partial). + + andnot $18, 0x7, $18 # .. e1 : + cmpult $0, $18, $2 # e0 : + beq $2, $final # .. e1 : + + # At least two quads remain to be accessed. + + subq $18, $0, $4 #-e0 : $4 <- nr quads to be processed + and $4, 8, $4 # e1 : odd number of quads? + bne $4, $odd_quad_count # e1 : + + # At least three quads remain to be accessed + + mov $1, $4 # e0 : move prefetched value to correct reg + + .align 4 +$unrolled_loop: + ldq $1, 8($0) #-e0 : prefetch $1 + xor $17, $4, $2 # .. e1 : + cmpbge $31, $2, $2 # e0 : + bne $2, $found_it # .. e1 : + + addq $0, 8, $0 #-e0 : +$odd_quad_count: + xor $17, $1, $2 # .. e1 : + ldq $4, 8($0) # e0 : prefetch $4 + cmpbge $31, $2, $2 # .. e1 : + addq $0, 8, $6 #-e0 : + bne $2, $found_it # .. e1 : + + cmpult $6, $18, $6 # e0 : + addq $0, 8, $0 # .. e1 : + bne $6, $unrolled_loop #-e1 : + + mov $4, $1 # e0 : move prefetched value into $1 +$final: subq $5, $0, $18 # .. e1 : $18 <- number of bytes left to do + bne $18, $last_quad # e1 : + +$not_found: + mov $31, $0 #-e0 : + ret # .. e1 : + + .end memchr + EXPORT_SYMBOL(memchr) diff --git a/arch/alpha/lib/memcpy.c b/arch/alpha/lib/memcpy.c new file mode 100644 index 0000000000..cbac3dc6d9 --- /dev/null +++ b/arch/alpha/lib/memcpy.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/lib/memcpy.c + * + * Copyright (C) 1995 Linus Torvalds + */ + +/* + * This is a reasonably optimized memcpy() routine. + */ + +/* + * Note that the C code is written to be optimized into good assembly. However, + * at this point gcc is unable to sanely compile "if (n >= 0)", resulting in a + * explicit compare against 0 (instead of just using the proper "blt reg, xx" or + * "bge reg, xx"). I hope alpha-gcc will be fixed to notice this eventually.. + */ + +#include +#include + +/* + * This should be done in one go with ldq_u*2/mask/stq_u. Do it + * with a macro so that we can fix it up later.. + */ +#define ALIGN_DEST_TO8_UP(d,s,n) \ + while (d & 7) { \ + if (n <= 0) return; \ + n--; \ + *(char *) d = *(char *) s; \ + d++; s++; \ + } +#define ALIGN_DEST_TO8_DN(d,s,n) \ + while (d & 7) { \ + if (n <= 0) return; \ + n--; \ + d--; s--; \ + *(char *) d = *(char *) s; \ + } + +/* + * This should similarly be done with ldq_u*2/mask/stq. 
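As an aside on the memchr routine above: replicating the target byte across a quadword and then testing all eight bytes at once has a well-known portable counterpart. The sketch below is illustrative only (it is not part of this patch, and the helper names are invented); memchr.S gets the same answer with a single cmpbge after the xor.

#include <stdint.h>

/* Purely illustrative stand-ins for what memchr.S does with
 * sll/or (byte replication) and cmpbge (per-byte zero test).
 */
static uint64_t repeat_byte(unsigned char c)
{
        return (uint64_t)c * 0x0101010101010101ULL;     /* chchchchchchchch */
}

/* Nonzero iff some byte of x is zero -- the classic portable trick. */
static uint64_t has_zero_byte(uint64_t x)
{
        return (x - 0x0101010101010101ULL) & ~x & 0x8080808080808080ULL;
}

/* Does this aligned quadword contain the byte c anywhere? */
static int quad_contains(uint64_t quad, unsigned char c)
{
        return has_zero_byte(quad ^ repeat_byte(c)) != 0;
}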
The destination + * is aligned, but we don't fill in a full quad-word + */ +#define DO_REST_UP(d,s,n) \ + while (n > 0) { \ + n--; \ + *(char *) d = *(char *) s; \ + d++; s++; \ + } +#define DO_REST_DN(d,s,n) \ + while (n > 0) { \ + n--; \ + d--; s--; \ + *(char *) d = *(char *) s; \ + } + +/* + * This should be done with ldq/mask/stq. The source and destination are + * aligned, but we don't fill in a full quad-word + */ +#define DO_REST_ALIGNED_UP(d,s,n) DO_REST_UP(d,s,n) +#define DO_REST_ALIGNED_DN(d,s,n) DO_REST_DN(d,s,n) + +/* + * This does unaligned memory copies. We want to avoid storing to + * an unaligned address, as that would do a read-modify-write cycle. + * We also want to avoid double-reading the unaligned reads. + * + * Note the ordering to try to avoid load (and address generation) latencies. + */ +static inline void __memcpy_unaligned_up (unsigned long d, unsigned long s, + long n) +{ + ALIGN_DEST_TO8_UP(d,s,n); + n -= 8; /* to avoid compare against 8 in the loop */ + if (n >= 0) { + unsigned long low_word, high_word; + __asm__("ldq_u %0,%1":"=r" (low_word):"m" (*(unsigned long *) s)); + do { + unsigned long tmp; + __asm__("ldq_u %0,%1":"=r" (high_word):"m" (*(unsigned long *)(s+8))); + n -= 8; + __asm__("extql %1,%2,%0" + :"=r" (low_word) + :"r" (low_word), "r" (s)); + __asm__("extqh %1,%2,%0" + :"=r" (tmp) + :"r" (high_word), "r" (s)); + s += 8; + *(unsigned long *) d = low_word | tmp; + d += 8; + low_word = high_word; + } while (n >= 0); + } + n += 8; + DO_REST_UP(d,s,n); +} + +static inline void __memcpy_unaligned_dn (unsigned long d, unsigned long s, + long n) +{ + /* I don't understand AXP assembler well enough for this. -Tim */ + s += n; + d += n; + while (n--) + * (char *) --d = * (char *) --s; +} + +/* + * Hmm.. Strange. The __asm__ here is there to make gcc use an integer register + * for the load-store. I don't know why, but it would seem that using a floating + * point register for the move seems to slow things down (very small difference, + * though). + * + * Note the ordering to try to avoid load (and address generation) latencies. + */ +static inline void __memcpy_aligned_up (unsigned long d, unsigned long s, + long n) +{ + ALIGN_DEST_TO8_UP(d,s,n); + n -= 8; + while (n >= 0) { + unsigned long tmp; + __asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s)); + n -= 8; + s += 8; + *(unsigned long *) d = tmp; + d += 8; + } + n += 8; + DO_REST_ALIGNED_UP(d,s,n); +} +static inline void __memcpy_aligned_dn (unsigned long d, unsigned long s, + long n) +{ + s += n; + d += n; + ALIGN_DEST_TO8_DN(d,s,n); + n -= 8; + while (n >= 0) { + unsigned long tmp; + s -= 8; + __asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s)); + n -= 8; + d -= 8; + *(unsigned long *) d = tmp; + } + n += 8; + DO_REST_ALIGNED_DN(d,s,n); +} + +void * memcpy(void * dest, const void *src, size_t n) +{ + if (!(((unsigned long) dest ^ (unsigned long) src) & 7)) { + __memcpy_aligned_up ((unsigned long) dest, (unsigned long) src, + n); + return dest; + } + __memcpy_unaligned_up ((unsigned long) dest, (unsigned long) src, n); + return dest; +} +EXPORT_SYMBOL(memcpy); diff --git a/arch/alpha/lib/memmove.S b/arch/alpha/lib/memmove.S new file mode 100644 index 0000000000..42d1922d0e --- /dev/null +++ b/arch/alpha/lib/memmove.S @@ -0,0 +1,183 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/memmove.S + * + * Barely optimized memmove routine for Alpha EV5. + * + * This is hand-massaged output from the original memcpy.c. 
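For readers unfamiliar with the Alpha byte-manipulation instructions, the extql/extqh pair in __memcpy_unaligned_up above is essentially a shift-and-merge of two consecutive source quadwords. The following is an illustration only, not patch content; it takes the little-endian view (as Alpha does) and assumes the co-aligned case was already routed to the other path, so 0 < shift < 8.

#include <stdint.h>

/* Illustrative only: merge the usable halves of two loaded quadwords,
 * where 'shift' is the byte offset of the source pointer within its
 * quadword.  With shift == 0 the shift by 64 would be undefined, but
 * that case is exactly the co-aligned path handled separately above.
 */
static uint64_t merge_quads(uint64_t lo, uint64_t hi, unsigned int shift)
{
        unsigned int bits = shift * 8;

        return (lo >> bits) | (hi << (64 - bits));
}

The real loop keeps the previous high word around so each source quadword is loaded exactly once, which is what the ordering comments in the C file above are about.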
We defer to + * memcpy whenever possible; the backwards copy loops are not unrolled. + */ +#include + .set noat + .set noreorder + .text + + .align 4 + .globl memmove + .ent memmove +memmove: + ldgp $29, 0($27) + unop + nop + .prologue 1 + + addq $16,$18,$4 + addq $17,$18,$5 + cmpule $4,$17,$1 /* dest + n <= src */ + cmpule $5,$16,$2 /* dest >= src + n */ + + bis $1,$2,$1 + mov $16,$0 + xor $16,$17,$2 + bne $1,memcpy !samegp + + and $2,7,$2 /* Test for src/dest co-alignment. */ + and $16,7,$1 + cmpule $16,$17,$3 + bne $3,$memmove_up /* dest < src */ + + and $4,7,$1 + bne $2,$misaligned_dn + unop + beq $1,$skip_aligned_byte_loop_head_dn + +$aligned_byte_loop_head_dn: + lda $4,-1($4) + lda $5,-1($5) + unop + ble $18,$egress + + ldq_u $3,0($5) + ldq_u $2,0($4) + lda $18,-1($18) + extbl $3,$5,$1 + + insbl $1,$4,$1 + mskbl $2,$4,$2 + bis $1,$2,$1 + and $4,7,$6 + + stq_u $1,0($4) + bne $6,$aligned_byte_loop_head_dn + +$skip_aligned_byte_loop_head_dn: + lda $18,-8($18) + blt $18,$skip_aligned_word_loop_dn + +$aligned_word_loop_dn: + ldq $1,-8($5) + nop + lda $5,-8($5) + lda $18,-8($18) + + stq $1,-8($4) + nop + lda $4,-8($4) + bge $18,$aligned_word_loop_dn + +$skip_aligned_word_loop_dn: + lda $18,8($18) + bgt $18,$byte_loop_tail_dn + unop + ret $31,($26),1 + + .align 4 +$misaligned_dn: + nop + fnop + unop + beq $18,$egress + +$byte_loop_tail_dn: + ldq_u $3,-1($5) + ldq_u $2,-1($4) + lda $5,-1($5) + lda $4,-1($4) + + lda $18,-1($18) + extbl $3,$5,$1 + insbl $1,$4,$1 + mskbl $2,$4,$2 + + bis $1,$2,$1 + stq_u $1,0($4) + bgt $18,$byte_loop_tail_dn + br $egress + +$memmove_up: + mov $16,$4 + mov $17,$5 + bne $2,$misaligned_up + beq $1,$skip_aligned_byte_loop_head_up + +$aligned_byte_loop_head_up: + unop + ble $18,$egress + ldq_u $3,0($5) + ldq_u $2,0($4) + + lda $18,-1($18) + extbl $3,$5,$1 + insbl $1,$4,$1 + mskbl $2,$4,$2 + + bis $1,$2,$1 + lda $5,1($5) + stq_u $1,0($4) + lda $4,1($4) + + and $4,7,$6 + bne $6,$aligned_byte_loop_head_up + +$skip_aligned_byte_loop_head_up: + lda $18,-8($18) + blt $18,$skip_aligned_word_loop_up + +$aligned_word_loop_up: + ldq $1,0($5) + nop + lda $5,8($5) + lda $18,-8($18) + + stq $1,0($4) + nop + lda $4,8($4) + bge $18,$aligned_word_loop_up + +$skip_aligned_word_loop_up: + lda $18,8($18) + bgt $18,$byte_loop_tail_up + unop + ret $31,($26),1 + + .align 4 +$misaligned_up: + nop + fnop + unop + beq $18,$egress + +$byte_loop_tail_up: + ldq_u $3,0($5) + ldq_u $2,0($4) + lda $18,-1($18) + extbl $3,$5,$1 + + insbl $1,$4,$1 + mskbl $2,$4,$2 + bis $1,$2,$1 + stq_u $1,0($4) + + lda $5,1($5) + lda $4,1($4) + nop + bgt $18,$byte_loop_tail_up + +$egress: + ret $31,($26),1 + nop + nop + nop + + .end memmove + EXPORT_SYMBOL(memmove) diff --git a/arch/alpha/lib/memset.S b/arch/alpha/lib/memset.S new file mode 100644 index 0000000000..00393e30df --- /dev/null +++ b/arch/alpha/lib/memset.S @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * linux/arch/alpha/lib/memset.S + * + * This is an efficient (and small) implementation of the C library "memset()" + * function for the alpha. + * + * (C) Copyright 1996 Linus Torvalds + * + * This routine is "moral-ware": you are free to use it any way you wish, and + * the only obligation I put on you is a moral one: if you make any improvements + * to the routine, please send me your improvements for me to use similarly. + * + * The scheduling comments are according to the EV5 documentation (and done by + * hand, so they might well be incorrect, please do tell me about it..) 
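The two cmpule instructions at the top of memmove above (feeding "bne $1,memcpy") implement the usual non-overlap test before deferring to memcpy. A minimal, purely illustrative rendering (the function name is invented):

#include <stddef.h>
#include <stdint.h>

/* Nonzero iff [dest, dest+n) and [src, src+n) do not overlap, in which
 * case a forward memcpy is always safe.  Mirrors the cmpule/cmpule/bis
 * sequence in memmove.S; illustration only.
 */
static int regions_disjoint(const void *dest, const void *src, size_t n)
{
        uintptr_t d = (uintptr_t)dest, s = (uintptr_t)src;

        return (d + n <= s) || (s + n <= d);
}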
+ */ +#include + .set noat + .set noreorder +.text + .globl memset + .globl __memset + .globl ___memset + .globl __memset16 + .globl __constant_c_memset + + .ent ___memset +.align 5 +___memset: + .frame $30,0,$26,0 + .prologue 0 + + and $17,255,$1 /* E1 */ + insbl $17,1,$17 /* .. E0 */ + bis $17,$1,$17 /* E0 (p-c latency, next cycle) */ + sll $17,16,$1 /* E1 (p-c latency, next cycle) */ + + bis $17,$1,$17 /* E0 (p-c latency, next cycle) */ + sll $17,32,$1 /* E1 (p-c latency, next cycle) */ + bis $17,$1,$17 /* E0 (p-c latency, next cycle) */ + ldq_u $31,0($30) /* .. E1 */ + +.align 5 +__constant_c_memset: + addq $18,$16,$6 /* E0 */ + bis $16,$16,$0 /* .. E1 */ + xor $16,$6,$1 /* E0 */ + ble $18,end /* .. E1 */ + + bic $1,7,$1 /* E0 */ + beq $1,within_one_quad /* .. E1 (note EV5 zero-latency forwarding) */ + and $16,7,$3 /* E0 */ + beq $3,aligned /* .. E1 (note EV5 zero-latency forwarding) */ + + ldq_u $4,0($16) /* E0 */ + bis $16,$16,$5 /* .. E1 */ + insql $17,$16,$2 /* E0 */ + subq $3,8,$3 /* .. E1 */ + + addq $18,$3,$18 /* E0 $18 is new count ($3 is negative) */ + mskql $4,$16,$4 /* .. E1 (and possible load stall) */ + subq $16,$3,$16 /* E0 $16 is new aligned destination */ + bis $2,$4,$1 /* .. E1 */ + + bis $31,$31,$31 /* E0 */ + ldq_u $31,0($30) /* .. E1 */ + stq_u $1,0($5) /* E0 */ + bis $31,$31,$31 /* .. E1 */ + +.align 4 +aligned: + sra $18,3,$3 /* E0 */ + and $18,7,$18 /* .. E1 */ + bis $16,$16,$5 /* E0 */ + beq $3,no_quad /* .. E1 */ + +.align 3 +loop: + stq $17,0($5) /* E0 */ + subq $3,1,$3 /* .. E1 */ + addq $5,8,$5 /* E0 */ + bne $3,loop /* .. E1 */ + +no_quad: + bis $31,$31,$31 /* E0 */ + beq $18,end /* .. E1 */ + ldq $7,0($5) /* E0 */ + mskqh $7,$6,$2 /* .. E1 (and load stall) */ + + insqh $17,$6,$4 /* E0 */ + bis $2,$4,$1 /* .. E1 */ + stq $1,0($5) /* E0 */ + ret $31,($26),1 /* .. E1 */ + +.align 3 +within_one_quad: + ldq_u $1,0($16) /* E0 */ + insql $17,$16,$2 /* E1 */ + mskql $1,$16,$4 /* E0 (after load stall) */ + bis $2,$4,$2 /* E0 */ + + mskql $2,$6,$4 /* E0 */ + mskqh $1,$6,$2 /* .. E1 */ + bis $2,$4,$1 /* E0 */ + stq_u $1,0($16) /* E0 */ + +end: + ret $31,($26),1 /* E1 */ + .end ___memset +EXPORT_SYMBOL(___memset) +EXPORT_SYMBOL(__constant_c_memset) + + .align 5 + .ent __memset16 +__memset16: + .prologue 0 + + inswl $17,0,$1 /* E0 */ + inswl $17,2,$2 /* E0 */ + inswl $17,4,$3 /* E0 */ + or $1,$2,$1 /* .. E1 */ + inswl $17,6,$4 /* E0 */ + or $1,$3,$1 /* .. E1 */ + or $1,$4,$17 /* E0 */ + br __constant_c_memset /* .. E1 */ + + .end __memset16 +EXPORT_SYMBOL(__memset16) + +memset = ___memset +__memset = ___memset + EXPORT_SYMBOL(memset) + EXPORT_SYMBOL(__memset) diff --git a/arch/alpha/lib/srm_printk.c b/arch/alpha/lib/srm_printk.c new file mode 100644 index 0000000000..6276eed762 --- /dev/null +++ b/arch/alpha/lib/srm_printk.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/alpha/lib/srm_printk.c + */ + +#include +#include + +long +srm_printk(const char *fmt, ...) 
+{ + static char buf[1024]; + va_list args; + long len, num_lf; + char *src, *dst; + + va_start(args, fmt); + len = vsprintf(buf, fmt, args); + va_end(args); + + /* count number of linefeeds in string: */ + + num_lf = 0; + for (src = buf; *src; ++src) { + if (*src == '\n') { + ++num_lf; + } + } + + if (num_lf) { + /* expand each linefeed into carriage-return/linefeed: */ + for (dst = src + num_lf; src >= buf; ) { + if (*src == '\n') { + *dst-- = '\r'; + } + *dst-- = *src--; + } + } + + srm_puts(buf, num_lf+len); + return len; +} diff --git a/arch/alpha/lib/srm_puts.c b/arch/alpha/lib/srm_puts.c new file mode 100644 index 0000000000..df7991f6fc --- /dev/null +++ b/arch/alpha/lib/srm_puts.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/alpha/lib/srm_puts.c + */ + +#include +#include + +long +srm_puts(const char *str, long len) +{ + long remaining, written; + + if (!callback_init_done) + return len; + + for (remaining = len; remaining > 0; remaining -= written) + { + written = callback_puts(0, str, remaining); + written &= 0xffffffff; + str += written; + } + return len; +} diff --git a/arch/alpha/lib/stacktrace.c b/arch/alpha/lib/stacktrace.c new file mode 100644 index 0000000000..62454a7810 --- /dev/null +++ b/arch/alpha/lib/stacktrace.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +typedef unsigned int instr; + +#define MAJOR_OP 0xfc000000 +#define LDA_OP 0x20000000 +#define STQ_OP 0xb4000000 +#define BR_OP 0xc0000000 + +#define STK_ALLOC_1 0x23de8000 /* lda $30,-X($30) */ +#define STK_ALLOC_1M 0xffff8000 +#define STK_ALLOC_2 0x43c0153e /* subq $30,X,$30 */ +#define STK_ALLOC_2M 0xffe01fff + +#define MEM_REG 0x03e00000 +#define MEM_BASE 0x001f0000 +#define MEM_OFF 0x0000ffff +#define MEM_OFF_SIGN 0x00008000 +#define BASE_SP 0x001e0000 + +#define STK_ALLOC_MATCH(INSTR) \ + (((INSTR) & STK_ALLOC_1M) == STK_ALLOC_1 \ + || ((INSTR) & STK_ALLOC_2M) == STK_ALLOC_2) +#define STK_PUSH_MATCH(INSTR) \ + (((INSTR) & (MAJOR_OP | MEM_BASE | MEM_OFF_SIGN)) == (STQ_OP | BASE_SP)) +#define MEM_OP_OFFSET(INSTR) \ + (((long)((INSTR) & MEM_OFF) << 48) >> 48) +#define MEM_OP_REG(INSTR) \ + (((INSTR) & MEM_REG) >> 22) + +/* Branches, jumps, PAL calls, and illegal opcodes end a basic block. */ +#define BB_END(INSTR) \ + (((instr)(INSTR) >= BR_OP) | ((instr)(INSTR) < LDA_OP) | \ + ((((instr)(INSTR) ^ 0x60000000) < 0x20000000) & \ + (((instr)(INSTR) & 0x0c000000) != 0))) + +#define IS_KERNEL_TEXT(PC) ((unsigned long)(PC) > START_ADDR) + +static char reg_name[][4] = { + "v0 ", "t0 ", "t1 ", "t2 ", "t3 ", "t4 ", "t5 ", "t6 ", "t7 ", + "s0 ", "s1 ", "s2 ", "s3 ", "s4 ", "s5 ", "s6 ", "a0 ", "a1 ", + "a2 ", "a3 ", "a4 ", "a5 ", "t8 ", "t9 ", "t10", "t11", "ra ", + "pv ", "at ", "gp ", "sp ", "0" +}; + + +static instr * +display_stored_regs(instr * pro_pc, unsigned char * sp) +{ + instr * ret_pc = 0; + int reg; + unsigned long value; + + printk("Prologue [<%p>], Frame %p:\n", pro_pc, sp); + while (!BB_END(*pro_pc)) + if (STK_PUSH_MATCH(*pro_pc)) { + reg = (*pro_pc & MEM_REG) >> 21; + value = *(unsigned long *)(sp + (*pro_pc & MEM_OFF)); + if (reg == 26) + ret_pc = (instr *)value; + printk("\t\t%s / 0x%016lx\n", reg_name[reg], value); + } + return ret_pc; +} + +static instr * +seek_prologue(instr * pc) +{ + while (!STK_ALLOC_MATCH(*pc)) + --pc; + while (!BB_END(*(pc - 1))) + --pc; + return pc; +} + +static long +stack_increment(instr * prologue_pc) +{ + while (!STK_ALLOC_MATCH(*prologue_pc)) + ++prologue_pc; + + /* Count the bytes allocated. 
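The "<< 48 then >> 48" expression used by MEM_OP_OFFSET above, and again in the lda case of stack_increment just below, sign-extends the 16-bit displacement field of an Alpha memory-format instruction. A small illustrative equivalent with fixed-width types (not part of the patch; the name is invented):

#include <stdint.h>

/* Sign-extend the low 16 bits of an instruction word; written with an
 * xor/subtract so it stays fully defined in portable C.  Illustration only.
 */
static int64_t sext16_displacement(uint32_t insn)
{
        uint32_t lo = insn & 0xffff;

        return (int64_t)(lo ^ 0x8000) - 0x8000;
}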
*/ + if ((*prologue_pc & STK_ALLOC_1M) == STK_ALLOC_1M) + return -(((long)(*prologue_pc) << 48) >> 48); + else + return (*prologue_pc >> 13) & 0xff; +} + +void +stacktrace(void) +{ + instr * ret_pc; + instr * prologue = (instr *)stacktrace; + register unsigned char * sp __asm__ ("$30"); + + printk("\tstack trace:\n"); + do { + ret_pc = display_stored_regs(prologue, sp); + sp += stack_increment(prologue); + prologue = seek_prologue(ret_pc); + } while (IS_KERNEL_TEXT(ret_pc)); +} diff --git a/arch/alpha/lib/strcat.S b/arch/alpha/lib/strcat.S new file mode 100644 index 0000000000..055877dccd --- /dev/null +++ b/arch/alpha/lib/strcat.S @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/strcat.S + * Contributed by Richard Henderson (rth@tamu.edu) + * + * Append a null-terminated string from SRC to DST. + */ +#include + + .text + + .align 3 + .globl strcat + .ent strcat +strcat: + .frame $30, 0, $26 + .prologue 0 + + mov $16, $0 # set up return value + + /* Find the end of the string. */ + + ldq_u $1, 0($16) # load first quadword (a0 may be misaligned) + lda $2, -1 + insqh $2, $16, $2 + andnot $16, 7, $16 + or $2, $1, $1 + cmpbge $31, $1, $2 # bits set iff byte == 0 + bne $2, $found + +$loop: ldq $1, 8($16) + addq $16, 8, $16 + cmpbge $31, $1, $2 + beq $2, $loop + +$found: negq $2, $3 # clear all but least set bit + and $2, $3, $2 + + and $2, 0xf0, $3 # binary search for that set bit + and $2, 0xcc, $4 + and $2, 0xaa, $5 + cmovne $3, 4, $3 + cmovne $4, 2, $4 + cmovne $5, 1, $5 + addq $3, $4, $3 + addq $16, $5, $16 + addq $16, $3, $16 + + /* Now do the append. */ + + mov $26, $23 + br __stxcpy + + .end strcat +EXPORT_SYMBOL(strcat); diff --git a/arch/alpha/lib/strchr.S b/arch/alpha/lib/strchr.S new file mode 100644 index 0000000000..17871dd002 --- /dev/null +++ b/arch/alpha/lib/strchr.S @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/strchr.S + * Contributed by Richard Henderson (rth@tamu.edu) + * + * Return the address of a given character within a null-terminated + * string, or null if it is not found. + */ +#include +#include + + .set noreorder + .set noat + + .align 3 + .globl strchr + .ent strchr +strchr: + .frame sp, 0, ra + .prologue 0 + + zapnot a1, 1, a1 # e0 : zero extend the search character + ldq_u t0, 0(a0) # .. e1 : load first quadword + sll a1, 8, t5 # e0 : replicate the search character + andnot a0, 7, v0 # .. e1 : align our loop pointer + or t5, a1, a1 # e0 : + lda t4, -1 # .. e1 : build garbage mask + sll a1, 16, t5 # e0 : + cmpbge zero, t0, t2 # .. e1 : bits set iff byte == zero + mskqh t4, a0, t4 # e0 : + or t5, a1, a1 # .. e1 : + sll a1, 32, t5 # e0 : + cmpbge zero, t4, t4 # .. e1 : bits set iff byte is garbage + or t5, a1, a1 # e0 : + xor t0, a1, t1 # .. e1 : make bytes == c zero + cmpbge zero, t1, t3 # e0 : bits set iff byte == c + or t2, t3, t0 # e1 : bits set iff char match or zero match + andnot t0, t4, t0 # e0 : clear garbage bits + bne t0, $found # .. e1 (zdb) + +$loop: ldq t0, 8(v0) # e0 : + addq v0, 8, v0 # .. e1 : + nop # e0 : + xor t0, a1, t1 # .. e1 (ev5 data stall) + cmpbge zero, t0, t2 # e0 : bits set iff byte == 0 + cmpbge zero, t1, t3 # .. e1 : bits set iff byte == c + or t2, t3, t0 # e0 : + beq t0, $loop # .. e1 (zdb) + +$found: negq t0, t1 # e0 : clear all but least set bit + and t0, t1, t0 # e1 (stall) + + and t0, t3, t1 # e0 : bit set iff byte was the char + beq t1, $retnull # .. e1 (zdb) + + and t0, 0xf0, t2 # e0 : binary search for that set bit + and t0, 0xcc, t3 # .. 
e1 : + and t0, 0xaa, t4 # e0 : + cmovne t2, 4, t2 # .. e1 : + cmovne t3, 2, t3 # e0 : + cmovne t4, 1, t4 # .. e1 : + addq t2, t3, t2 # e0 : + addq v0, t4, v0 # .. e1 : + addq v0, t2, v0 # e0 : + ret # .. e1 : + +$retnull: + mov zero, v0 # e0 : + ret # .. e1 : + + .end strchr + EXPORT_SYMBOL(strchr) diff --git a/arch/alpha/lib/strcpy.S b/arch/alpha/lib/strcpy.S new file mode 100644 index 0000000000..cb74ad23a9 --- /dev/null +++ b/arch/alpha/lib/strcpy.S @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/strcpy.S + * Contributed by Richard Henderson (rth@tamu.edu) + * + * Copy a null-terminated string from SRC to DST. Return a pointer + * to the null-terminator in the source. + */ +#include + .text + + .align 3 + .globl strcpy + .ent strcpy +strcpy: + .frame $30, 0, $26 + .prologue 0 + + mov $16, $0 # set up return value + mov $26, $23 # set up return address + unop + br __stxcpy # do the copy + + .end strcpy + EXPORT_SYMBOL(strcpy) diff --git a/arch/alpha/lib/strlen.S b/arch/alpha/lib/strlen.S new file mode 100644 index 0000000000..dd882fe4d7 --- /dev/null +++ b/arch/alpha/lib/strlen.S @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu) + * + * Finds length of a 0-terminated string. Optimized for the + * Alpha architecture: + * + * - memory accessed as aligned quadwords only + * - uses bcmpge to compare 8 bytes in parallel + * - does binary search to find 0 byte in last + * quadword (HAKMEM needed 12 instructions to + * do this instead of the 9 instructions that + * binary search needs). + */ +#include + .set noreorder + .set noat + + .align 3 + + .globl strlen + .ent strlen + +strlen: + ldq_u $1, 0($16) # load first quadword ($16 may be misaligned) + lda $2, -1($31) + insqh $2, $16, $2 + andnot $16, 7, $0 + or $2, $1, $1 + cmpbge $31, $1, $2 # $2 <- bitmask: bit i == 1 <==> i-th byte == 0 + bne $2, found + +loop: ldq $1, 8($0) + addq $0, 8, $0 # addr += 8 + nop # helps dual issue last two insns + cmpbge $31, $1, $2 + beq $2, loop + +found: blbs $2, done # make aligned case fast + negq $2, $3 + and $2, $3, $2 + + and $2, 0x0f, $1 + addq $0, 4, $3 + cmoveq $1, $3, $0 + + and $2, 0x33, $1 + addq $0, 2, $3 + cmoveq $1, $3, $0 + + and $2, 0x55, $1 + addq $0, 1, $3 + cmoveq $1, $3, $0 + +done: subq $0, $16, $0 + ret $31, ($26) + + .end strlen + EXPORT_SYMBOL(strlen) diff --git a/arch/alpha/lib/strncat.S b/arch/alpha/lib/strncat.S new file mode 100644 index 0000000000..522fee3e26 --- /dev/null +++ b/arch/alpha/lib/strncat.S @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/strncat.S + * Contributed by Richard Henderson (rth@tamu.edu) + * + * Append no more than COUNT characters from the null-terminated string SRC + * to the null-terminated string DST. Always null-terminate the new DST. + * + * This differs slightly from the semantics in libc in that we never write + * past count, whereas libc may write to count+1. This follows the generic + * implementation in lib/string.c and is, IMHO, more sensible. + */ +#include + .text + + .align 3 + .globl strncat + .ent strncat +strncat: + .frame $30, 0, $26 + .prologue 0 + + mov $16, $0 # set up return value + beq $18, $zerocount + + /* Find the end of the string. 
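The 0x0f/0x33/0x55 sequence in strlen.S above (strcat.S and strncat.S use the complementary 0xf0/0xcc/0xaa form) is a branch-free binary search over the cmpbge result for the position of the first zero byte. A hedged C sketch of the same computation, with an invented name; the mask must be nonzero, as it is at that point in the assembly:

#include <stdint.h>

/* Index (0..7) of the first set bit in an 8-bit cmpbge mask, i.e. the
 * offset of the first zero byte within the quadword.  Illustration only.
 */
static unsigned int first_zero_byte(unsigned int mask)
{
        unsigned int bit = mask & -mask;        /* isolate lowest set bit */
        unsigned int idx = 0;

        if (!(bit & 0x0f))
                idx += 4;
        if (!(bit & 0x33))
                idx += 2;
        if (!(bit & 0x55))
                idx += 1;
        return idx;
}

The cmoveq instructions in the assembly play the role of the three conditionals, so the whole search issues without a single branch.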
*/ + + ldq_u $1, 0($16) # load first quadword ($16 may be misaligned) + lda $2, -1($31) + insqh $2, $16, $2 + andnot $16, 7, $16 + or $2, $1, $1 + cmpbge $31, $1, $2 # bits set iff byte == 0 + bne $2, $found + +$loop: ldq $1, 8($16) + addq $16, 8, $16 + cmpbge $31, $1, $2 + beq $2, $loop + +$found: negq $2, $3 # clear all but least set bit + and $2, $3, $2 + + and $2, 0xf0, $3 # binary search for that set bit + and $2, 0xcc, $4 + and $2, 0xaa, $5 + cmovne $3, 4, $3 + cmovne $4, 2, $4 + cmovne $5, 1, $5 + addq $3, $4, $3 + addq $16, $5, $16 + addq $16, $3, $16 + + /* Now do the append. */ + + bsr $23, __stxncpy + + /* Worry about the null termination. */ + + zapnot $1, $27, $2 # was last byte a null? + bne $2, 0f + ret + +0: cmplt $27, $24, $2 # did we fill the buffer completely? + or $2, $18, $2 + bne $2, 2f + + and $24, 0x80, $2 # no zero next byte + bne $2, 1f + + /* Here there are bytes left in the current word. Clear one. */ + addq $24, $24, $24 # end-of-count bit <<= 1 +2: zap $1, $24, $1 + stq_u $1, 0($16) + ret + +1: /* Here we must read the next DST word and clear the first byte. */ + ldq_u $1, 8($16) + zap $1, 1, $1 + stq_u $1, 8($16) + +$zerocount: + ret + + .end strncat + EXPORT_SYMBOL(strncat) diff --git a/arch/alpha/lib/strncpy.S b/arch/alpha/lib/strncpy.S new file mode 100644 index 0000000000..cc57fad8b7 --- /dev/null +++ b/arch/alpha/lib/strncpy.S @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/strncpy.S + * Contributed by Richard Henderson (rth@tamu.edu) + * + * Copy no more than COUNT bytes of the null-terminated string from + * SRC to DST. If SRC does not cover all of COUNT, the balance is + * zeroed. + * + * Or, rather, if the kernel cared about that weird ANSI quirk. This + * version has cropped that bit o' nastiness as well as assuming that + * __stxncpy is in range of a branch. + */ +#include + .set noat + .set noreorder + + .text + + .align 4 + .globl strncpy + .ent strncpy +strncpy: + .frame $30, 0, $26 + .prologue 0 + + mov $16, $0 # set return value now + beq $18, $zerolen + unop + bsr $23, __stxncpy # do the work of the copy + + unop + bne $18, $multiword # do we have full words left? + subq $24, 1, $3 # nope + subq $27, 1, $4 + + or $3, $24, $3 # clear the bits between the last + or $4, $27, $4 # written byte and the last byte in COUNT + andnot $3, $4, $4 + zap $1, $4, $1 + + stq_u $1, 0($16) + ret + + .align 4 +$multiword: + subq $27, 1, $2 # clear the final bits in the prev word + or $2, $27, $2 + zapnot $1, $2, $1 + subq $18, 1, $18 + + stq_u $1, 0($16) + addq $16, 8, $16 + unop + beq $18, 1f + + nop + unop + nop + blbc $18, 0f + + stq_u $31, 0($16) # zero one word + subq $18, 1, $18 + addq $16, 8, $16 + beq $18, 1f + +0: stq_u $31, 0($16) # zero two words + subq $18, 2, $18 + stq_u $31, 8($16) + addq $16, 16, $16 + bne $18, 0b + +1: ldq_u $1, 0($16) # clear the leading bits in the final word + subq $24, 1, $2 + or $2, $24, $2 + + zap $1, $2, $1 + stq_u $1, 0($16) +$zerolen: + ret + + .end strncpy + EXPORT_SYMBOL(strncpy) diff --git a/arch/alpha/lib/strrchr.S b/arch/alpha/lib/strrchr.S new file mode 100644 index 0000000000..7650ba99b7 --- /dev/null +++ b/arch/alpha/lib/strrchr.S @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/strrchr.S + * Contributed by Richard Henderson (rth@tamu.edu) + * + * Return the address of the last occurrence of a given character + * within a null-terminated string, or null if it is not found. 
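In the $eos block of the strrchr implementation that follows (and of the ev67 variant earlier in this patch), a negq/and/subq/or sequence converts the cmpbge null mask into a mask covering every byte up to and including the terminator, so that character matches past the end of the string can be discarded. The same computation in C, purely as an illustration (the name is invented; the mask is nonzero whenever this runs):

#include <stdint.h>

/* Given a cmpbge-style mask with a bit set for each zero byte, return a
 * mask of all byte positions up to and including the first zero byte.
 * Illustration only.
 */
static unsigned int through_first_null(unsigned int null_mask)
{
        unsigned int first = null_mask & -null_mask;    /* lowest set bit */

        return first | (first - 1);
}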
+ */ +#include +#include + + .set noreorder + .set noat + + .align 3 + .ent strrchr + .globl strrchr +strrchr: + .frame sp, 0, ra + .prologue 0 + + zapnot a1, 1, a1 # e0 : zero extend our test character + mov zero, t6 # .. e1 : t6 is last match aligned addr + sll a1, 8, t5 # e0 : replicate our test character + mov zero, t8 # .. e1 : t8 is last match byte compare mask + or t5, a1, a1 # e0 : + ldq_u t0, 0(a0) # .. e1 : load first quadword + sll a1, 16, t5 # e0 : + andnot a0, 7, v0 # .. e1 : align source addr + or t5, a1, a1 # e0 : + lda t4, -1 # .. e1 : build garbage mask + sll a1, 32, t5 # e0 : + cmpbge zero, t0, t1 # .. e1 : bits set iff byte == zero + mskqh t4, a0, t4 # e0 : + or t5, a1, a1 # .. e1 : character replication complete + xor t0, a1, t2 # e0 : make bytes == c zero + cmpbge zero, t4, t4 # .. e1 : bits set iff byte is garbage + cmpbge zero, t2, t3 # e0 : bits set iff byte == c + andnot t1, t4, t1 # .. e1 : clear garbage from null test + andnot t3, t4, t3 # e0 : clear garbage from char test + bne t1, $eos # .. e1 : did we already hit the terminator? + + /* Character search main loop */ +$loop: + ldq t0, 8(v0) # e0 : load next quadword + cmovne t3, v0, t6 # .. e1 : save previous comparisons match + cmovne t3, t3, t8 # e0 : + addq v0, 8, v0 # .. e1 : + xor t0, a1, t2 # e0 : + cmpbge zero, t0, t1 # .. e1 : bits set iff byte == zero + cmpbge zero, t2, t3 # e0 : bits set iff byte == c + beq t1, $loop # .. e1 : if we havnt seen a null, loop + + /* Mask out character matches after terminator */ +$eos: + negq t1, t4 # e0 : isolate first null byte match + and t1, t4, t4 # e1 : + subq t4, 1, t5 # e0 : build a mask of the bytes up to... + or t4, t5, t4 # e1 : ... and including the null + + and t3, t4, t3 # e0 : mask out char matches after null + cmovne t3, t3, t8 # .. e1 : save it, if match found + cmovne t3, v0, t6 # e0 : + + /* Locate the address of the last matched character */ + + /* Retain the early exit for the ev4 -- the ev5 mispredict penalty + is 5 cycles -- the same as just falling through. */ + beq t8, $retnull # .. e1 : + + and t8, 0xf0, t2 # e0 : binary search for the high bit set + cmovne t2, t2, t8 # .. e1 (zdb) + cmovne t2, 4, t2 # e0 : + and t8, 0xcc, t1 # .. e1 : + cmovne t1, t1, t8 # e0 : + cmovne t1, 2, t1 # .. e1 : + and t8, 0xaa, t0 # e0 : + cmovne t0, 1, t0 # .. e1 (zdb) + addq t2, t1, t1 # e0 : + addq t6, t0, v0 # .. e1 : add our aligned base ptr to the mix + addq v0, t1, v0 # e0 : + ret # .. e1 : + +$retnull: + mov zero, v0 # e0 : + ret # .. e1 : + + .end strrchr + EXPORT_SYMBOL(strrchr) diff --git a/arch/alpha/lib/stxcpy.S b/arch/alpha/lib/stxcpy.S new file mode 100644 index 0000000000..58723b0a36 --- /dev/null +++ b/arch/alpha/lib/stxcpy.S @@ -0,0 +1,290 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/stxcpy.S + * Contributed by Richard Henderson (rth@tamu.edu) + * + * Copy a null-terminated string from SRC to DST. + * + * This is an internal routine used by strcpy, stpcpy, and strcat. + * As such, it uses special linkage conventions to make implementation + * of these public functions more efficient. + * + * On input: + * t9 = return address + * a0 = DST + * a1 = SRC + * + * On output: + * t12 = bitmask (with one bit set) indicating the last byte written + * a0 = unaligned address of the last *word* written + * + * Furthermore, v0, a3-a5, t11, and t12 are untouched. 
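Before the body of __stxcpy, a rough illustration of what its co-aligned fast path ($a_loop further down) amounts to: copy whole quadwords until one of them contains the terminator, then hand that final word back for a masked partial store. This sketch is not part of the patch; it assumes quadword-aligned, padded buffers (the real code uses ldq_u/stq_u plus masking and never needs that assumption) and it omits the first-word and last-word fix-ups entirely.

#include <stdint.h>

/* Illustration only: store every quadword that is known not to contain
 * a zero byte, and stop on the first one that does.  The real routine
 * also returns the final word and its null mask to the caller.
 */
static uint64_t *copy_full_quads(uint64_t *dst, const uint64_t *src)
{
        uint64_t w = *src++;

        /* Portable stand-in for "cmpbge zero, w": nonzero iff w has a
         * zero byte.
         */
        while (!((w - 0x0101010101010101ULL) & ~w & 0x8080808080808080ULL)) {
                *dst++ = w;
                w = *src++;
        }
        return dst;     /* caller masks the terminator's quadword in */
}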
+ */ + +#include + + .set noat + .set noreorder + + .text + +/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that + doesn't like putting the entry point for a procedure somewhere in the + middle of the procedure descriptor. Work around this by putting the + aligned copy in its own procedure descriptor */ + + .ent stxcpy_aligned + .align 3 +stxcpy_aligned: + .frame sp, 0, t9 + .prologue 0 + + /* On entry to this basic block: + t0 == the first destination word for masking back in + t1 == the first source word. */ + + /* Create the 1st output word and detect 0's in the 1st input word. */ + lda t2, -1 # e1 : build a mask against false zero + mskqh t2, a1, t2 # e0 : detection in the src word + mskqh t1, a1, t3 # e0 : + ornot t1, t2, t2 # .. e1 : + mskql t0, a1, t0 # e0 : assemble the first output word + cmpbge zero, t2, t8 # .. e1 : bits set iff null found + or t0, t3, t1 # e0 : + bne t8, $a_eos # .. e1 : + + /* On entry to this basic block: + t0 == the first destination word for masking back in + t1 == a source word not containing a null. */ + +$a_loop: + stq_u t1, 0(a0) # e0 : + addq a0, 8, a0 # .. e1 : + ldq_u t1, 0(a1) # e0 : + addq a1, 8, a1 # .. e1 : + cmpbge zero, t1, t8 # e0 (stall) + beq t8, $a_loop # .. e1 (zdb) + + /* Take care of the final (partial) word store. + On entry to this basic block we have: + t1 == the source word containing the null + t8 == the cmpbge mask that found it. */ +$a_eos: + negq t8, t6 # e0 : find low bit set + and t8, t6, t12 # e1 (stall) + + /* For the sake of the cache, don't read a destination word + if we're not going to need it. */ + and t12, 0x80, t6 # e0 : + bne t6, 1f # .. e1 (zdb) + + /* We're doing a partial word store and so need to combine + our source and original destination words. */ + ldq_u t0, 0(a0) # e0 : + subq t12, 1, t6 # .. e1 : + zapnot t1, t6, t1 # e0 : clear src bytes >= null + or t12, t6, t8 # .. e1 : + zap t0, t8, t0 # e0 : clear dst bytes <= null + or t0, t1, t1 # e1 : + +1: stq_u t1, 0(a0) # e0 : + ret (t9) # .. e1 : + + .end stxcpy_aligned + + .align 3 + .ent __stxcpy + .globl __stxcpy +__stxcpy: + .frame sp, 0, t9 + .prologue 0 + + /* Are source and destination co-aligned? */ + xor a0, a1, t0 # e0 : + unop # : + and t0, 7, t0 # e0 : + bne t0, $unaligned # .. e1 : + + /* We are co-aligned; take care of a partial first word. */ + ldq_u t1, 0(a1) # e0 : load first src word + and a0, 7, t0 # .. e1 : take care not to load a word ... + addq a1, 8, a1 # e0 : + beq t0, stxcpy_aligned # .. e1 : ... if we wont need it + ldq_u t0, 0(a0) # e0 : + br stxcpy_aligned # .. e1 : + + +/* The source and destination are not co-aligned. Align the destination + and cope. We have to be very careful about not reading too much and + causing a SEGV. */ + + .align 3 +$u_head: + /* We know just enough now to be able to assemble the first + full source word. We can still find a zero at the end of it + that prevents us from outputting the whole thing. + + On entry to this basic block: + t0 == the first dest word, for masking back in, if needed else 0 + t1 == the low bits of the first source word + t6 == bytemask that is -1 in dest word bytes */ + + ldq_u t2, 8(a1) # e0 : + addq a1, 8, a1 # .. e1 : + + extql t1, a1, t1 # e0 : + extqh t2, a1, t4 # e0 : + mskql t0, a0, t0 # e0 : + or t1, t4, t1 # .. e1 : + mskqh t1, a0, t1 # e0 : + or t0, t1, t1 # e1 : + + or t1, t6, t6 # e0 : + cmpbge zero, t6, t8 # .. e1 : + lda t6, -1 # e0 : for masking just below + bne t8, $u_final # .. 
e1 : + + mskql t6, a1, t6 # e0 : mask out the bits we have + or t6, t2, t2 # e1 : already extracted before + cmpbge zero, t2, t8 # e0 : testing eos + bne t8, $u_late_head_exit # .. e1 (zdb) + + /* Finally, we've got all the stupid leading edge cases taken care + of and we can set up to enter the main loop. */ + + stq_u t1, 0(a0) # e0 : store first output word + addq a0, 8, a0 # .. e1 : + extql t2, a1, t0 # e0 : position ho-bits of lo word + ldq_u t2, 8(a1) # .. e1 : read next high-order source word + addq a1, 8, a1 # e0 : + cmpbge zero, t2, t8 # .. e1 : + nop # e0 : + bne t8, $u_eos # .. e1 : + + /* Unaligned copy main loop. In order to avoid reading too much, + the loop is structured to detect zeros in aligned source words. + This has, unfortunately, effectively pulled half of a loop + iteration out into the head and half into the tail, but it does + prevent nastiness from accumulating in the very thing we want + to run as fast as possible. + + On entry to this basic block: + t0 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word + + We further know that t2 does not contain a null terminator. */ + + .align 3 +$u_loop: + extqh t2, a1, t1 # e0 : extract high bits for current word + addq a1, 8, a1 # .. e1 : + extql t2, a1, t3 # e0 : extract low bits for next time + addq a0, 8, a0 # .. e1 : + or t0, t1, t1 # e0 : current dst word now complete + ldq_u t2, 0(a1) # .. e1 : load high word for next time + stq_u t1, -8(a0) # e0 : save the current word + mov t3, t0 # .. e1 : + cmpbge zero, t2, t8 # e0 : test new word for eos + beq t8, $u_loop # .. e1 : + + /* We've found a zero somewhere in the source word we just read. + If it resides in the lower half, we have one (probably partial) + word to write out, and if it resides in the upper half, we + have one full and one partial word left to write out. + + On entry to this basic block: + t0 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word. */ +$u_eos: + extqh t2, a1, t1 # e0 : + or t0, t1, t1 # e1 : first (partial) source word complete + + cmpbge zero, t1, t8 # e0 : is the null in this first bit? + bne t8, $u_final # .. e1 (zdb) + +$u_late_head_exit: + stq_u t1, 0(a0) # e0 : the null was in the high-order bits + addq a0, 8, a0 # .. e1 : + extql t2, a1, t1 # e0 : + cmpbge zero, t1, t8 # .. e1 : + + /* Take care of a final (probably partial) result word. + On entry to this basic block: + t1 == assembled source word + t8 == cmpbge mask that found the null. */ +$u_final: + negq t8, t6 # e0 : isolate low bit set + and t6, t8, t12 # e1 : + + and t12, 0x80, t6 # e0 : avoid dest word load if we can + bne t6, 1f # .. e1 (zdb) + + ldq_u t0, 0(a0) # e0 : + subq t12, 1, t6 # .. e1 : + or t6, t12, t8 # e0 : + zapnot t1, t6, t1 # .. e1 : kill source bytes >= null + zap t0, t8, t0 # e0 : kill dest bytes <= null + or t0, t1, t1 # e1 : + +1: stq_u t1, 0(a0) # e0 : + ret (t9) # .. e1 : + + /* Unaligned copy entry point. */ + .align 3 +$unaligned: + + ldq_u t1, 0(a1) # e0 : load first source word + + and a0, 7, t4 # .. e1 : find dest misalignment + and a1, 7, t5 # e0 : find src misalignment + + /* Conditionally load the first destination word and a bytemask + with 0xff indicating that the destination byte is sacrosanct. */ + + mov zero, t0 # .. e1 : + mov zero, t6 # e0 : + beq t4, 1f # .. e1 : + ldq_u t0, 0(a0) # e0 : + lda t6, -1 # .. e1 : + mskql t6, a0, t6 # e0 : +1: + subq a1, t4, a1 # .. 
e1 : sub dest misalignment from src addr + + /* If source misalignment is larger than dest misalignment, we need + extra startup checks to avoid SEGV. */ + + cmplt t4, t5, t12 # e0 : + beq t12, $u_head # .. e1 (zdb) + + lda t2, -1 # e1 : mask out leading garbage in source + mskqh t2, t5, t2 # e0 : + nop # e0 : + ornot t1, t2, t3 # .. e1 : + cmpbge zero, t3, t8 # e0 : is there a zero? + beq t8, $u_head # .. e1 (zdb) + + /* At this point we've found a zero in the first partial word of + the source. We need to isolate the valid source data and mask + it into the original destination data. (Incidentally, we know + that we'll need at least one byte of that original dest word.) */ + + ldq_u t0, 0(a0) # e0 : + + negq t8, t6 # .. e1 : build bitmask of bytes <= zero + and t6, t8, t12 # e0 : + and a1, 7, t5 # .. e1 : + subq t12, 1, t6 # e0 : + or t6, t12, t8 # e1 : + srl t12, t5, t12 # e0 : adjust final null return value + + zapnot t2, t8, t2 # .. e1 : prepare source word; mirror changes + and t1, t2, t1 # e1 : to source validity mask + extql t2, a1, t2 # .. e0 : + extql t1, a1, t1 # e0 : + + andnot t0, t2, t0 # .. e1 : zero place for source to reside + or t0, t1, t1 # e1 : and put it there + stq_u t1, 0(a0) # .. e0 : + ret (t9) # e1 : + + .end __stxcpy diff --git a/arch/alpha/lib/stxncpy.S b/arch/alpha/lib/stxncpy.S new file mode 100644 index 0000000000..011d9091c6 --- /dev/null +++ b/arch/alpha/lib/stxncpy.S @@ -0,0 +1,346 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/alpha/lib/stxncpy.S + * Contributed by Richard Henderson (rth@tamu.edu) + * + * Copy no more than COUNT bytes of the null-terminated string from + * SRC to DST. + * + * This is an internal routine used by strncpy, stpncpy, and strncat. + * As such, it uses special linkage conventions to make implementation + * of these public functions more efficient. + * + * On input: + * t9 = return address + * a0 = DST + * a1 = SRC + * a2 = COUNT + * + * Furthermore, COUNT may not be zero. + * + * On output: + * t0 = last word written + * t10 = bitmask (with one bit set) indicating the byte position of + * the end of the range specified by COUNT + * t12 = bitmask (with one bit set) indicating the last byte written + * a0 = unaligned address of the last *word* written + * a2 = the number of full words left in COUNT + * + * Furthermore, v0, a3-a5, t11, and $at are untouched. + */ + +#include + + .set noat + .set noreorder + + .text + +/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that + doesn't like putting the entry point for a procedure somewhere in the + middle of the procedure descriptor. Work around this by putting the + aligned copy in its own procedure descriptor */ + + .ent stxncpy_aligned + .align 3 +stxncpy_aligned: + .frame sp, 0, t9, 0 + .prologue 0 + + /* On entry to this basic block: + t0 == the first destination word for masking back in + t1 == the first source word. */ + + /* Create the 1st output word and detect 0's in the 1st input word. */ + lda t2, -1 # e1 : build a mask against false zero + mskqh t2, a1, t2 # e0 : detection in the src word + mskqh t1, a1, t3 # e0 : + ornot t1, t2, t2 # .. e1 : + mskql t0, a1, t0 # e0 : assemble the first output word + cmpbge zero, t2, t8 # .. e1 : bits set iff null found + or t0, t3, t0 # e0 : + beq a2, $a_eoc # .. e1 : + bne t8, $a_eos # .. e1 : + + /* On entry to this basic block: + t0 == a source word not containing a null. */ + +$a_loop: + stq_u t0, 0(a0) # e0 : + addq a0, 8, a0 # .. e1 : + ldq_u t0, 0(a1) # e0 : + addq a1, 8, a1 # .. 
e1 : + subq a2, 1, a2 # e0 : + cmpbge zero, t0, t8 # .. e1 (stall) + beq a2, $a_eoc # e1 : + beq t8, $a_loop # e1 : + + /* Take care of the final (partial) word store. At this point + the end-of-count bit is set in t8 iff it applies. + + On entry to this basic block we have: + t0 == the source word containing the null + t8 == the cmpbge mask that found it. */ + +$a_eos: + negq t8, t12 # e0 : find low bit set + and t8, t12, t12 # e1 (stall) + + /* For the sake of the cache, don't read a destination word + if we're not going to need it. */ + and t12, 0x80, t6 # e0 : + bne t6, 1f # .. e1 (zdb) + + /* We're doing a partial word store and so need to combine + our source and original destination words. */ + ldq_u t1, 0(a0) # e0 : + subq t12, 1, t6 # .. e1 : + or t12, t6, t8 # e0 : + unop # + zapnot t0, t8, t0 # e0 : clear src bytes > null + zap t1, t8, t1 # .. e1 : clear dst bytes <= null + or t0, t1, t0 # e1 : + +1: stq_u t0, 0(a0) # e0 : + ret (t9) # e1 : + + /* Add the end-of-count bit to the eos detection bitmask. */ +$a_eoc: + or t10, t8, t8 + br $a_eos + + .end stxncpy_aligned + + .align 3 + .ent __stxncpy + .globl __stxncpy +__stxncpy: + .frame sp, 0, t9, 0 + .prologue 0 + + /* Are source and destination co-aligned? */ + xor a0, a1, t1 # e0 : + and a0, 7, t0 # .. e1 : find dest misalignment + and t1, 7, t1 # e0 : + addq a2, t0, a2 # .. e1 : bias count by dest misalignment + subq a2, 1, a2 # e0 : + and a2, 7, t2 # e1 : + srl a2, 3, a2 # e0 : a2 = loop counter = (count - 1)/8 + addq zero, 1, t10 # .. e1 : + sll t10, t2, t10 # e0 : t10 = bitmask of last count byte + bne t1, $unaligned # .. e1 : + + /* We are co-aligned; take care of a partial first word. */ + + ldq_u t1, 0(a1) # e0 : load first src word + addq a1, 8, a1 # .. e1 : + + beq t0, stxncpy_aligned # avoid loading dest word if not needed + ldq_u t0, 0(a0) # e0 : + br stxncpy_aligned # .. e1 : + + +/* The source and destination are not co-aligned. Align the destination + and cope. We have to be very careful about not reading too much and + causing a SEGV. */ + + .align 3 +$u_head: + /* We know just enough now to be able to assemble the first + full source word. We can still find a zero at the end of it + that prevents us from outputting the whole thing. + + On entry to this basic block: + t0 == the first dest word, unmasked + t1 == the shifted low bits of the first source word + t6 == bytemask that is -1 in dest word bytes */ + + ldq_u t2, 8(a1) # e0 : load second src word + addq a1, 8, a1 # .. e1 : + mskql t0, a0, t0 # e0 : mask trailing garbage in dst + extqh t2, a1, t4 # e0 : + or t1, t4, t1 # e1 : first aligned src word complete + mskqh t1, a0, t1 # e0 : mask leading garbage in src + or t0, t1, t0 # e0 : first output word complete + or t0, t6, t6 # e1 : mask original data for zero test + cmpbge zero, t6, t8 # e0 : + beq a2, $u_eocfin # .. e1 : + lda t6, -1 # e0 : + bne t8, $u_final # .. e1 : + + mskql t6, a1, t6 # e0 : mask out bits already seen + nop # .. e1 : + stq_u t0, 0(a0) # e0 : store first output word + or t6, t2, t2 # .. e1 : + cmpbge zero, t2, t8 # e0 : find nulls in second partial + addq a0, 8, a0 # .. e1 : + subq a2, 1, a2 # e0 : + bne t8, $u_late_head_exit # .. e1 : + + /* Finally, we've got all the stupid leading edge cases taken care + of and we can set up to enter the main loop. */ + + extql t2, a1, t1 # e0 : position hi-bits of lo word + beq a2, $u_eoc # .. e1 : + ldq_u t2, 8(a1) # e0 : read next high-order source word + addq a1, 8, a1 # .. 
e1 : + extqh t2, a1, t0 # e0 : position lo-bits of hi word (stall) + cmpbge zero, t2, t8 # .. e1 : + nop # e0 : + bne t8, $u_eos # .. e1 : + + /* Unaligned copy main loop. In order to avoid reading too much, + the loop is structured to detect zeros in aligned source words. + This has, unfortunately, effectively pulled half of a loop + iteration out into the head and half into the tail, but it does + prevent nastiness from accumulating in the very thing we want + to run as fast as possible. + + On entry to this basic block: + t0 == the shifted low-order bits from the current source word + t1 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word + + We further know that t2 does not contain a null terminator. */ + + .align 3 +$u_loop: + or t0, t1, t0 # e0 : current dst word now complete + subq a2, 1, a2 # .. e1 : decrement word count + stq_u t0, 0(a0) # e0 : save the current word + addq a0, 8, a0 # .. e1 : + extql t2, a1, t1 # e0 : extract high bits for next time + beq a2, $u_eoc # .. e1 : + ldq_u t2, 8(a1) # e0 : load high word for next time + addq a1, 8, a1 # .. e1 : + nop # e0 : + cmpbge zero, t2, t8 # e1 : test new word for eos (stall) + extqh t2, a1, t0 # e0 : extract low bits for current word + beq t8, $u_loop # .. e1 : + + /* We've found a zero somewhere in the source word we just read. + If it resides in the lower half, we have one (probably partial) + word to write out, and if it resides in the upper half, we + have one full and one partial word left to write out. + + On entry to this basic block: + t0 == the shifted low-order bits from the current source word + t1 == the shifted high-order bits from the previous source word + t2 == the unshifted current source word. */ +$u_eos: + or t0, t1, t0 # e0 : first (partial) source word complete + nop # .. e1 : + cmpbge zero, t0, t8 # e0 : is the null in this first bit? + bne t8, $u_final # .. e1 (zdb) + + stq_u t0, 0(a0) # e0 : the null was in the high-order bits + addq a0, 8, a0 # .. e1 : + subq a2, 1, a2 # e1 : + +$u_late_head_exit: + extql t2, a1, t0 # .. e0 : + cmpbge zero, t0, t8 # e0 : + or t8, t10, t6 # e1 : + cmoveq a2, t6, t8 # e0 : + nop # .. e1 : + + /* Take care of a final (probably partial) result word. + On entry to this basic block: + t0 == assembled source word + t8 == cmpbge mask that found the null. */ +$u_final: + negq t8, t6 # e0 : isolate low bit set + and t6, t8, t12 # e1 : + + and t12, 0x80, t6 # e0 : avoid dest word load if we can + bne t6, 1f # .. e1 (zdb) + + ldq_u t1, 0(a0) # e0 : + subq t12, 1, t6 # .. e1 : + or t6, t12, t8 # e0 : + zapnot t0, t8, t0 # .. e1 : kill source bytes > null + zap t1, t8, t1 # e0 : kill dest bytes <= null + or t0, t1, t0 # e1 : + +1: stq_u t0, 0(a0) # e0 : + ret (t9) # .. e1 : + + /* Got to end-of-count before end of string. + On entry to this basic block: + t1 == the shifted high-order bits from the previous source word */ +$u_eoc: + and a1, 7, t6 # e1 : + sll t10, t6, t6 # e0 : + and t6, 0xff, t6 # e0 : + bne t6, 1f # .. e1 : + + ldq_u t2, 8(a1) # e0 : load final src word + nop # .. e1 : + extqh t2, a1, t0 # e0 : extract low bits for last word + or t1, t0, t1 # e1 : + +1: cmpbge zero, t1, t8 + mov t1, t0 + +$u_eocfin: # end-of-count, final word + or t10, t8, t8 + br $u_final + + /* Unaligned copy entry point. */ + .align 3 +$unaligned: + + ldq_u t1, 0(a1) # e0 : load first source word + + and a0, 7, t4 # .. 
e1 : find dest misalignment + and a1, 7, t5 # e0 : find src misalignment + + /* Conditionally load the first destination word and a bytemask + with 0xff indicating that the destination byte is sacrosanct. */ + + mov zero, t0 # .. e1 : + mov zero, t6 # e0 : + beq t4, 1f # .. e1 : + ldq_u t0, 0(a0) # e0 : + lda t6, -1 # .. e1 : + mskql t6, a0, t6 # e0 : + subq a1, t4, a1 # .. e1 : sub dest misalignment from src addr + + /* If source misalignment is larger than dest misalignment, we need + extra startup checks to avoid SEGV. */ + +1: cmplt t4, t5, t12 # e1 : + extql t1, a1, t1 # .. e0 : shift src into place + lda t2, -1 # e0 : for creating masks later + beq t12, $u_head # .. e1 : + + extql t2, a1, t2 # e0 : + cmpbge zero, t1, t8 # .. e1 : is there a zero? + andnot t2, t6, t2 # e0 : dest mask for a single word copy + or t8, t10, t5 # .. e1 : test for end-of-count too + cmpbge zero, t2, t3 # e0 : + cmoveq a2, t5, t8 # .. e1 : + andnot t8, t3, t8 # e0 : + beq t8, $u_head # .. e1 (zdb) + + /* At this point we've found a zero in the first partial word of + the source. We need to isolate the valid source data and mask + it into the original destination data. (Incidentally, we know + that we'll need at least one byte of that original dest word.) */ + + ldq_u t0, 0(a0) # e0 : + negq t8, t6 # .. e1 : build bitmask of bytes <= zero + mskqh t1, t4, t1 # e0 : + and t6, t8, t12 # .. e1 : + subq t12, 1, t6 # e0 : + or t6, t12, t8 # e1 : + + zapnot t2, t8, t2 # e0 : prepare source word; mirror changes + zapnot t1, t8, t1 # .. e1 : to source validity mask + + andnot t0, t2, t0 # e0 : zero place for source to reside + or t0, t1, t0 # e1 : and put it there + stq_u t0, 0(a0) # e0 : + ret (t9) # .. e1 : + + .end __stxncpy diff --git a/arch/alpha/lib/udelay.c b/arch/alpha/lib/udelay.c new file mode 100644 index 0000000000..8736482634 --- /dev/null +++ b/arch/alpha/lib/udelay.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 1993, 2000 Linus Torvalds + * + * Delay routines, using a pre-computed "loops_per_jiffy" value. + */ + +#include +#include /* for udelay's use of smp_processor_id */ +#include +#include +#include + +/* + * Use only for very small delays (< 1 msec). + * + * The active part of our cycle counter is only 32-bits wide, and + * we're treating the difference between two marks as signed. On + * a 1GHz box, that's about 2 seconds. + */ + +void +__delay(int loops) +{ + int tmp; + __asm__ __volatile__( + " rpcc %0\n" + " addl %1,%0,%1\n" + "1: rpcc %0\n" + " subl %1,%0,%0\n" + " bgt %0,1b" + : "=&r" (tmp), "=r" (loops) : "1"(loops)); +} +EXPORT_SYMBOL(__delay); + +#ifdef CONFIG_SMP +#define LPJ cpu_data[smp_processor_id()].loops_per_jiffy +#else +#define LPJ loops_per_jiffy +#endif + +void +udelay(unsigned long usecs) +{ + usecs *= (((unsigned long)HZ << 32) / 1000000) * LPJ; + __delay((long)usecs >> 32); +} +EXPORT_SYMBOL(udelay); + +void +ndelay(unsigned long nsecs) +{ + nsecs *= (((unsigned long)HZ << 32) / 1000000000) * LPJ; + __delay((long)nsecs >> 32); +} +EXPORT_SYMBOL(ndelay); diff --git a/arch/alpha/lib/udiv-qrnnd.S b/arch/alpha/lib/udiv-qrnnd.S new file mode 100644 index 0000000000..b887aa5428 --- /dev/null +++ b/arch/alpha/lib/udiv-qrnnd.S @@ -0,0 +1,165 @@ + # Alpha 21064 __udiv_qrnnd + # Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc. + + # This file is part of GCC. 
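One more aside, on udelay() above: "(((unsigned long)HZ << 32) / 1000000) * LPJ" is a 32.32 fixed-point scale factor, so converting microseconds into delay loops costs one multiply and one shift at run time instead of a division. A hedged sketch of the arithmetic (the HZ value and names are illustrative; a 64-bit unsigned long is assumed, as on Alpha, and like the original this is only meant for very small delays so the multiply cannot overflow):

#include <stdint.h>

/* loops = usecs * lpj * HZ / 1000000, computed in 32.32 fixed point.
 * Illustration only; 100 stands in for the kernel's HZ constant.
 */
static long usecs_to_loops(unsigned long usecs, unsigned long lpj)
{
        const unsigned long example_hz = 100;
        unsigned long scale = ((example_hz << 32) / 1000000) * lpj;

        return (long)((usecs * scale) >> 32);
}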
+ + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # In addition to the permissions in the GNU General Public License, the + # Free Software Foundation gives you unlimited permission to link the + # compiled version of this file with other programs, and to distribute + # those programs without any restriction coming from the use of this + # file. (The General Public License restrictions do apply in other + # respects; for example, they cover modification of the file, and + # distribution when not linked into another program.) + + # This file is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU General Public License + # along with GCC; see the file COPYING. If not, write to the + # Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. +#include + + .set noreorder + .set noat + + .text + + .globl __udiv_qrnnd + .ent __udiv_qrnnd +__udiv_qrnnd: + .frame $30,0,$26,0 + .prologue 0 + +#define cnt $2 +#define tmp $3 +#define rem_ptr $16 +#define n1 $17 +#define n0 $18 +#define d $19 +#define qb $20 +#define AT $at + + ldiq cnt,16 + blt d,$largedivisor + +$loop1: cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + subq cnt,1,cnt + bgt cnt,$loop1 + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +$largedivisor: + and n0,1,$4 + + srl n0,1,n0 + sll n1,63,tmp + or tmp,n0,n0 + srl n1,1,n1 + + and d,1,$6 + srl d,1,$5 + addq $5,$6,$5 + +$loop2: cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + subq cnt,1,cnt + bgt cnt,$loop2 + + addq n1,n1,n1 + addq $4,n1,n1 + bne $6,$Odd + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +$Odd: + /* q' in n0. 
r' in n1 */ + addq n1,n0,n1 + + cmpult n1,n0,tmp # tmp := carry from addq + subq n1,d,AT + addq n0,tmp,n0 + cmovne tmp,AT,n1 + + cmpult n1,d,tmp + addq n0,1,AT + cmoveq tmp,AT,n0 + subq n1,d,AT + cmoveq tmp,AT,n1 + + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + + .end __udiv_qrnnd +EXPORT_SYMBOL(__udiv_qrnnd) diff --git a/arch/alpha/math-emu/Makefile b/arch/alpha/math-emu/Makefile new file mode 100644 index 0000000000..3206402a87 --- /dev/null +++ b/arch/alpha/math-emu/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for the FPU instruction emulation. +# + +ccflags-y := -w + +obj-$(CONFIG_MATHEMU) += math-emu.o + +math-emu-objs := math.o diff --git a/arch/alpha/math-emu/math.c b/arch/alpha/math-emu/math.c new file mode 100644 index 0000000000..4212258f3c --- /dev/null +++ b/arch/alpha/math-emu/math.c @@ -0,0 +1,405 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include + +#include + +#include "sfp-util.h" +#include +#include +#include + +#define OPC_PAL 0x00 +#define OPC_INTA 0x10 +#define OPC_INTL 0x11 +#define OPC_INTS 0x12 +#define OPC_INTM 0x13 +#define OPC_FLTC 0x14 +#define OPC_FLTV 0x15 +#define OPC_FLTI 0x16 +#define OPC_FLTL 0x17 +#define OPC_MISC 0x18 +#define OPC_JSR 0x1a + +#define FOP_SRC_S 0 +#define FOP_SRC_T 2 +#define FOP_SRC_Q 3 + +#define FOP_FNC_ADDx 0 +#define FOP_FNC_CVTQL 0 +#define FOP_FNC_SUBx 1 +#define FOP_FNC_MULx 2 +#define FOP_FNC_DIVx 3 +#define FOP_FNC_CMPxUN 4 +#define FOP_FNC_CMPxEQ 5 +#define FOP_FNC_CMPxLT 6 +#define FOP_FNC_CMPxLE 7 +#define FOP_FNC_SQRTx 11 +#define FOP_FNC_CVTxS 12 +#define FOP_FNC_CVTxT 14 +#define FOP_FNC_CVTxQ 15 + +#define MISC_TRAPB 0x0000 +#define MISC_EXCB 0x0400 + +extern unsigned long alpha_read_fp_reg (unsigned long reg); +extern void alpha_write_fp_reg (unsigned long reg, unsigned long val); +extern unsigned long alpha_read_fp_reg_s (unsigned long reg); +extern void alpha_write_fp_reg_s (unsigned long reg, unsigned long val); + + +#ifdef MODULE + +MODULE_DESCRIPTION("FP Software completion module"); +MODULE_LICENSE("GPL v2"); + +extern long (*alpha_fp_emul_imprecise)(struct pt_regs *, unsigned long); +extern long (*alpha_fp_emul) (unsigned long pc); + +static long (*save_emul_imprecise)(struct pt_regs *, unsigned long); +static long (*save_emul) (unsigned long pc); + +long do_alpha_fp_emul_imprecise(struct pt_regs *, unsigned long); +long do_alpha_fp_emul(unsigned long); + +static int alpha_fp_emul_init_module(void) +{ + save_emul_imprecise = alpha_fp_emul_imprecise; + save_emul = alpha_fp_emul; + alpha_fp_emul_imprecise = do_alpha_fp_emul_imprecise; + alpha_fp_emul = do_alpha_fp_emul; + return 0; +} +module_init(alpha_fp_emul_init_module); + +static void alpha_fp_emul_cleanup_module(void) +{ + alpha_fp_emul_imprecise = save_emul_imprecise; + alpha_fp_emul = save_emul; +} +module_exit(alpha_fp_emul_cleanup_module); + +#undef alpha_fp_emul_imprecise +#define alpha_fp_emul_imprecise do_alpha_fp_emul_imprecise +#undef alpha_fp_emul +#define alpha_fp_emul do_alpha_fp_emul + +#endif /* MODULE */ + + +/* + * Emulate the floating point instruction at address PC. Returns -1 if the + * instruction to be emulated is illegal (such as with the opDEC trap), else + * the SI_CODE for a SIGFPE signal, else 0 if everything's ok. + * + * Notice that the kernel does not and cannot use FP regs. This is good + * because it means that instead of saving/restoring all fp regs, we simply + * stick the result of the operation into the appropriate register. 
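As a reading aid for the operand decode at the top of alpha_fp_emul() below: the shifts and masks pull the standard FP-operate fields out of the 32-bit instruction word, from the destination register in the low bits up to the two source registers under the major opcode. A tiny standalone dump of one arbitrary, made-up encoding shows the layout:

    /* Field layout used by the emulator's decode; the encoding below is
       an arbitrary example, not a particular real instruction. */
    #include <stdio.h>

    int main(void)
    {
        unsigned int insn = 0x580b1041;

        printf("fc   = %u\n", (insn >>  0) & 0x1f);  /* destination register */
        printf("func = %u\n", (insn >>  5) & 0xf);   /* add/sub/mul/div/... */
        printf("src  = %u\n", (insn >>  9) & 0x3);   /* source type: S, T or Q */
        printf("mode = %u\n", (insn >> 11) & 0x3);   /* rounding mode, 3 = dynamic */
        printf("fb   = %u\n", (insn >> 16) & 0x1f);  /* source register B */
        printf("fa   = %u\n", (insn >> 21) & 0x1f);  /* source register A */
        return 0;
    }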
+ */ +long +alpha_fp_emul (unsigned long pc) +{ + FP_DECL_EX; + FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); + FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); + + unsigned long fa, fb, fc, func, mode, src; + unsigned long res, va, vb, vc, swcr, fpcr; + __u32 insn; + long si_code; + + get_user(insn, (__u32 __user *)pc); + fc = (insn >> 0) & 0x1f; /* destination register */ + fb = (insn >> 16) & 0x1f; + fa = (insn >> 21) & 0x1f; + func = (insn >> 5) & 0xf; + src = (insn >> 9) & 0x3; + mode = (insn >> 11) & 0x3; + + fpcr = rdfpcr(); + swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr); + + if (mode == 3) { + /* Dynamic -- get rounding mode from fpcr. */ + mode = (fpcr >> FPCR_DYN_SHIFT) & 3; + } + + switch (src) { + case FOP_SRC_S: + va = alpha_read_fp_reg_s(fa); + vb = alpha_read_fp_reg_s(fb); + + FP_UNPACK_SP(SA, &va); + FP_UNPACK_SP(SB, &vb); + + switch (func) { + case FOP_FNC_SUBx: + FP_SUB_S(SR, SA, SB); + goto pack_s; + + case FOP_FNC_ADDx: + FP_ADD_S(SR, SA, SB); + goto pack_s; + + case FOP_FNC_MULx: + FP_MUL_S(SR, SA, SB); + goto pack_s; + + case FOP_FNC_DIVx: + FP_DIV_S(SR, SA, SB); + goto pack_s; + + case FOP_FNC_SQRTx: + FP_SQRT_S(SR, SB); + goto pack_s; + } + goto bad_insn; + + case FOP_SRC_T: + va = alpha_read_fp_reg(fa); + vb = alpha_read_fp_reg(fb); + + if ((func & ~3) == FOP_FNC_CMPxUN) { + FP_UNPACK_RAW_DP(DA, &va); + FP_UNPACK_RAW_DP(DB, &vb); + if (!DA_e && !_FP_FRAC_ZEROP_1(DA)) { + FP_SET_EXCEPTION(FP_EX_DENORM); + if (FP_DENORM_ZERO) + _FP_FRAC_SET_1(DA, _FP_ZEROFRAC_1); + } + if (!DB_e && !_FP_FRAC_ZEROP_1(DB)) { + FP_SET_EXCEPTION(FP_EX_DENORM); + if (FP_DENORM_ZERO) + _FP_FRAC_SET_1(DB, _FP_ZEROFRAC_1); + } + FP_CMP_D(res, DA, DB, 3); + vc = 0x4000000000000000UL; + /* CMPTEQ, CMPTUN don't trap on QNaN, + while CMPTLT and CMPTLE do */ + if (res == 3 + && ((func & 3) >= 2 + || FP_ISSIGNAN_D(DA) + || FP_ISSIGNAN_D(DB))) { + FP_SET_EXCEPTION(FP_EX_INVALID); + } + switch (func) { + case FOP_FNC_CMPxUN: if (res != 3) vc = 0; break; + case FOP_FNC_CMPxEQ: if (res) vc = 0; break; + case FOP_FNC_CMPxLT: if (res != -1) vc = 0; break; + case FOP_FNC_CMPxLE: if ((long)res > 0) vc = 0; break; + } + goto done_d; + } + + FP_UNPACK_DP(DA, &va); + FP_UNPACK_DP(DB, &vb); + + switch (func) { + case FOP_FNC_SUBx: + FP_SUB_D(DR, DA, DB); + goto pack_d; + + case FOP_FNC_ADDx: + FP_ADD_D(DR, DA, DB); + goto pack_d; + + case FOP_FNC_MULx: + FP_MUL_D(DR, DA, DB); + goto pack_d; + + case FOP_FNC_DIVx: + FP_DIV_D(DR, DA, DB); + goto pack_d; + + case FOP_FNC_SQRTx: + FP_SQRT_D(DR, DB); + goto pack_d; + + case FOP_FNC_CVTxS: + /* It is irritating that DEC encoded CVTST with + SRC == T_floating. It is also interesting that + the bit used to tell the two apart is /U... */ + if (insn & 0x2000) { + FP_CONV(S,D,1,1,SR,DB); + goto pack_s; + } else { + vb = alpha_read_fp_reg_s(fb); + FP_UNPACK_SP(SB, &vb); + DR_c = DB_c; + DR_s = DB_s; + DR_e = DB_e + (1024 - 128); + DR_f = SB_f << (52 - 23); + goto pack_d; + } + + case FOP_FNC_CVTxQ: + if (DB_c == FP_CLS_NAN + && (_FP_FRAC_HIGH_RAW_D(DB) & _FP_QNANBIT_D)) { + /* AAHB Table B-2 says QNaN should not trigger INV */ + vc = 0; + } else + FP_TO_INT_ROUND_D(vc, DB, 64, 2); + goto done_d; + } + goto bad_insn; + + case FOP_SRC_Q: + vb = alpha_read_fp_reg(fb); + + switch (func) { + case FOP_FNC_CVTQL: + /* Notice: We can get here only due to an integer + overflow. Such overflows are reported as invalid + ops. We return the result the hw would have + computed. 
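The shift-and-mask expression just below is that hardware result: bits 31:30 of the longword move up to bits 63:62 of the FP register image and bits 29:0 move to bits 58:29. Checked in isolation:

    /* Standalone check of the CVTQL bit placement used below. */
    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t vb = 0x80000001u;                  /* sign bit and low bit set */
        uint64_t vc = ((vb & 0xc0000000) << 32) |   /* sign and msb */
                      ((vb & 0x3fffffff) << 29);    /* rest of the int */

        /* Prints 0x8000000020000000: bit 31 landed at 63, bit 0 at 29. */
        printf("vb = 0x%08llx -> vc = 0x%016llx\n",
               (unsigned long long)vb, (unsigned long long)vc);
        return 0;
    }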
*/ + vc = ((vb & 0xc0000000) << 32 | /* sign and msb */ + (vb & 0x3fffffff) << 29); /* rest of the int */ + FP_SET_EXCEPTION (FP_EX_INVALID); + goto done_d; + + case FOP_FNC_CVTxS: + FP_FROM_INT_S(SR, ((long)vb), 64, long); + goto pack_s; + + case FOP_FNC_CVTxT: + FP_FROM_INT_D(DR, ((long)vb), 64, long); + goto pack_d; + } + goto bad_insn; + } + goto bad_insn; + +pack_s: + FP_PACK_SP(&vc, SR); + if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ)) + vc = 0; + alpha_write_fp_reg_s(fc, vc); + goto done; + +pack_d: + FP_PACK_DP(&vc, DR); + if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ)) + vc = 0; +done_d: + alpha_write_fp_reg(fc, vc); + goto done; + + /* + * Take the appropriate action for each possible + * floating-point result: + * + * - Set the appropriate bits in the FPCR + * - If the specified exception is enabled in the FPCR, + * return. The caller (entArith) will dispatch + * the appropriate signal to the translated program. + * + * In addition, properly track the exception state in software + * as described in the Alpha Architecture Handbook section 4.7.7.3. + */ +done: + if (_fex) { + /* Record exceptions in software control word. */ + swcr |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT); + current_thread_info()->ieee_state + |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT); + + /* Update hardware control register. */ + fpcr &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr |= ieee_swcr_to_fpcr(swcr); + wrfpcr(fpcr); + + /* Do we generate a signal? */ + _fex = _fex & swcr & IEEE_TRAP_ENABLE_MASK; + si_code = 0; + if (_fex) { + if (_fex & IEEE_TRAP_ENABLE_DNO) si_code = FPE_FLTUND; + if (_fex & IEEE_TRAP_ENABLE_INE) si_code = FPE_FLTRES; + if (_fex & IEEE_TRAP_ENABLE_UNF) si_code = FPE_FLTUND; + if (_fex & IEEE_TRAP_ENABLE_OVF) si_code = FPE_FLTOVF; + if (_fex & IEEE_TRAP_ENABLE_DZE) si_code = FPE_FLTDIV; + if (_fex & IEEE_TRAP_ENABLE_INV) si_code = FPE_FLTINV; + } + + return si_code; + } + + /* We used to write the destination register here, but DEC FORTRAN + requires that the result *always* be written... so we do the write + immediately after the operations above. */ + + return 0; + +bad_insn: + printk(KERN_ERR "alpha_fp_emul: Invalid FP insn %#x at %#lx\n", + insn, pc); + return -1; +} + +long +alpha_fp_emul_imprecise (struct pt_regs *regs, unsigned long write_mask) +{ + unsigned long trigger_pc = regs->pc - 4; + unsigned long insn, opcode, rc, si_code = 0; + + /* + * Turn off the bits corresponding to registers that are the + * target of instructions that set bits in the exception + * summary register. We have some slack doing this because a + * register that is the target of a trapping instruction can + * be written at most once in the trap shadow. + * + * Branches, jumps, TRAPBs, EXCBs and calls to PALcode all + * bound the trap shadow, so we need not look any further than + * up to the first occurrence of such an instruction. + */ + while (write_mask) { + get_user(insn, (__u32 __user *)(trigger_pc)); + opcode = insn >> 26; + rc = insn & 0x1f; + + switch (opcode) { + case OPC_PAL: + case OPC_JSR: + case 0x30 ... 0x3f: /* branches */ + goto egress; + + case OPC_MISC: + switch (insn & 0xffff) { + case MISC_TRAPB: + case MISC_EXCB: + goto egress; + + default: + break; + } + break; + + case OPC_INTA: + case OPC_INTL: + case OPC_INTS: + case OPC_INTM: + write_mask &= ~(1UL << rc); + break; + + case OPC_FLTC: + case OPC_FLTV: + case OPC_FLTI: + case OPC_FLTL: + write_mask &= ~(1UL << (rc + 32)); + break; + } + if (!write_mask) { + /* Re-execute insns in the trap-shadow. 
*/ + regs->pc = trigger_pc + 4; + si_code = alpha_fp_emul(trigger_pc); + goto egress; + } + trigger_pc -= 4; + } + +egress: + return si_code; +} diff --git a/arch/alpha/math-emu/qrnnd.S b/arch/alpha/math-emu/qrnnd.S new file mode 100644 index 0000000000..d6373ec1bf --- /dev/null +++ b/arch/alpha/math-emu/qrnnd.S @@ -0,0 +1,163 @@ + # Alpha 21064 __udiv_qrnnd + # Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc. + + # This file is part of GCC. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # In addition to the permissions in the GNU General Public License, the + # Free Software Foundation gives you unlimited permission to link the + # compiled version of this file with other programs, and to distribute + # those programs without any restriction coming from the use of this + # file. (The General Public License restrictions do apply in other + # respects; for example, they cover modification of the file, and + # distribution when not linked into another program.) + + # This file is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU General Public License + # along with GCC; see the file COPYING. If not, write to the + # Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + # MA 02111-1307, USA. + + .set noreorder + .set noat + + .text + + .globl __udiv_qrnnd + .ent __udiv_qrnnd +__udiv_qrnnd: + .frame $30,0,$26,0 + .prologue 0 + +#define cnt $2 +#define tmp $3 +#define rem_ptr $16 +#define n1 $17 +#define n0 $18 +#define d $19 +#define qb $20 +#define AT $at + + ldiq cnt,16 + blt d,$largedivisor + +$loop1: cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + subq cnt,1,cnt + bgt cnt,$loop1 + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +$largedivisor: + and n0,1,$4 + + srl n0,1,n0 + sll n1,63,tmp + or tmp,n0,n0 + srl n1,1,n1 + + and d,1,$6 + srl d,1,$5 + addq $5,$6,$5 + +$loop2: cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + subq cnt,1,cnt + bgt cnt,$loop2 + + addq n1,n1,n1 + addq $4,n1,n1 + bne $6,$Odd + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +$Odd: + /* q' in n0. 
r' in n1 */ + addq n1,n0,n1 + + cmpult n1,n0,tmp # tmp := carry from addq + subq n1,d,AT + addq n0,tmp,n0 + cmovne tmp,AT,n1 + + cmpult n1,d,tmp + addq n0,1,AT + cmoveq tmp,AT,n0 + subq n1,d,AT + cmoveq tmp,AT,n1 + + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + + .end __udiv_qrnnd diff --git a/arch/alpha/math-emu/sfp-util.h b/arch/alpha/math-emu/sfp-util.h new file mode 100644 index 0000000000..ae30f34175 --- /dev/null +++ b/arch/alpha/math-emu/sfp-util.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include +#include +#include + +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + ((sl) = (al) + (bl), (sh) = (ah) + (bh) + ((sl) < (al))) + +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + ((sl) = (al) - (bl), (sh) = (ah) - (bh) - ((al) < (bl))) + +#define umul_ppmm(wh, wl, u, v) \ + __asm__ ("mulq %2,%3,%1; umulh %2,%3,%0" \ + : "=r" ((UDItype)(wh)), \ + "=&r" ((UDItype)(wl)) \ + : "r" ((UDItype)(u)), \ + "r" ((UDItype)(v))) + +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { unsigned long __r; \ + (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ + (r) = __r; \ + } while (0) +extern unsigned long __udiv_qrnnd (unsigned long *, unsigned long, + unsigned long , unsigned long); + +#define UDIV_NEEDS_NORMALIZATION 1 + +#define abort() goto bad_insn + +#ifndef __LITTLE_ENDIAN +#define __LITTLE_ENDIAN -1 +#endif +#define __BYTE_ORDER __LITTLE_ENDIAN diff --git a/arch/alpha/mm/Makefile b/arch/alpha/mm/Makefile new file mode 100644 index 0000000000..bd770302eb --- /dev/null +++ b/arch/alpha/mm/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for the linux alpha-specific parts of the memory manager. +# + +ccflags-y := -Werror + +obj-y := init.o fault.o diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c new file mode 100644 index 0000000000..ec20c1004a --- /dev/null +++ b/arch/alpha/mm/fault.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/mm/fault.c + * + * Copyright (C) 1995 Linus Torvalds + */ + +#include +#include +#include +#include + +#define __EXTERN_INLINE inline +#include +#include +#undef __EXTERN_INLINE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *); + + +/* + * Force a new ASN for a task. + */ + +#ifndef CONFIG_SMP +unsigned long last_asn = ASN_FIRST_VERSION; +#endif + +void +__load_new_mm_context(struct mm_struct *next_mm) +{ + unsigned long mmc; + struct pcb_struct *pcb; + + mmc = __get_new_mm_context(next_mm, smp_processor_id()); + next_mm->context[smp_processor_id()] = mmc; + + pcb = ¤t_thread_info()->pcb; + pcb->asn = mmc & HARDWARE_ASN_MASK; + pcb->ptbr = ((unsigned long) next_mm->pgd - IDENT_ADDR) >> PAGE_SHIFT; + + __reload_thread(pcb); +} + + +/* + * This routine handles page faults. It determines the address, + * and the problem, and then passes it off to handle_mm_fault(). + * + * mmcsr: + * 0 = translation not valid + * 1 = access violation + * 2 = fault-on-read + * 3 = fault-on-execute + * 4 = fault-on-write + * + * cause: + * -1 = instruction fetch + * 0 = load + * 1 = store + * + * Registers $9 through $15 are saved in a block just prior to `regs' and + * are saved and restored around the call to allow exception code to + * modify them. + */ + +/* Macro for exception fixup code to access integer registers. */ +#define dpf_reg(r) \ + (((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-16 : \ + (r) <= 18 ? 
(r)+10 : (r)-10]) + +asmlinkage void +do_page_fault(unsigned long address, unsigned long mmcsr, + long cause, struct pt_regs *regs) +{ + struct vm_area_struct * vma; + struct mm_struct *mm = current->mm; + const struct exception_table_entry *fixup; + int si_code = SEGV_MAPERR; + vm_fault_t fault; + unsigned int flags = FAULT_FLAG_DEFAULT; + + /* As of EV6, a load into $31/$f31 is a prefetch, and never faults + (or is suppressed by the PALcode). Support that for older CPUs + by ignoring such an instruction. */ + if (cause == 0) { + unsigned int insn; + __get_user(insn, (unsigned int __user *)regs->pc); + if ((insn >> 21 & 0x1f) == 0x1f && + /* ldq ldl ldt lds ldg ldf ldwu ldbu */ + (1ul << (insn >> 26) & 0x30f00001400ul)) { + regs->pc += 4; + return; + } + } + + /* If we're in an interrupt context, or have no user context, + we must not take the fault. */ + if (!mm || faulthandler_disabled()) + goto no_context; + +#ifdef CONFIG_ALPHA_LARGE_VMALLOC + if (address >= TASK_SIZE) + goto vmalloc_fault; +#endif + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); +retry: + mmap_read_lock(mm); + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (expand_stack(vma, address)) + goto bad_area; + + /* Ok, we have a good vm_area for this memory access, so + we can handle it. */ + good_area: + si_code = SEGV_ACCERR; + if (cause < 0) { + if (!(vma->vm_flags & VM_EXEC)) + goto bad_area; + } else if (!cause) { + /* Allow reads even for write-only mappings */ + if (!(vma->vm_flags & (VM_READ | VM_WRITE))) + goto bad_area; + } else { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + flags |= FAULT_FLAG_WRITE; + } + + /* If for any reason at all we couldn't handle the fault, + make sure we exit gracefully rather than endlessly redo + the fault. */ + fault = handle_mm_fault(vma, address, flags, regs); + + if (fault_signal_pending(fault, regs)) + return; + + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); + } + + if (fault & VM_FAULT_RETRY) { + flags |= FAULT_FLAG_TRIED; + + /* No need to mmap_read_unlock(mm) as we would + * have already released it in __lock_page_or_retry + * in mm/filemap.c. + */ + + goto retry; + } + + mmap_read_unlock(mm); + + return; + + /* Something tried to access memory that isn't in our memory map. + Fix it, but check if it's kernel or user first. */ + bad_area: + mmap_read_unlock(mm); + + if (user_mode(regs)) + goto do_sigsegv; + + no_context: + /* Are we prepared to handle this fault as an exception? */ + if ((fixup = search_exception_tables(regs->pc)) != 0) { + unsigned long newpc; + newpc = fixup_exception(dpf_reg, fixup, regs->pc); + regs->pc = newpc; + return; + } + + /* Oops. The kernel tried to access some bad page. We'll have to + terminate things with extreme prejudice. */ + printk(KERN_ALERT "Unable to handle kernel paging request at " + "virtual address %016lx\n", address); + die_if_kernel("Oops", regs, cause, (unsigned long*)regs - 16); + make_task_dead(SIGKILL); + + /* We ran out of memory, or some other thing happened to us that + made us unable to handle the page fault gracefully. 
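Back near the top of do_page_fault(), the prefetch test packs the interesting load opcodes into the single constant 0x30f00001400, so membership is one shift plus an AND. Rebuilding that constant from the opcode values of the instructions named in the neighbouring comment (ldbu, ldwu, ldf, ldg, lds, ldt, ldl, ldq) reproduces it:

    /* Recompute the load-opcode bitmap and compare it with the kernel's
       constant; the opcode values are the architected Alpha encodings. */
    #include <stdio.h>

    int main(void)
    {
        /*                    ldbu  ldwu  ldf   ldg   lds   ldt   ldl   ldq */
        unsigned int op[] = { 0x0a, 0x0c, 0x20, 0x21, 0x22, 0x23, 0x28, 0x29 };
        unsigned long long mask = 0;
        unsigned int i;

        for (i = 0; i < sizeof(op) / sizeof(op[0]); i++)
            mask |= 1ull << op[i];

        printf("rebuilt 0x%llx, kernel constant 0x%llx: %s\n",
               mask, 0x30f00001400ull,
               mask == 0x30f00001400ull ? "match" : "mismatch");
        return 0;
    }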
*/ + out_of_memory: + mmap_read_unlock(mm); + if (!user_mode(regs)) + goto no_context; + pagefault_out_of_memory(); + return; + + do_sigbus: + mmap_read_unlock(mm); + /* Send a sigbus, regardless of whether we were in kernel + or user mode. */ + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *) address); + if (!user_mode(regs)) + goto no_context; + return; + + do_sigsegv: + force_sig_fault(SIGSEGV, si_code, (void __user *) address); + return; + +#ifdef CONFIG_ALPHA_LARGE_VMALLOC + vmalloc_fault: + if (user_mode(regs)) + goto do_sigsegv; + else { + /* Synchronize this task's top level page-table + with the "reference" page table from init. */ + long index = pgd_index(address); + pgd_t *pgd, *pgd_k; + + pgd = current->active_mm->pgd + index; + pgd_k = swapper_pg_dir + index; + if (!pgd_present(*pgd) && pgd_present(*pgd_k)) { + pgd_val(*pgd) = pgd_val(*pgd_k); + return; + } + goto no_context; + } +#endif +} diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c new file mode 100644 index 0000000000..f6114d0335 --- /dev/null +++ b/arch/alpha/mm/init.c @@ -0,0 +1,282 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/mm/init.c + * + * Copyright (C) 1995 Linus Torvalds + */ + +/* 2.3.x zone allocator, 1999 Andrea Arcangeli */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* max_low_pfn */ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern void die_if_kernel(char *,struct pt_regs *,long); + +static struct pcb_struct original_pcb; + +pgd_t * +pgd_alloc(struct mm_struct *mm) +{ + pgd_t *ret, *init; + + ret = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + init = pgd_offset(&init_mm, 0UL); + if (ret) { +#ifdef CONFIG_ALPHA_LARGE_VMALLOC + memcpy (ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD - 1)*sizeof(pgd_t)); +#else + pgd_val(ret[PTRS_PER_PGD-2]) = pgd_val(init[PTRS_PER_PGD-2]); +#endif + + /* The last PGD entry is the VPTB self-map. */ + pgd_val(ret[PTRS_PER_PGD-1]) + = pte_val(mk_pte(virt_to_page(ret), PAGE_KERNEL)); + } + return ret; +} + + +/* + * BAD_PAGE is the page that is used for page faults when linux + * is out-of-memory. Older versions of linux just did a + * do_exit(), but using this instead means there is less risk + * for a process dying in kernel mode, possibly leaving an inode + * unused etc.. + * + * BAD_PAGETABLE is the accompanying page-table: it is initialized + * to point to BAD_PAGE entries. + * + * ZERO_PAGE is a special page that is used for zero-initialized + * data and COW. + */ +pmd_t * +__bad_pagetable(void) +{ + memset((void *) EMPTY_PGT, 0, PAGE_SIZE); + return (pmd_t *) EMPTY_PGT; +} + +pte_t +__bad_page(void) +{ + memset((void *) EMPTY_PGE, 0, PAGE_SIZE); + return pte_mkdirty(mk_pte(virt_to_page(EMPTY_PGE), PAGE_SHARED)); +} + +static inline unsigned long +load_PCB(struct pcb_struct *pcb) +{ + register unsigned long sp __asm__("$30"); + pcb->ksp = sp; + return __reload_thread(pcb); +} + +/* Set up initial PCB, VPTB, and other such nicities. */ + +static inline void +switch_to_system_map(void) +{ + unsigned long newptbr; + unsigned long original_pcb_ptr; + + /* Initialize the kernel's page tables. Linux puts the vptb in + the last slot of the L1 page table. 
*/ + memset(swapper_pg_dir, 0, PAGE_SIZE); + newptbr = ((unsigned long) swapper_pg_dir - PAGE_OFFSET) >> PAGE_SHIFT; + pgd_val(swapper_pg_dir[1023]) = + (newptbr << 32) | pgprot_val(PAGE_KERNEL); + + /* Set the vptb. This is often done by the bootloader, but + shouldn't be required. */ + if (hwrpb->vptb != 0xfffffffe00000000UL) { + wrvptptr(0xfffffffe00000000UL); + hwrpb->vptb = 0xfffffffe00000000UL; + hwrpb_update_checksum(hwrpb); + } + + /* Also set up the real kernel PCB while we're at it. */ + init_thread_info.pcb.ptbr = newptbr; + init_thread_info.pcb.flags = 1; /* set FEN, clear everything else */ + original_pcb_ptr = load_PCB(&init_thread_info.pcb); + tbia(); + + /* Save off the contents of the original PCB so that we can + restore the original console's page tables for a clean reboot. + + Note that the PCB is supposed to be a physical address, but + since KSEG values also happen to work, folks get confused. + Check this here. */ + + if (original_pcb_ptr < PAGE_OFFSET) { + original_pcb_ptr = (unsigned long) + phys_to_virt(original_pcb_ptr); + } + original_pcb = *(struct pcb_struct *) original_pcb_ptr; +} + +int callback_init_done; + +void * __init +callback_init(void * kernel_end) +{ + struct crb_struct * crb; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + void *two_pages; + + /* Starting at the HWRPB, locate the CRB. */ + crb = (struct crb_struct *)((char *)hwrpb + hwrpb->crb_offset); + + if (alpha_using_srm) { + /* Tell the console whither it is to be remapped. */ + if (srm_fixup(VMALLOC_START, (unsigned long)hwrpb)) + __halt(); /* "We're boned." --Bender */ + + /* Edit the procedure descriptors for DISPATCH and FIXUP. */ + crb->dispatch_va = (struct procdesc_struct *) + (VMALLOC_START + (unsigned long)crb->dispatch_va + - crb->map[0].va); + crb->fixup_va = (struct procdesc_struct *) + (VMALLOC_START + (unsigned long)crb->fixup_va + - crb->map[0].va); + } + + switch_to_system_map(); + + /* Allocate one PGD and one PMD. In the case of SRM, we'll need + these to actually remap the console. There is an assumption + here that only one of each is needed, and this allows for 8MB. + On systems with larger consoles, additional pages will be + allocated as needed during the mapping process. + + In the case of not SRM, but not CONFIG_ALPHA_LARGE_VMALLOC, + we need to allocate the PGD we use for vmalloc before we start + forking other tasks. */ + + two_pages = (void *) + (((unsigned long)kernel_end + ~PAGE_MASK) & PAGE_MASK); + kernel_end = two_pages + 2*PAGE_SIZE; + memset(two_pages, 0, 2*PAGE_SIZE); + + pgd = pgd_offset_k(VMALLOC_START); + p4d = p4d_offset(pgd, VMALLOC_START); + pud = pud_offset(p4d, VMALLOC_START); + pud_set(pud, (pmd_t *)two_pages); + pmd = pmd_offset(pud, VMALLOC_START); + pmd_set(pmd, (pte_t *)(two_pages + PAGE_SIZE)); + + if (alpha_using_srm) { + static struct vm_struct console_remap_vm; + unsigned long nr_pages = 0; + unsigned long vaddr; + unsigned long i, j; + + /* calculate needed size */ + for (i = 0; i < crb->map_entries; ++i) + nr_pages += crb->map[i].count; + + /* register the vm area */ + console_remap_vm.flags = VM_ALLOC; + console_remap_vm.size = nr_pages << PAGE_SHIFT; + vm_area_register_early(&console_remap_vm, PAGE_SIZE); + + vaddr = (unsigned long)console_remap_vm.addr; + + /* Set up the third level PTEs and update the virtual + addresses of the CRB entries. 
*/ + for (i = 0; i < crb->map_entries; ++i) { + unsigned long pfn = crb->map[i].pa >> PAGE_SHIFT; + crb->map[i].va = vaddr; + for (j = 0; j < crb->map[i].count; ++j) { + /* Newer consoles (especially on larger + systems) may require more pages of + PTEs. Grab additional pages as needed. */ + if (pmd != pmd_offset(pud, vaddr)) { + memset(kernel_end, 0, PAGE_SIZE); + pmd = pmd_offset(pud, vaddr); + pmd_set(pmd, (pte_t *)kernel_end); + kernel_end += PAGE_SIZE; + } + set_pte(pte_offset_kernel(pmd, vaddr), + pfn_pte(pfn, PAGE_KERNEL)); + pfn++; + vaddr += PAGE_SIZE; + } + } + } + + callback_init_done = 1; + return kernel_end; +} + +/* + * paging_init() sets up the memory map. + */ +void __init paging_init(void) +{ + unsigned long max_zone_pfn[MAX_NR_ZONES] = {0, }; + unsigned long dma_pfn; + + dma_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; + max_pfn = max_low_pfn; + + max_zone_pfn[ZONE_DMA] = dma_pfn; + max_zone_pfn[ZONE_NORMAL] = max_pfn; + + /* Initialize mem_map[]. */ + free_area_init(max_zone_pfn); + + /* Initialize the kernel's ZERO_PGE. */ + memset((void *)ZERO_PGE, 0, PAGE_SIZE); +} + +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SRM) +void +srm_paging_stop (void) +{ + /* Move the vptb back to where the SRM console expects it. */ + swapper_pg_dir[1] = swapper_pg_dir[1023]; + tbia(); + wrvptptr(0x200000000UL); + hwrpb->vptb = 0x200000000UL; + hwrpb_update_checksum(hwrpb); + + /* Reload the page tables that the console had in use. */ + load_PCB(&original_pcb); + tbia(); +} +#endif + +void __init +mem_init(void) +{ + set_max_mapnr(max_low_pfn); + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); + memblock_free_all(); +} diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c new file mode 100644 index 0000000000..0636e254a2 --- /dev/null +++ b/arch/alpha/mm/numa.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/alpha/mm/numa.c + * + * DISCONTIGMEM NUMA alpha support. + * + * Copyright (C) 2001 Andrea Arcangeli SuSE + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +pg_data_t node_data[MAX_NUMNODES]; +EXPORT_SYMBOL(node_data); + +#undef DEBUG_DISCONTIG +#ifdef DEBUG_DISCONTIG +#define DBGDCONT(args...) printk(args) +#else +#define DBGDCONT(args...) +#endif + +#define for_each_mem_cluster(memdesc, _cluster, i) \ + for ((_cluster) = (memdesc)->cluster, (i) = 0; \ + (i) < (memdesc)->numclusters; (i)++, (_cluster)++) + +static void __init show_mem_layout(void) +{ + struct memclust_struct * cluster; + struct memdesc_struct * memdesc; + int i; + + /* Find free clusters, and init and free the bootmem accordingly. 
*/ + memdesc = (struct memdesc_struct *) + (hwrpb->mddt_offset + (unsigned long) hwrpb); + + printk("Raw memory layout:\n"); + for_each_mem_cluster(memdesc, cluster, i) { + printk(" memcluster %2d, usage %1lx, start %8lu, end %8lu\n", + i, cluster->usage, cluster->start_pfn, + cluster->start_pfn + cluster->numpages); + } +} + +static void __init +setup_memory_node(int nid, void *kernel_end) +{ + extern unsigned long mem_size_limit; + struct memclust_struct * cluster; + struct memdesc_struct * memdesc; + unsigned long start_kernel_pfn, end_kernel_pfn; + unsigned long start, end; + unsigned long node_pfn_start, node_pfn_end; + unsigned long node_min_pfn, node_max_pfn; + int i; + int show_init = 0; + + /* Find the bounds of current node */ + node_pfn_start = (node_mem_start(nid)) >> PAGE_SHIFT; + node_pfn_end = node_pfn_start + (node_mem_size(nid) >> PAGE_SHIFT); + + /* Find free clusters, and init and free the bootmem accordingly. */ + memdesc = (struct memdesc_struct *) + (hwrpb->mddt_offset + (unsigned long) hwrpb); + + /* find the bounds of this node (node_min_pfn/node_max_pfn) */ + node_min_pfn = ~0UL; + node_max_pfn = 0UL; + for_each_mem_cluster(memdesc, cluster, i) { + /* Bit 0 is console/PALcode reserved. Bit 1 is + non-volatile memory -- we might want to mark + this for later. */ + if (cluster->usage & 3) + continue; + + start = cluster->start_pfn; + end = start + cluster->numpages; + + if (start >= node_pfn_end || end <= node_pfn_start) + continue; + + if (!show_init) { + show_init = 1; + printk("Initializing bootmem allocator on Node ID %d\n", nid); + } + printk(" memcluster %2d, usage %1lx, start %8lu, end %8lu\n", + i, cluster->usage, cluster->start_pfn, + cluster->start_pfn + cluster->numpages); + + if (start < node_pfn_start) + start = node_pfn_start; + if (end > node_pfn_end) + end = node_pfn_end; + + if (start < node_min_pfn) + node_min_pfn = start; + if (end > node_max_pfn) + node_max_pfn = end; + } + + if (mem_size_limit && node_max_pfn > mem_size_limit) { + static int msg_shown = 0; + if (!msg_shown) { + msg_shown = 1; + printk("setup: forcing memory size to %ldK (from %ldK).\n", + mem_size_limit << (PAGE_SHIFT - 10), + node_max_pfn << (PAGE_SHIFT - 10)); + } + node_max_pfn = mem_size_limit; + } + + if (node_min_pfn >= node_max_pfn) + return; + + /* Update global {min,max}_low_pfn from node information. */ + if (node_min_pfn < min_low_pfn) + min_low_pfn = node_min_pfn; + if (node_max_pfn > max_low_pfn) + max_pfn = max_low_pfn = node_max_pfn; + +#if 0 /* we'll try this one again in a little while */ + /* Cute trick to make sure our local node data is on local memory */ + node_data[nid] = (pg_data_t *)(__va(node_min_pfn << PAGE_SHIFT)); +#endif + printk(" Detected node memory: start %8lu, end %8lu\n", + node_min_pfn, node_max_pfn); + + DBGDCONT(" DISCONTIG: node_data[%d] is at 0x%p\n", nid, NODE_DATA(nid)); + + /* Find the bounds of kernel memory. */ + start_kernel_pfn = PFN_DOWN(KERNEL_START_PHYS); + end_kernel_pfn = PFN_UP(virt_to_phys(kernel_end)); + + if (!nid && (node_max_pfn < end_kernel_pfn || node_min_pfn > start_kernel_pfn)) + panic("kernel loaded out of ram"); + + memblock_add_node(PFN_PHYS(node_min_pfn), + (node_max_pfn - node_min_pfn) << PAGE_SHIFT, nid); + + /* Zone start phys-addr must be 2^(MAX_ORDER-1) aligned. + Note that we round this down, not up - node memory + has much larger alignment than 8Mb, so it's safe. 
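The masking statement just below does that rounding; assuming the common MAX_ORDER of 11 and Alpha's 8 KB pages, the alignment works out to 1024 pages, which is the 8 MB figure mentioned above. The same step in isolation:

    /* Round a node's first pfn down to the buddy-allocator alignment.
       MAX_ORDER = 11 and an 8 KB page are assumptions for illustration. */
    #include <stdio.h>

    int main(void)
    {
        unsigned long max_order = 11, page_shift = 13;
        unsigned long align = 1UL << (max_order - 1);   /* 1024 pages */
        unsigned long node_min_pfn = 34567;             /* example value */

        printf("alignment: %lu pages = %lu MB\n",
               align, (align << page_shift) >> 20);
        printf("node_min_pfn %lu rounds down to %lu\n",
               node_min_pfn, node_min_pfn & ~(align - 1));
        return 0;
    }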
*/ + node_min_pfn &= ~((1UL << (MAX_ORDER-1))-1); + + NODE_DATA(nid)->node_start_pfn = node_min_pfn; + NODE_DATA(nid)->node_present_pages = node_max_pfn - node_min_pfn; + + node_set_online(nid); +} + +void __init +setup_memory(void *kernel_end) +{ + unsigned long kernel_size; + int nid; + + show_mem_layout(); + + nodes_clear(node_online_map); + + min_low_pfn = ~0UL; + max_low_pfn = 0UL; + for (nid = 0; nid < MAX_NUMNODES; nid++) + setup_memory_node(nid, kernel_end); + + kernel_size = virt_to_phys(kernel_end) - KERNEL_START_PHYS; + memblock_reserve(KERNEL_START_PHYS, kernel_size); + +#ifdef CONFIG_BLK_DEV_INITRD + initrd_start = INITRD_START; + if (initrd_start) { + extern void *move_initrd(unsigned long); + + initrd_end = initrd_start+INITRD_SIZE; + printk("Initial ramdisk at: 0x%p (%lu bytes)\n", + (void *) initrd_start, INITRD_SIZE); + + if ((void *)initrd_end > phys_to_virt(PFN_PHYS(max_low_pfn))) { + if (!move_initrd(PFN_PHYS(max_low_pfn))) + printk("initrd extends beyond end of memory " + "(0x%08lx > 0x%p)\ndisabling initrd\n", + initrd_end, + phys_to_virt(PFN_PHYS(max_low_pfn))); + } else { + nid = kvaddr_to_nid(initrd_start); + memblock_reserve(virt_to_phys((void *)initrd_start), + INITRD_SIZE); + } + } +#endif /* CONFIG_BLK_DEV_INITRD */ +} + +void __init paging_init(void) +{ + unsigned long max_zone_pfn[MAX_NR_ZONES] = {0, }; + unsigned long dma_local_pfn; + + /* + * The old global MAX_DMA_ADDRESS per-arch API doesn't fit + * in the NUMA model, for now we convert it to a pfn and + * we interpret this pfn as a local per-node information. + * This issue isn't very important since none of these machines + * have legacy ISA slots anyways. + */ + dma_local_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; + + max_zone_pfn[ZONE_DMA] = dma_local_pfn; + max_zone_pfn[ZONE_NORMAL] = max_pfn; + + free_area_init(max_zone_pfn); + + /* Initialize the kernel's ZERO_PGE. 
*/ + memset((void *)ZERO_PGE, 0, PAGE_SIZE); +} diff --git a/arch/alpha/oprofile/Makefile b/arch/alpha/oprofile/Makefile new file mode 100644 index 0000000000..79f32820a4 --- /dev/null +++ b/arch/alpha/oprofile/Makefile @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: GPL-2.0 +ccflags-y := -Werror -Wno-sign-compare + +obj-$(CONFIG_OPROFILE) += oprofile.o + +DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ + oprof.o cpu_buffer.o buffer_sync.o \ + event_buffer.o oprofile_files.o \ + oprofilefs.o oprofile_stats.o \ + timer_int.o ) + +oprofile-y := $(DRIVER_OBJS) common.o +oprofile-$(CONFIG_ALPHA_GENERIC) += op_model_ev4.o \ + op_model_ev5.o \ + op_model_ev6.o \ + op_model_ev67.o +oprofile-$(CONFIG_ALPHA_EV4) += op_model_ev4.o +oprofile-$(CONFIG_ALPHA_EV5) += op_model_ev5.o +oprofile-$(CONFIG_ALPHA_EV6) += op_model_ev6.o \ + op_model_ev67.o diff --git a/arch/alpha/oprofile/common.c b/arch/alpha/oprofile/common.c new file mode 100644 index 0000000000..1b1259c7d7 --- /dev/null +++ b/arch/alpha/oprofile/common.c @@ -0,0 +1,189 @@ +/** + * @file arch/alpha/oprofile/common.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Richard Henderson + */ + +#include +#include +#include +#include +#include +#include + +#include "op_impl.h" + +extern struct op_axp_model op_model_ev4 __attribute__((weak)); +extern struct op_axp_model op_model_ev5 __attribute__((weak)); +extern struct op_axp_model op_model_pca56 __attribute__((weak)); +extern struct op_axp_model op_model_ev6 __attribute__((weak)); +extern struct op_axp_model op_model_ev67 __attribute__((weak)); + +static struct op_axp_model *model; + +extern void (*perf_irq)(unsigned long, struct pt_regs *); +static void (*save_perf_irq)(unsigned long, struct pt_regs *); + +static struct op_counter_config ctr[20]; +static struct op_system_config sys; +static struct op_register_config reg; + +/* Called from do_entInt to handle the performance monitor interrupt. */ + +static void +op_handle_interrupt(unsigned long which, struct pt_regs *regs) +{ + model->handle_interrupt(which, regs, ctr); + + /* If the user has selected an interrupt frequency that is + not exactly the width of the counter, write a new value + into the counter such that it'll overflow after N more + events. */ + if ((reg.need_reset >> which) & 1) + model->reset_ctr(®, which); +} + +static int +op_axp_setup(void) +{ + unsigned long i, e; + + /* Install our interrupt handler into the existing hook. */ + save_perf_irq = perf_irq; + perf_irq = op_handle_interrupt; + + /* Compute the mask of enabled counters. */ + for (i = e = 0; i < model->num_counters; ++i) + if (ctr[i].enabled) + e |= 1 << i; + reg.enable = e; + + /* Pre-compute the values to stuff in the hardware registers. */ + model->reg_setup(®, ctr, &sys); + + /* Configure the registers on all cpus. */ + smp_call_function(model->cpu_setup, ®, 1); + model->cpu_setup(®); + return 0; +} + +static void +op_axp_shutdown(void) +{ + /* Remove our interrupt handler. We may be removing this module. */ + perf_irq = save_perf_irq; +} + +static void +op_axp_cpu_start(void *dummy) +{ + wrperfmon(1, reg.enable); +} + +static int +op_axp_start(void) +{ + smp_call_function(op_axp_cpu_start, NULL, 1); + op_axp_cpu_start(NULL); + return 0; +} + +static inline void +op_axp_cpu_stop(void *dummy) +{ + /* Disable performance monitoring for all counters. 
*/ + wrperfmon(0, -1); +} + +static void +op_axp_stop(void) +{ + smp_call_function(op_axp_cpu_stop, NULL, 1); + op_axp_cpu_stop(NULL); +} + +static int +op_axp_create_files(struct dentry *root) +{ + int i; + + for (i = 0; i < model->num_counters; ++i) { + struct dentry *dir; + char buf[4]; + + snprintf(buf, sizeof buf, "%d", i); + dir = oprofilefs_mkdir(root, buf); + + oprofilefs_create_ulong(dir, "enabled", &ctr[i].enabled); + oprofilefs_create_ulong(dir, "event", &ctr[i].event); + oprofilefs_create_ulong(dir, "count", &ctr[i].count); + /* Dummies. */ + oprofilefs_create_ulong(dir, "kernel", &ctr[i].kernel); + oprofilefs_create_ulong(dir, "user", &ctr[i].user); + oprofilefs_create_ulong(dir, "unit_mask", &ctr[i].unit_mask); + } + + if (model->can_set_proc_mode) { + oprofilefs_create_ulong(root, "enable_pal", + &sys.enable_pal); + oprofilefs_create_ulong(root, "enable_kernel", + &sys.enable_kernel); + oprofilefs_create_ulong(root, "enable_user", + &sys.enable_user); + } + + return 0; +} + +int __init +oprofile_arch_init(struct oprofile_operations *ops) +{ + struct op_axp_model *lmodel = NULL; + + switch (implver()) { + case IMPLVER_EV4: + lmodel = &op_model_ev4; + break; + case IMPLVER_EV5: + /* 21164PC has a slightly different set of events. + Recognize the chip by the presence of the MAX insns. */ + if (!amask(AMASK_MAX)) + lmodel = &op_model_pca56; + else + lmodel = &op_model_ev5; + break; + case IMPLVER_EV6: + /* 21264A supports ProfileMe. + Recognize the chip by the presence of the CIX insns. */ + if (!amask(AMASK_CIX)) + lmodel = &op_model_ev67; + else + lmodel = &op_model_ev6; + break; + } + + if (!lmodel) + return -ENODEV; + model = lmodel; + + ops->create_files = op_axp_create_files; + ops->setup = op_axp_setup; + ops->shutdown = op_axp_shutdown; + ops->start = op_axp_start; + ops->stop = op_axp_stop; + ops->cpu_type = lmodel->cpu_type; + + printk(KERN_INFO "oprofile: using %s performance monitoring.\n", + lmodel->cpu_type); + + return 0; +} + + +void +oprofile_arch_exit(void) +{ +} diff --git a/arch/alpha/oprofile/op_impl.h b/arch/alpha/oprofile/op_impl.h new file mode 100644 index 0000000000..b2b87ae9a3 --- /dev/null +++ b/arch/alpha/oprofile/op_impl.h @@ -0,0 +1,55 @@ +/** + * @file arch/alpha/oprofile/op_impl.h + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Richard Henderson + */ + +#ifndef OP_IMPL_H +#define OP_IMPL_H 1 + +/* Per-counter configuration as set via oprofilefs. */ +struct op_counter_config { + unsigned long enabled; + unsigned long event; + unsigned long count; + /* Dummies because I am too lazy to hack the userspace tools. */ + unsigned long kernel; + unsigned long user; + unsigned long unit_mask; +}; + +/* System-wide configuration as set via oprofilefs. */ +struct op_system_config { + unsigned long enable_pal; + unsigned long enable_kernel; + unsigned long enable_user; +}; + +/* Cached values for the various performance monitoring registers. */ +struct op_register_config { + unsigned long enable; + unsigned long mux_select; + unsigned long proc_mode; + unsigned long freq; + unsigned long reset_values; + unsigned long need_reset; +}; + +/* Per-architecture configuration and hooks. 
*/ +struct op_axp_model { + void (*reg_setup) (struct op_register_config *, + struct op_counter_config *, + struct op_system_config *); + void (*cpu_setup) (void *); + void (*reset_ctr) (struct op_register_config *, unsigned long); + void (*handle_interrupt) (unsigned long, struct pt_regs *, + struct op_counter_config *); + char *cpu_type; + unsigned char num_counters; + unsigned char can_set_proc_mode; +}; + +#endif diff --git a/arch/alpha/oprofile/op_model_ev4.c b/arch/alpha/oprofile/op_model_ev4.c new file mode 100644 index 0000000000..086a0d5445 --- /dev/null +++ b/arch/alpha/oprofile/op_model_ev4.c @@ -0,0 +1,114 @@ +/** + * @file arch/alpha/oprofile/op_model_ev4.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Richard Henderson + */ + +#include +#include +#include + +#include "op_impl.h" + + +/* Compute all of the registers in preparation for enabling profiling. */ + +static void +ev4_reg_setup(struct op_register_config *reg, + struct op_counter_config *ctr, + struct op_system_config *sys) +{ + unsigned long ctl = 0, count, hilo; + + /* Select desired events. We've mapped the event numbers + such that they fit directly into the event selection fields. + + Note that there is no "off" setting. In both cases we select + the EXTERNAL event source, hoping that it'll be the lowest + frequency, and set the frequency counter to LOW. The interrupts + for these "disabled" counter overflows are ignored by the + interrupt handler. + + This is most irritating, because the hardware *can* enable and + disable the interrupts for these counters independently, but the + wrperfmon interface doesn't allow it. */ + + ctl |= (ctr[0].enabled ? ctr[0].event << 8 : 14 << 8); + ctl |= (ctr[1].enabled ? (ctr[1].event - 16) << 32 : 7ul << 32); + + /* EV4 can not read or write its counter registers. The only + thing one can do at all is see if you overflow and get an + interrupt. We can set the width of the counters, to some + extent. Take the interrupt count selected by the user, + map it onto one of the possible values, and write it back. */ + + count = ctr[0].count; + if (count <= 4096) + count = 4096, hilo = 1; + else + count = 65536, hilo = 0; + ctr[0].count = count; + ctl |= (ctr[0].enabled && hilo) << 3; + + count = ctr[1].count; + if (count <= 256) + count = 256, hilo = 1; + else + count = 4096, hilo = 0; + ctr[1].count = count; + ctl |= (ctr[1].enabled && hilo); + + reg->mux_select = ctl; + + /* Select performance monitoring options. */ + /* ??? Need to come up with some mechanism to trace only + selected processes. EV4 does not have a mechanism to + select kernel or user mode only. For now, enable always. */ + reg->proc_mode = 0; + + /* Frequency is folded into mux_select for EV4. */ + reg->freq = 0; + + /* See above regarding no writes. */ + reg->reset_values = 0; + reg->need_reset = 0; + +} + +/* Program all of the registers in preparation for enabling profiling. */ + +static void +ev4_cpu_setup(void *x) +{ + struct op_register_config *reg = x; + + wrperfmon(2, reg->mux_select); + wrperfmon(3, reg->proc_mode); +} + +static void +ev4_handle_interrupt(unsigned long which, struct pt_regs *regs, + struct op_counter_config *ctr) +{ + /* EV4 can't properly disable counters individually. + Discard "disabled" events now. */ + if (!ctr[which].enabled) + return; + + /* Record the sample. 
*/ + oprofile_add_sample(regs, which); +} + + +struct op_axp_model op_model_ev4 = { + .reg_setup = ev4_reg_setup, + .cpu_setup = ev4_cpu_setup, + .reset_ctr = NULL, + .handle_interrupt = ev4_handle_interrupt, + .cpu_type = "alpha/ev4", + .num_counters = 2, + .can_set_proc_mode = 0, +}; diff --git a/arch/alpha/oprofile/op_model_ev5.c b/arch/alpha/oprofile/op_model_ev5.c new file mode 100644 index 0000000000..c300f5ef34 --- /dev/null +++ b/arch/alpha/oprofile/op_model_ev5.c @@ -0,0 +1,209 @@ +/** + * @file arch/alpha/oprofile/op_model_ev5.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Richard Henderson + */ + +#include +#include +#include + +#include "op_impl.h" + + +/* Compute all of the registers in preparation for enabling profiling. + + The 21164 (EV5) and 21164PC (PCA65) vary in the bit placement and + meaning of the "CBOX" events. Given that we don't care about meaning + at this point, arrange for the difference in bit placement to be + handled by common code. */ + +static void +common_reg_setup(struct op_register_config *reg, + struct op_counter_config *ctr, + struct op_system_config *sys, + int cbox1_ofs, int cbox2_ofs) +{ + int i, ctl, reset, need_reset; + + /* Select desired events. The event numbers are selected such + that they map directly into the event selection fields: + + PCSEL0: 0, 1 + PCSEL1: 24-39 + CBOX1: 40-47 + PCSEL2: 48-63 + CBOX2: 64-71 + + There are two special cases, in that CYCLES can be measured + on PCSEL[02], and SCACHE_WRITE can be measured on CBOX[12]. + These event numbers are canonicalizes to their first appearance. */ + + ctl = 0; + for (i = 0; i < 3; ++i) { + unsigned long event = ctr[i].event; + if (!ctr[i].enabled) + continue; + + /* Remap the duplicate events, as described above. */ + if (i == 2) { + if (event == 0) + event = 12+48; + else if (event == 2+41) + event = 4+65; + } + + /* Convert the event numbers onto mux_select bit mask. */ + if (event < 2) + ctl |= event << 31; + else if (event < 24) + /* error */; + else if (event < 40) + ctl |= (event - 24) << 4; + else if (event < 48) + ctl |= (event - 40) << cbox1_ofs | 15 << 4; + else if (event < 64) + ctl |= event - 48; + else if (event < 72) + ctl |= (event - 64) << cbox2_ofs | 15; + } + reg->mux_select = ctl; + + /* Select processor mode. */ + /* ??? Need to come up with some mechanism to trace only selected + processes. For now select from pal, kernel and user mode. */ + ctl = 0; + ctl |= !sys->enable_pal << 9; + ctl |= !sys->enable_kernel << 8; + ctl |= !sys->enable_user << 30; + reg->proc_mode = ctl; + + /* Select interrupt frequencies. Take the interrupt count selected + by the user, and map it onto one of the possible counter widths. + If the user value is in between, compute a value to which the + counter is reset at each interrupt. */ + + ctl = reset = need_reset = 0; + for (i = 0; i < 3; ++i) { + unsigned long max, hilo, count = ctr[i].count; + if (!ctr[i].enabled) + continue; + + if (count <= 256) + count = 256, hilo = 3, max = 256; + else { + max = (i == 2 ? 
16384 : 65536); + hilo = 2; + if (count > max) + count = max; + } + ctr[i].count = count; + + ctl |= hilo << (8 - i*2); + reset |= (max - count) << (48 - 16*i); + if (count != max) + need_reset |= 1 << i; + } + reg->freq = ctl; + reg->reset_values = reset; + reg->need_reset = need_reset; +} + +static void +ev5_reg_setup(struct op_register_config *reg, + struct op_counter_config *ctr, + struct op_system_config *sys) +{ + common_reg_setup(reg, ctr, sys, 19, 22); +} + +static void +pca56_reg_setup(struct op_register_config *reg, + struct op_counter_config *ctr, + struct op_system_config *sys) +{ + common_reg_setup(reg, ctr, sys, 8, 11); +} + +/* Program all of the registers in preparation for enabling profiling. */ + +static void +ev5_cpu_setup (void *x) +{ + struct op_register_config *reg = x; + + wrperfmon(2, reg->mux_select); + wrperfmon(3, reg->proc_mode); + wrperfmon(4, reg->freq); + wrperfmon(6, reg->reset_values); +} + +/* CTR is a counter for which the user has requested an interrupt count + in between one of the widths selectable in hardware. Reset the count + for CTR to the value stored in REG->RESET_VALUES. + + For EV5, this means disabling profiling, reading the current values, + masking in the value for the desired register, writing, then turning + profiling back on. + + This can be streamlined if profiling is only enabled for user mode. + In that case we know that the counters are not currently incrementing + (due to being in kernel mode). */ + +static void +ev5_reset_ctr(struct op_register_config *reg, unsigned long ctr) +{ + unsigned long values, mask, not_pk, reset_values; + + mask = (ctr == 0 ? 0xfffful << 48 + : ctr == 1 ? 0xfffful << 32 + : 0x3fff << 16); + + not_pk = 1 << 9 | 1 << 8; + + reset_values = reg->reset_values; + + if ((reg->proc_mode & not_pk) == not_pk) { + values = wrperfmon(5, 0); + values = (reset_values & mask) | (values & ~mask & -2); + wrperfmon(6, values); + } else { + wrperfmon(0, -1); + values = wrperfmon(5, 0); + values = (reset_values & mask) | (values & ~mask & -2); + wrperfmon(6, values); + wrperfmon(1, reg->enable); + } +} + +static void +ev5_handle_interrupt(unsigned long which, struct pt_regs *regs, + struct op_counter_config *ctr) +{ + /* Record the sample. */ + oprofile_add_sample(regs, which); +} + + +struct op_axp_model op_model_ev5 = { + .reg_setup = ev5_reg_setup, + .cpu_setup = ev5_cpu_setup, + .reset_ctr = ev5_reset_ctr, + .handle_interrupt = ev5_handle_interrupt, + .cpu_type = "alpha/ev5", + .num_counters = 3, + .can_set_proc_mode = 1, +}; + +struct op_axp_model op_model_pca56 = { + .reg_setup = pca56_reg_setup, + .cpu_setup = ev5_cpu_setup, + .reset_ctr = ev5_reset_ctr, + .handle_interrupt = ev5_handle_interrupt, + .cpu_type = "alpha/pca56", + .num_counters = 3, + .can_set_proc_mode = 1, +}; diff --git a/arch/alpha/oprofile/op_model_ev6.c b/arch/alpha/oprofile/op_model_ev6.c new file mode 100644 index 0000000000..02edf59716 --- /dev/null +++ b/arch/alpha/oprofile/op_model_ev6.c @@ -0,0 +1,101 @@ +/** + * @file arch/alpha/oprofile/op_model_ev6.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Richard Henderson + */ + +#include +#include +#include + +#include "op_impl.h" + + +/* Compute all of the registers in preparation for enabling profiling. */ + +static void +ev6_reg_setup(struct op_register_config *reg, + struct op_counter_config *ctr, + struct op_system_config *sys) +{ + unsigned long ctl, reset, need_reset, i; + + /* Select desired events. 
We've mapped the event numbers + such that they fit directly into the event selection fields. */ + ctl = 0; + if (ctr[0].enabled && ctr[0].event) + ctl |= (ctr[0].event & 1) << 4; + if (ctr[1].enabled) + ctl |= (ctr[1].event - 2) & 15; + reg->mux_select = ctl; + + /* Select logging options. */ + /* ??? Need to come up with some mechanism to trace only + selected processes. EV6 does not have a mechanism to + select kernel or user mode only. For now, enable always. */ + reg->proc_mode = 0; + + /* EV6 cannot change the width of the counters as with the + other implementations. But fortunately, we can write to + the counters and set the value such that it will overflow + at the right time. */ + reset = need_reset = 0; + for (i = 0; i < 2; ++i) { + unsigned long count = ctr[i].count; + if (!ctr[i].enabled) + continue; + + if (count > 0x100000) + count = 0x100000; + ctr[i].count = count; + reset |= (0x100000 - count) << (i ? 6 : 28); + if (count != 0x100000) + need_reset |= 1 << i; + } + reg->reset_values = reset; + reg->need_reset = need_reset; +} + +/* Program all of the registers in preparation for enabling profiling. */ + +static void +ev6_cpu_setup (void *x) +{ + struct op_register_config *reg = x; + + wrperfmon(2, reg->mux_select); + wrperfmon(3, reg->proc_mode); + wrperfmon(6, reg->reset_values | 3); +} + +/* CTR is a counter for which the user has requested an interrupt count + in between one of the widths selectable in hardware. Reset the count + for CTR to the value stored in REG->RESET_VALUES. */ + +static void +ev6_reset_ctr(struct op_register_config *reg, unsigned long ctr) +{ + wrperfmon(6, reg->reset_values | (1 << ctr)); +} + +static void +ev6_handle_interrupt(unsigned long which, struct pt_regs *regs, + struct op_counter_config *ctr) +{ + /* Record the sample. */ + oprofile_add_sample(regs, which); +} + + +struct op_axp_model op_model_ev6 = { + .reg_setup = ev6_reg_setup, + .cpu_setup = ev6_cpu_setup, + .reset_ctr = ev6_reset_ctr, + .handle_interrupt = ev6_handle_interrupt, + .cpu_type = "alpha/ev6", + .num_counters = 2, + .can_set_proc_mode = 0, +}; diff --git a/arch/alpha/oprofile/op_model_ev67.c b/arch/alpha/oprofile/op_model_ev67.c new file mode 100644 index 0000000000..adb1744d20 --- /dev/null +++ b/arch/alpha/oprofile/op_model_ev67.c @@ -0,0 +1,261 @@ +/** + * @file arch/alpha/oprofile/op_model_ev67.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Richard Henderson + * @author Falk Hueffner + */ + +#include +#include +#include + +#include "op_impl.h" + + +/* Compute all of the registers in preparation for enabling profiling. */ + +static void +ev67_reg_setup(struct op_register_config *reg, + struct op_counter_config *ctr, + struct op_system_config *sys) +{ + unsigned long ctl, reset, need_reset, i; + + /* Select desired events. */ + ctl = 1UL << 4; /* Enable ProfileMe mode. */ + + /* The event numbers are chosen so we can use them directly if + PCTR1 is enabled. */ + if (ctr[1].enabled) { + ctl |= (ctr[1].event & 3) << 2; + } else { + if (ctr[0].event == 0) /* cycles */ + ctl |= 1UL << 2; + } + reg->mux_select = ctl; + + /* Select logging options. */ + /* ??? Need to come up with some mechanism to trace only + selected processes. EV67 does not have a mechanism to + select kernel or user mode only. For now, enable always. */ + reg->proc_mode = 0; + + /* EV67 cannot change the width of the counters as with the + other implementations. 
But fortunately, we can write to + the counters and set the value such that it will overflow + at the right time. */ + reset = need_reset = 0; + for (i = 0; i < 2; ++i) { + unsigned long count = ctr[i].count; + if (!ctr[i].enabled) + continue; + + if (count > 0x100000) + count = 0x100000; + ctr[i].count = count; + reset |= (0x100000 - count) << (i ? 6 : 28); + if (count != 0x100000) + need_reset |= 1 << i; + } + reg->reset_values = reset; + reg->need_reset = need_reset; +} + +/* Program all of the registers in preparation for enabling profiling. */ + +static void +ev67_cpu_setup (void *x) +{ + struct op_register_config *reg = x; + + wrperfmon(2, reg->mux_select); + wrperfmon(3, reg->proc_mode); + wrperfmon(6, reg->reset_values | 3); +} + +/* CTR is a counter for which the user has requested an interrupt count + in between one of the widths selectable in hardware. Reset the count + for CTR to the value stored in REG->RESET_VALUES. */ + +static void +ev67_reset_ctr(struct op_register_config *reg, unsigned long ctr) +{ + wrperfmon(6, reg->reset_values | (1 << ctr)); +} + +/* ProfileMe conditions which will show up as counters. We can also + detect the following, but it seems unlikely that anybody is + interested in counting them: + * Reset + * MT_FPCR (write to floating point control register) + * Arithmetic trap + * Dstream Fault + * Machine Check (ECC fault, etc.) + * OPCDEC (illegal opcode) + * Floating point disabled + * Differentiate between DTB single/double misses and 3 or 4 level + page tables + * Istream access violation + * Interrupt + * Icache Parity Error. + * Instruction killed (nop, trapb) + + Unfortunately, there seems to be no way to detect Dcache and Bcache + misses; the latter could be approximated by making the counter + count Bcache misses, but that is not precise. 
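Each condition we do report is attributed to whichever of PCTR0/PCTR1 triggered the sample and shows up to oprofile as an extra pseudo-counter (see op_add_pm below).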
+ + We model this as 20 counters: + * PCTR0 + * PCTR1 + * 9 ProfileMe events, induced by PCTR0 + * 9 ProfileMe events, induced by PCTR1 +*/ + +enum profileme_counters { + PM_STALLED, /* Stalled for at least one cycle + between the fetch and map stages */ + PM_TAKEN, /* Conditional branch taken */ + PM_MISPREDICT, /* Branch caused mispredict trap */ + PM_ITB_MISS, /* ITB miss */ + PM_DTB_MISS, /* DTB miss */ + PM_REPLAY, /* Replay trap */ + PM_LOAD_STORE, /* Load-store order trap */ + PM_ICACHE_MISS, /* Icache miss */ + PM_UNALIGNED, /* Unaligned Load/Store */ + PM_NUM_COUNTERS +}; + +static inline void +op_add_pm(unsigned long pc, int kern, unsigned long counter, + struct op_counter_config *ctr, unsigned long event) +{ + unsigned long fake_counter = 2 + event; + if (counter == 1) + fake_counter += PM_NUM_COUNTERS; + if (ctr[fake_counter].enabled) + oprofile_add_pc(pc, kern, fake_counter); +} + +static void +ev67_handle_interrupt(unsigned long which, struct pt_regs *regs, + struct op_counter_config *ctr) +{ + unsigned long pmpc, pctr_ctl; + int kern = !user_mode(regs); + int mispredict = 0; + union { + unsigned long v; + struct { + unsigned reserved: 30; /* 0-29 */ + unsigned overcount: 3; /* 30-32 */ + unsigned icache_miss: 1; /* 33 */ + unsigned trap_type: 4; /* 34-37 */ + unsigned load_store: 1; /* 38 */ + unsigned trap: 1; /* 39 */ + unsigned mispredict: 1; /* 40 */ + } fields; + } i_stat; + + enum trap_types { + TRAP_REPLAY, + TRAP_INVALID0, + TRAP_DTB_DOUBLE_MISS_3, + TRAP_DTB_DOUBLE_MISS_4, + TRAP_FP_DISABLED, + TRAP_UNALIGNED, + TRAP_DTB_SINGLE_MISS, + TRAP_DSTREAM_FAULT, + TRAP_OPCDEC, + TRAP_INVALID1, + TRAP_MACHINE_CHECK, + TRAP_INVALID2, + TRAP_ARITHMETIC, + TRAP_INVALID3, + TRAP_MT_FPCR, + TRAP_RESET + }; + + pmpc = wrperfmon(9, 0); + /* ??? Don't know how to handle physical-mode PALcode address. */ + if (pmpc & 1) + return; + pmpc &= ~2; /* clear reserved bit */ + + i_stat.v = wrperfmon(8, 0); + if (i_stat.fields.trap) { + switch (i_stat.fields.trap_type) { + case TRAP_INVALID1: + case TRAP_INVALID2: + case TRAP_INVALID3: + /* Pipeline redirection occurred. PMPC points + to PALcode. Recognize ITB miss by PALcode + offset address, and get actual PC from + EXC_ADDR. */ + oprofile_add_pc(regs->pc, kern, which); + if ((pmpc & ((1 << 15) - 1)) == 581) + op_add_pm(regs->pc, kern, which, + ctr, PM_ITB_MISS); + /* Most other bit and counter values will be + those for the first instruction in the + fault handler, so we're done. */ + return; + case TRAP_REPLAY: + op_add_pm(pmpc, kern, which, ctr, + (i_stat.fields.load_store + ? PM_LOAD_STORE : PM_REPLAY)); + break; + case TRAP_DTB_DOUBLE_MISS_3: + case TRAP_DTB_DOUBLE_MISS_4: + case TRAP_DTB_SINGLE_MISS: + op_add_pm(pmpc, kern, which, ctr, PM_DTB_MISS); + break; + case TRAP_UNALIGNED: + op_add_pm(pmpc, kern, which, ctr, PM_UNALIGNED); + break; + case TRAP_INVALID0: + case TRAP_FP_DISABLED: + case TRAP_DSTREAM_FAULT: + case TRAP_OPCDEC: + case TRAP_MACHINE_CHECK: + case TRAP_ARITHMETIC: + case TRAP_MT_FPCR: + case TRAP_RESET: + break; + } + + /* ??? JSR/JMP/RET/COR or HW_JSR/HW_JMP/HW_RET/HW_COR + mispredicts do not set this bit but can be + recognized by the presence of one of these + instructions at the PMPC location with bit 39 + set. 
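The code below only acts on the case the hardware flags via this bit; recognizing the jump forms would require decoding the instruction at PMPC, which we do not do.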
*/ + if (i_stat.fields.mispredict) { + mispredict = 1; + op_add_pm(pmpc, kern, which, ctr, PM_MISPREDICT); + } + } + + oprofile_add_pc(pmpc, kern, which); + + pctr_ctl = wrperfmon(5, 0); + if (pctr_ctl & (1UL << 27)) + op_add_pm(pmpc, kern, which, ctr, PM_STALLED); + + /* Unfortunately, TAK is undefined on mispredicted branches. + ??? It is also undefined for non-cbranch insns, should + check that. */ + if (!mispredict && pctr_ctl & (1UL << 0)) + op_add_pm(pmpc, kern, which, ctr, PM_TAKEN); +} + +struct op_axp_model op_model_ev67 = { + .reg_setup = ev67_reg_setup, + .cpu_setup = ev67_cpu_setup, + .reset_ctr = ev67_reset_ctr, + .handle_interrupt = ev67_handle_interrupt, + .cpu_type = "alpha/ev67", + .num_counters = 20, + .can_set_proc_mode = 0, +}; diff --git a/arch/arc/Kbuild b/arch/arc/Kbuild new file mode 100644 index 0000000000..b94102fff6 --- /dev/null +++ b/arch/arc/Kbuild @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-y += kernel/ +obj-y += mm/ + +# for cleaning +subdir- += boot diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig new file mode 100644 index 0000000000..3c2a4753d0 --- /dev/null +++ b/arch/arc/Kconfig @@ -0,0 +1,561 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) +# + +config ARC + def_bool y + select ARC_TIMERS + select ARCH_HAS_CACHE_LINE_SIZE + select ARCH_HAS_DEBUG_VM_PGTABLE + select ARCH_HAS_DMA_PREP_COHERENT + select ARCH_HAS_PTE_SPECIAL + select ARCH_HAS_SETUP_DMA_OPS + select ARCH_HAS_SYNC_DMA_FOR_CPU + select ARCH_HAS_SYNC_DMA_FOR_DEVICE + select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC + select ARCH_32BIT_OFF_T + select BUILDTIME_TABLE_SORT + select CLONE_BACKWARDS + select COMMON_CLK + select DMA_DIRECT_REMAP + select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC) + # for now, we don't need GENERIC_IRQ_PROBE, CONFIG_GENERIC_IRQ_CHIP + select GENERIC_IRQ_SHOW + select GENERIC_PCI_IOMAP + select GENERIC_PENDING_IRQ if SMP + select GENERIC_SCHED_CLOCK + select GENERIC_SMP_IDLE_THREAD + select HAVE_ARCH_KGDB + select HAVE_ARCH_TRACEHOOK + select HAVE_ARCH_TRANSPARENT_HUGEPAGE if ARC_MMU_V4 + select HAVE_DEBUG_STACKOVERFLOW + select HAVE_DEBUG_KMEMLEAK + select HAVE_IOREMAP_PROT + select HAVE_KERNEL_GZIP + select HAVE_KERNEL_LZMA + select HAVE_KPROBES + select HAVE_KRETPROBES + select HAVE_MOD_ARCH_SPECIFIC + select HAVE_PERF_EVENTS + select IRQ_DOMAIN + select MODULES_USE_ELF_RELA + select OF + select OF_EARLY_FLATTREE + select PCI_SYSCALL if PCI + select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING + select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32 + select SET_FS + select TRACE_IRQFLAGS_SUPPORT + +config LOCKDEP_SUPPORT + def_bool y + +config SCHED_OMIT_FRAME_POINTER + def_bool y + +config GENERIC_CSUM + def_bool y + +config ARCH_FLATMEM_ENABLE + def_bool y + +config MMU + def_bool y + +config NO_IOPORT_MAP + def_bool y + +config GENERIC_CALIBRATE_DELAY + def_bool y + +config GENERIC_HWEIGHT + def_bool y + +config STACKTRACE_SUPPORT + def_bool y + select STACKTRACE + +menu "ARC Architecture Configuration" + +menu "ARC Platform/SoC/Board" + +source "arch/arc/plat-tb10x/Kconfig" +source "arch/arc/plat-axs10x/Kconfig" +source "arch/arc/plat-hsdk/Kconfig" + +endmenu + +choice + prompt "ARC Instruction Set" + default ISA_ARCV2 + +config ISA_ARCOMPACT + bool "ARCompact ISA" + select CPU_NO_EFFICIENT_FFS + help + The original ARC ISA of ARC600/700 cores + +config ISA_ARCV2 + bool "ARC ISA v2" + select ARC_TIMERS_64BIT + help + ISA for the Next Generation 
ARC-HS cores + +endchoice + +menu "ARC CPU Configuration" + +choice + prompt "ARC Core" + default ARC_CPU_770 if ISA_ARCOMPACT + default ARC_CPU_HS if ISA_ARCV2 + +config ARC_CPU_770 + bool "ARC770" + depends on ISA_ARCOMPACT + select ARC_HAS_SWAPE + help + Support for ARC770 core introduced with Rel 4.10 (Summer 2011) + This core has a bunch of cool new features: + -MMU-v3: Variable Page Sz (4k, 8k, 16k), bigger J-TLB (128x4) + Shared Address Spaces (for sharing TLB entries in MMU) + -Caches: New Prog Model, Region Flush + -Insns: endian swap, load-locked/store-conditional, time-stamp-ctr + +config ARC_CPU_HS + bool "ARC-HS" + depends on ISA_ARCV2 + help + Support for ARC HS38x Cores based on ARCv2 ISA + The notable features are: + - SMP configurations of up to 4 cores with coherency + - Optional L2 Cache and IO-Coherency + - Revised Interrupt Architecture (multiple priorites, reg banks, + auto stack switch, auto regfile save/restore) + - MMUv4 (PIPT dcache, Huge Pages) + - Instructions for + * 64bit load/store: LDD, STD + * Hardware assisted divide/remainder: DIV, REM + * Function prologue/epilogue: ENTER_S, LEAVE_S + * IRQ enable/disable: CLRI, SETI + * pop count: FFS, FLS + * SETcc, BMSKN, XBFU... + +endchoice + +config ARC_TUNE_MCPU + string "Override default -mcpu compiler flag" + default "" + help + Override default -mcpu=xxx compiler flag (which is set depending on + the ISA version) with the specified value. + NOTE: If specified flag isn't supported by current compiler the + ISA default value will be used as a fallback. + +config CPU_BIG_ENDIAN + bool "Enable Big Endian Mode" + help + Build kernel for Big Endian Mode of ARC CPU + +config SMP + bool "Symmetric Multi-Processing" + select ARC_MCIP if ISA_ARCV2 + help + This enables support for systems with more than one CPU. + +if SMP + +config NR_CPUS + int "Maximum number of CPUs (2-4096)" + range 2 4096 + default "4" + +config ARC_SMP_HALT_ON_RESET + bool "Enable Halt-on-reset boot mode" + help + In SMP configuration cores can be configured as Halt-on-reset + or they could all start at same time. For Halt-on-reset, non + masters are parked until Master kicks them so they can start off + at designated entry point. For other case, all jump to common + entry point and spin wait for Master's signal. + +endif #SMP + +config ARC_MCIP + bool "ARConnect Multicore IP (MCIP) Support " + depends on ISA_ARCV2 + default y if SMP + help + This IP block enables SMP in ARC-HS38 cores. + It provides for cross-core interrupts, multi-core debug + hardware semaphores, shared memory,.... + +menuconfig ARC_CACHE + bool "Enable Cache Support" + default y + +if ARC_CACHE + +config ARC_CACHE_LINE_SHIFT + int "Cache Line Length (as power of 2)" + range 5 7 + default "6" + help + Starting with ARC700 4.9, Cache line length is configurable, + This option specifies "N", with Line-len = 2 power N + So line lengths of 32, 64, 128 are specified by 5,6,7, respectively + Linux only supports same line lengths for I and D caches. + +config ARC_HAS_ICACHE + bool "Use Instruction Cache" + default y + +config ARC_HAS_DCACHE + bool "Use Data Cache" + default y + +config ARC_CACHE_PAGES + bool "Per Page Cache Control" + default y + depends on ARC_HAS_ICACHE || ARC_HAS_DCACHE + help + This can be used to over-ride the global I/D Cache Enable on a + per-page basis (but only for pages accessed via MMU such as + Kernel Virtual address or User Virtual Address) + TLB entries have a per-page Cache Enable Bit. 
+ Note that Global I/D ENABLE + Per Page DISABLE works but corollary + Global DISABLE + Per Page ENABLE won't work + +config ARC_CACHE_VIPT_ALIASING + bool "Support VIPT Aliasing D$" + depends on ARC_HAS_DCACHE && ISA_ARCOMPACT + +endif #ARC_CACHE + +config ARC_HAS_ICCM + bool "Use ICCM" + help + Single Cycle RAMS to store Fast Path Code + +config ARC_ICCM_SZ + int "ICCM Size in KB" + default "64" + depends on ARC_HAS_ICCM + +config ARC_HAS_DCCM + bool "Use DCCM" + help + Single Cycle RAMS to store Fast Path Data + +config ARC_DCCM_SZ + int "DCCM Size in KB" + default "64" + depends on ARC_HAS_DCCM + +config ARC_DCCM_BASE + hex "DCCM map address" + default "0xA0000000" + depends on ARC_HAS_DCCM + +choice + prompt "MMU Version" + default ARC_MMU_V3 if ISA_ARCOMPACT + default ARC_MMU_V4 if ISA_ARCV2 + +config ARC_MMU_V3 + bool "MMU v3" + depends on ISA_ARCOMPACT + help + Introduced with ARC700 4.10: New Features + Variable Page size (1k-16k), var JTLB size 128 x (2 or 4) + Shared Address Spaces (SASID) + +config ARC_MMU_V4 + bool "MMU v4" + depends on ISA_ARCV2 + +endchoice + + +choice + prompt "MMU Page Size" + default ARC_PAGE_SIZE_8K + +config ARC_PAGE_SIZE_8K + bool "8KB" + help + Choose between 8k vs 16k + +config ARC_PAGE_SIZE_16K + bool "16KB" + +config ARC_PAGE_SIZE_4K + bool "4KB" + depends on ARC_MMU_V3 || ARC_MMU_V4 + +endchoice + +choice + prompt "MMU Super Page Size" + depends on ISA_ARCV2 && TRANSPARENT_HUGEPAGE + default ARC_HUGEPAGE_2M + +config ARC_HUGEPAGE_2M + bool "2MB" + +config ARC_HUGEPAGE_16M + bool "16MB" + +endchoice + +config PGTABLE_LEVELS + int "Number of Page table levels" + default 2 + +config ARC_COMPACT_IRQ_LEVELS + depends on ISA_ARCOMPACT + bool "Setup Timer IRQ as high Priority" + # if SMP, LV2 enabled ONLY if ARC implementation has LV2 re-entrancy + depends on !SMP + +config ARC_FPU_SAVE_RESTORE + bool "Enable FPU state persistence across context switch" + help + ARCompact FPU has internal registers to assist with Double precision + Floating Point operations. There are control and stauts registers + for floating point exceptions and rounding modes. These are + preserved across task context switch when enabled. + +config ARC_CANT_LLSC + def_bool n + +config ARC_HAS_LLSC + bool "Insn: LLOCK/SCOND (efficient atomic ops)" + default y + depends on !ARC_CANT_LLSC + +config ARC_HAS_SWAPE + bool "Insn: SWAPE (endian-swap)" + default y + +if ISA_ARCV2 + +config ARC_USE_UNALIGNED_MEM_ACCESS + bool "Enable unaligned access in HW" + default y + select HAVE_EFFICIENT_UNALIGNED_ACCESS + help + The ARC HS architecture supports unaligned memory access + which is disabled by default. Enable unaligned access in + hardware and use software to use it + +config ARC_HAS_LL64 + bool "Insn: 64bit LDD/STD" + help + Enable gcc to generate 64-bit load/store instructions + ISA mandates even/odd registers to allow encoding of two + dest operands with 2 possible source operands. + default y + +config ARC_HAS_DIV_REM + bool "Insn: div, divu, rem, remu" + default y + +config ARC_HAS_ACCL_REGS + bool "Reg Pair ACCL:ACCH (FPU and/or MPY > 6 and/or DSP)" + default y + help + Depending on the configuration, CPU can contain accumulator reg-pair + (also referred to as r58:r59). 
These can also be used by gcc as GPR so + kernel needs to save/restore per process + +config ARC_DSP_HANDLED + def_bool n + +config ARC_DSP_SAVE_RESTORE_REGS + def_bool n + +choice + prompt "DSP support" + default ARC_DSP_NONE + help + Depending on the configuration, CPU can contain DSP registers + (ACC0_GLO, ACC0_GHI, DSP_BFLY0, DSP_CTRL, DSP_FFT_CTRL). + Below are options describing how to handle these registers in + interrupt entry / exit and in context switch. + +config ARC_DSP_NONE + bool "No DSP extension presence in HW" + help + No DSP extension presence in HW + +config ARC_DSP_KERNEL + bool "DSP extension in HW, no support for userspace" + select ARC_HAS_ACCL_REGS + select ARC_DSP_HANDLED + help + DSP extension presence in HW, no support for DSP-enabled userspace + applications. We don't save / restore DSP registers and only do + some minimal preparations so userspace won't be able to break kernel + +config ARC_DSP_USERSPACE + bool "Support DSP for userspace apps" + select ARC_HAS_ACCL_REGS + select ARC_DSP_HANDLED + select ARC_DSP_SAVE_RESTORE_REGS + help + DSP extension presence in HW, support save / restore DSP registers to + run DSP-enabled userspace applications + +config ARC_DSP_AGU_USERSPACE + bool "Support DSP with AGU for userspace apps" + select ARC_HAS_ACCL_REGS + select ARC_DSP_HANDLED + select ARC_DSP_SAVE_RESTORE_REGS + help + DSP and AGU extensions presence in HW, support save / restore DSP + and AGU registers to run DSP-enabled userspace applications +endchoice + +config ARC_IRQ_NO_AUTOSAVE + bool "Disable hardware autosave regfile on interrupts" + default n + help + On HS cores, taken interrupt auto saves the regfile on stack. + This is programmable and can be optionally disabled in which case + software INTERRUPT_PROLOGUE/EPILGUE do the needed work + +config ARC_LPB_DISABLE + bool "Disable loop buffer (LPB)" + help + On HS cores, loop buffer (LPB) is programmable in runtime and can + be optionally disabled. + +endif # ISA_ARCV2 + +endmenu # "ARC CPU Configuration" + +config LINUX_LINK_BASE + hex "Kernel link address" + default "0x80000000" + help + ARC700 divides the 32 bit phy address space into two equal halves + -Lower 2G (0 - 0x7FFF_FFFF ) is user virtual, translated by MMU + -Upper 2G (0x8000_0000 onwards) is untranslated, for kernel + Typically Linux kernel is linked at the start of untransalted addr, + hence the default value of 0x8zs. + However some customers have peripherals mapped at this addr, so + Linux needs to be scooted a bit. + If you don't know what the above means, leave this setting alone. + This needs to match memory start address specified in Device Tree + +config LINUX_RAM_BASE + hex "RAM base address" + default LINUX_LINK_BASE + help + By default Linux is linked at base of RAM. However in some special + cases (such as HSDK), Linux can't be linked at start of DDR, hence + this option. + +config HIGHMEM + bool "High Memory Support" + select HAVE_ARCH_PFN_VALID + select KMAP_LOCAL + help + With ARC 2G:2G address split, only upper 2G is directly addressable by + kernel. 
Enable this to potentially allow access to rest of 2G and PAE + in future + +config ARC_HAS_PAE40 + bool "Support for the 40-bit Physical Address Extension" + depends on ISA_ARCV2 + select HIGHMEM + select PHYS_ADDR_T_64BIT + help + Enable access to physical memory beyond 4G, only supported on + ARC cores with 40 bit Physical Addressing support + +config ARC_KVADDR_SIZE + int "Kernel Virtual Address Space size (MB)" + range 0 512 + default "256" + help + The kernel address space is carved out of 256MB of translated address + space for catering to vmalloc, modules, pkmap, fixmap. This however may + not suffice vmalloc requirements of a 4K CPU EZChip system. So allow + this to be stretched to 512 MB (by extending into the reserved + kernel-user gutter) + +config ARC_CURR_IN_REG + bool "Dedicate Register r25 for current_task pointer" + default y + help + This reserved Register R25 to point to Current Task in + kernel mode. This saves memory access for each such access + + +config ARC_EMUL_UNALIGNED + bool "Emulate unaligned memory access (userspace only)" + select SYSCTL_ARCH_UNALIGN_NO_WARN + select SYSCTL_ARCH_UNALIGN_ALLOW + depends on ISA_ARCOMPACT + help + This enables misaligned 16 & 32 bit memory access from user space. + Use ONLY-IF-ABS-NECESSARY as it will be very slow and also can hide + potential bugs in code + +config HZ + int "Timer Frequency" + default 100 + +config ARC_METAWARE_HLINK + bool "Support for Metaware debugger assisted Host access" + help + This options allows a Linux userland apps to directly access + host file system (open/creat/read/write etc) with help from + Metaware Debugger. This can come in handy for Linux-host communication + when there is no real usable peripheral such as EMAC. + +menuconfig ARC_DBG + bool "ARC debugging" + default y + +if ARC_DBG + +config ARC_DW2_UNWIND + bool "Enable DWARF specific kernel stack unwind" + default y + select KALLSYMS + help + Compiles the kernel with DWARF unwind information and can be used + to get stack backtraces. + + If you say Y here the resulting kernel image will be slightly larger + but not slower, and it will give very useful debugging information. + If you don't debug the kernel, you can say N, but we may not be able + to solve problems without frame unwind information + +config ARC_DBG_JUMP_LABEL + bool "Paranoid checks in Static Keys (jump labels) code" + depends on JUMP_LABEL + default y if STATIC_KEYS_SELFTEST + help + Enable paranoid checks and self-test of both ARC-specific and generic + part of static keys (jump labels) related code. +endif + +config ARC_BUILTIN_DTB_NAME + string "Built in DTB" + help + Set the name of the DTB to embed in the vmlinux binary + Leaving it blank selects the minimal "skeleton" dtb + +endmenu # "ARC Architecture Configuration" + +config FORCE_MAX_ZONEORDER + int "Maximum zone order" + default "12" if ARC_HUGEPAGE_16M + default "11" + +source "kernel/power/Kconfig" diff --git a/arch/arc/Kconfig.debug b/arch/arc/Kconfig.debug new file mode 100644 index 0000000000..45add86dec --- /dev/null +++ b/arch/arc/Kconfig.debug @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 + +config 16KSTACKS + bool "Use 16Kb for kernel stacks instead of 8Kb" + help + If you say Y here the kernel will use a 16Kb stacksize for the + kernel stack attached to each process/thread. The default is 8K. + This increases the resident kernel footprint and will cause less + threads to run on the system and also increase the pressure + on the VM subsystem for higher order allocations. 
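As an aside on the LINUX_LINK_BASE help text above: the ARC700 split between a translated lower 2 GB and an untranslated upper 2 GB can be pictured with a small, purely illustrative C sketch. It assumes the default 0x8000_0000 base, and the macro and helper names below are invented for this note, not kernel symbols.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Matches the default CONFIG_LINUX_LINK_BASE / start of the untranslated half. */
#define EXAMPLE_UNTRANSLATED_BASE 0x80000000u

/* Addresses at or above the base bypass the MMU on ARC700. */
static int in_untranslated_half(uint32_t vaddr)
{
	return vaddr >= EXAMPLE_UNTRANSLATED_BASE;
}

int main(void)
{
	const uint32_t samples[] = { 0x00400000u, 0x7fffffffu, 0x80000000u, 0x9e000000u };
	size_t i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("0x%08" PRIx32 " -> %s\n", samples[i],
		       in_untranslated_half(samples[i])
		       ? "kernel (untranslated)" : "user (translated by MMU)");
	return 0;
}

The sketch only illustrates the default layout; boards that relocate LINUX_LINK_BASE (for example because peripherals sit at 0x8000_0000) link the kernel elsewhere and are not modelled here.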
diff --git a/arch/arc/Makefile b/arch/arc/Makefile new file mode 100644 index 0000000000..efc54f3e35 --- /dev/null +++ b/arch/arc/Makefile @@ -0,0 +1,114 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) +# + +KBUILD_DEFCONFIG := haps_hs_smp_defconfig + +ifeq ($(CROSS_COMPILE),) +CROSS_COMPILE := $(call cc-cross-prefix, arc-linux- arceb-linux-) +endif + +cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ + +tune-mcpu-def-$(CONFIG_ISA_ARCOMPACT) := -mcpu=arc700 +tune-mcpu-def-$(CONFIG_ISA_ARCV2) := -mcpu=hs38 + +ifeq ($(CONFIG_ARC_TUNE_MCPU),) +cflags-y += $(tune-mcpu-def-y) +else +tune-mcpu := $(CONFIG_ARC_TUNE_MCPU) +ifneq ($(call cc-option,$(tune-mcpu)),) +cflags-y += $(tune-mcpu) +else +# The flag provided by 'CONFIG_ARC_TUNE_MCPU' option isn't known by this compiler +# (probably the compiler is too old). Use ISA default mcpu flag instead as a safe option. +$(warning ** WARNING ** CONFIG_ARC_TUNE_MCPU flag '$(tune-mcpu)' is unknown, fallback to '$(tune-mcpu-def-y)') +cflags-y += $(tune-mcpu-def-y) +endif +endif + + +ifdef CONFIG_ARC_CURR_IN_REG +# For a global register definition, make sure it gets passed to every file +# We had a customer reported bug where some code built in kernel was NOT using +# any kernel headers, and missing the r25 global register +# Can't do unconditionally because of recursive include issues +# due to +LINUXINCLUDE += -include $(srctree)/arch/arc/include/asm/current.h +endif + +cflags-y += -fsection-anchors + +cflags-$(CONFIG_ARC_HAS_LLSC) += -mlock +cflags-$(CONFIG_ARC_HAS_SWAPE) += -mswape + +ifdef CONFIG_ISA_ARCV2 + +ifdef CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS +cflags-y += -munaligned-access +else +cflags-y += -mno-unaligned-access +endif + +ifndef CONFIG_ARC_HAS_LL64 +cflags-y += -mno-ll64 +endif + +ifndef CONFIG_ARC_HAS_DIV_REM +cflags-y += -mno-div-rem +endif + +endif + +cfi := $(call as-instr,.cfi_startproc\n.cfi_endproc,-DARC_DW2_UNWIND_AS_CFI) +cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables $(cfi) + +# small data is default for elf32 tool-chain. 
If not usable, disable it +# This also allows repurposing GP as scratch reg to gcc reg allocator +disable_small_data := y +cflags-$(disable_small_data) += -mno-sdata -fcall-used-gp + +cflags-$(CONFIG_CPU_BIG_ENDIAN) += -mbig-endian +ldflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB + +LIBGCC = $(shell $(CC) $(cflags-y) --print-libgcc-file-name) + +# Modules with short calls might break for calls into builtin-kernel +KBUILD_CFLAGS_MODULE += -mlong-calls -mno-millicode + +# Finally dump eveything into kernel build system +KBUILD_CFLAGS += $(cflags-y) +KBUILD_AFLAGS += $(KBUILD_CFLAGS) +KBUILD_LDFLAGS += $(ldflags-y) + +head-y := arch/arc/kernel/head.o + +# w/o this dtb won't embed into kernel binary +core-y += arch/arc/boot/dts/ + +core-y += arch/arc/plat-sim/ +core-$(CONFIG_ARC_PLAT_TB10X) += arch/arc/plat-tb10x/ +core-$(CONFIG_ARC_PLAT_AXS10X) += arch/arc/plat-axs10x/ +core-$(CONFIG_ARC_SOC_HSDK) += arch/arc/plat-hsdk/ + +libs-y += arch/arc/lib/ $(LIBGCC) + +boot := arch/arc/boot + +boot_targets := uImage.bin uImage.gz uImage.lzma + +PHONY += $(boot_targets) +$(boot_targets): vmlinux + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + +uimage-default-y := uImage.bin +uimage-default-$(CONFIG_KERNEL_GZIP) := uImage.gz +uimage-default-$(CONFIG_KERNEL_LZMA) := uImage.lzma + +PHONY += uImage +uImage: $(uimage-default-y) + @ln -sf $< $(boot)/uImage + @$(kecho) ' Image $(boot)/uImage is ready' + +CLEAN_FILES += $(boot)/uImage diff --git a/arch/arc/boot/.gitignore b/arch/arc/boot/.gitignore new file mode 100644 index 0000000000..675db14940 --- /dev/null +++ b/arch/arc/boot/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +uImage diff --git a/arch/arc/boot/Makefile b/arch/arc/boot/Makefile new file mode 100644 index 0000000000..5648748c28 --- /dev/null +++ b/arch/arc/boot/Makefile @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: GPL-2.0 + +# uImage build relies on mkimage being availble on your host for ARC target +# You will need to build u-boot for ARC, rename mkimage to arc-elf32-mkimage +# and make sure it's reacable from your PATH + +OBJCOPYFLAGS= -O binary -R .note -R .note.gnu.build-id -R .comment -S + +LINUX_START_TEXT = $$($(READELF) -h vmlinux | \ + grep "Entry point address" | grep -o 0x.*) + +UIMAGE_LOADADDR = $(CONFIG_LINUX_LINK_BASE) +UIMAGE_ENTRYADDR = $(LINUX_START_TEXT) + +targets += vmlinux.bin +targets += vmlinux.bin.gz +targets += vmlinux.bin.lzma +targets += uImage.bin +targets += uImage.gz +targets += uImage.lzma + +$(obj)/vmlinux.bin: vmlinux FORCE + $(call if_changed,objcopy) + +$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE + $(call if_changed,gzip) + +$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE + $(call if_changed,lzma) + +$(obj)/uImage.bin: $(obj)/vmlinux.bin FORCE + $(call if_changed,uimage,none) + +$(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE + $(call if_changed,uimage,gzip) + +$(obj)/uImage.lzma: $(obj)/vmlinux.bin.lzma FORCE + $(call if_changed,uimage,lzma) diff --git a/arch/arc/boot/dts/Makefile b/arch/arc/boot/dts/Makefile new file mode 100644 index 0000000000..4237aa5de3 --- /dev/null +++ b/arch/arc/boot/dts/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 +# Built-in dtb +builtindtb-y := nsim_700 + +ifneq ($(CONFIG_ARC_BUILTIN_DTB_NAME),) + builtindtb-y := $(CONFIG_ARC_BUILTIN_DTB_NAME) +endif + +obj-y += $(builtindtb-y).dtb.o +dtb-y := $(builtindtb-y).dtb + +# for CONFIG_OF_ALL_DTBS test +dtstree := $(srctree)/$(src) +dtb- := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts)) + +# board-specific dtc flags +DTC_FLAGS_hsdk += --pad 
20 diff --git a/arch/arc/boot/dts/abilis_tb100.dtsi b/arch/arc/boot/dts/abilis_tb100.dtsi new file mode 100644 index 0000000000..41026a3bfa --- /dev/null +++ b/arch/arc/boot/dts/abilis_tb100.dtsi @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Abilis Systems TB100 SOC device tree + * + * Copyright (C) Abilis Systems 2013 + * + * Author: Christian Ruppert + */ + +/include/ "abilis_tb10x.dtsi" + + +/ { + soc100 { + bus-frequency = <166666666>; + + pll0: oscillator { + clock-frequency = <1000000000>; + }; + cpu_clk: clkdiv_cpu { + clock-mult = <1>; + clock-div = <2>; + }; + ahb_clk: clkdiv_ahb { + clock-mult = <1>; + clock-div = <6>; + }; + + iomux: iomux@ff10601c { + /* Port 1 */ + pctl_tsin_s0: pctl-tsin-s0 { /* Serial TS-in 0 */ + abilis,function = "mis0"; + }; + pctl_tsin_s1: pctl-tsin-s1 { /* Serial TS-in 1 */ + abilis,function = "mis1"; + }; + pctl_gpio_a: pctl-gpio-a { /* GPIO bank A */ + abilis,function = "gpioa"; + }; + pctl_tsin_p1: pctl-tsin-p1 { /* Parallel TS-in 1 */ + abilis,function = "mip1"; + }; + /* Port 2 */ + pctl_tsin_s2: pctl-tsin-s2 { /* Serial TS-in 2 */ + abilis,function = "mis2"; + }; + pctl_tsin_s3: pctl-tsin-s3 { /* Serial TS-in 3 */ + abilis,function = "mis3"; + }; + pctl_gpio_c: pctl-gpio-c { /* GPIO bank C */ + abilis,function = "gpioc"; + }; + pctl_tsin_p3: pctl-tsin-p3 { /* Parallel TS-in 3 */ + abilis,function = "mip3"; + }; + /* Port 3 */ + pctl_tsin_s4: pctl-tsin-s4 { /* Serial TS-in 4 */ + abilis,function = "mis4"; + }; + pctl_tsin_s5: pctl-tsin-s5 { /* Serial TS-in 5 */ + abilis,function = "mis5"; + }; + pctl_gpio_e: pctl-gpio-e { /* GPIO bank E */ + abilis,function = "gpioe"; + }; + pctl_tsin_p5: pctl-tsin-p5 { /* Parallel TS-in 5 */ + abilis,function = "mip5"; + }; + /* Port 4 */ + pctl_tsin_s6: pctl-tsin-s6 { /* Serial TS-in 6 */ + abilis,function = "mis6"; + }; + pctl_tsin_s7: pctl-tsin-s7 { /* Serial TS-in 7 */ + abilis,function = "mis7"; + }; + pctl_gpio_g: pctl-gpio-g { /* GPIO bank G */ + abilis,function = "gpiog"; + }; + pctl_tsin_p7: pctl-tsin-p7 { /* Parallel TS-in 7 */ + abilis,function = "mip7"; + }; + /* Port 5 */ + pctl_gpio_j: pctl-gpio-j { /* GPIO bank J */ + abilis,function = "gpioj"; + }; + pctl_gpio_k: pctl-gpio-k { /* GPIO bank K */ + abilis,function = "gpiok"; + }; + pctl_ciplus: pctl-ciplus { /* CI+ interface */ + abilis,function = "ciplus"; + }; + pctl_mcard: pctl-mcard { /* M-Card interface */ + abilis,function = "mcard"; + }; + /* Port 6 */ + pctl_tsout_p: pctl-tsout-p { /* Parallel TS-out */ + abilis,function = "mop"; + }; + pctl_tsout_s0: pctl-tsout-s0 { /* Serial TS-out 0 */ + abilis,function = "mos0"; + }; + pctl_tsout_s1: pctl-tsout-s1 { /* Serial TS-out 1 */ + abilis,function = "mos1"; + }; + pctl_tsout_s2: pctl-tsout-s2 { /* Serial TS-out 2 */ + abilis,function = "mos2"; + }; + pctl_tsout_s3: pctl-tsout-s3 { /* Serial TS-out 3 */ + abilis,function = "mos3"; + }; + /* Port 7 */ + pctl_uart0: pctl-uart0 { /* UART 0 */ + abilis,function = "uart0"; + }; + pctl_uart1: pctl-uart1 { /* UART 1 */ + abilis,function = "uart1"; + }; + pctl_gpio_l: pctl-gpio-l { /* GPIO bank L */ + abilis,function = "gpiol"; + }; + pctl_gpio_m: pctl-gpio-m { /* GPIO bank M */ + abilis,function = "gpiom"; + }; + /* Port 8 */ + pctl_spi3: pctl-spi3 { + abilis,function = "spi3"; + }; + /* Port 9 */ + pctl_spi1: pctl-spi1 { + abilis,function = "spi1"; + }; + pctl_gpio_n: pctl-gpio-n { + abilis,function = "gpion"; + }; + /* Unmuxed GPIOs */ + pctl_gpio_b: pctl-gpio-b { + abilis,function = "gpiob"; + }; + pctl_gpio_d: pctl-gpio-d { + 
abilis,function = "gpiod"; + }; + pctl_gpio_f: pctl-gpio-f { + abilis,function = "gpiof"; + }; + pctl_gpio_h: pctl-gpio-h { + abilis,function = "gpioh"; + }; + pctl_gpio_i: pctl-gpio-i { + abilis,function = "gpioi"; + }; + }; + + gpioa: gpio@ff140000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff140000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <3>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpioa"; + }; + gpiob: gpio@ff141000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff141000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <2>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpiob"; + }; + gpioc: gpio@ff142000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff142000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <3>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpioc"; + }; + gpiod: gpio@ff143000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff143000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <2>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpiod"; + }; + gpioe: gpio@ff144000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff144000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <3>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpioe"; + }; + gpiof: gpio@ff145000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff145000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <2>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpiof"; + }; + gpiog: gpio@ff146000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff146000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <3>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpiog"; + }; + gpioh: gpio@ff147000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff147000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <2>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpioh"; + }; + gpioi: gpio@ff148000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff148000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <12>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpioi"; + }; + gpioj: gpio@ff149000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff149000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <32>; + gpio-ranges = <&iomux 0 0 0>; + 
gpio-ranges-group-names = "gpioj"; + }; + gpiok: gpio@ff14a000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff14a000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <22>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpiok"; + }; + gpiol: gpio@ff14b000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff14b000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <4>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpiol"; + }; + gpiom: gpio@ff14c000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff14c000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <4>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpiom"; + }; + gpion: gpio@ff14d000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff14d000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <5>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpion"; + }; + }; +}; diff --git a/arch/arc/boot/dts/abilis_tb100_dvk.dts b/arch/arc/boot/dts/abilis_tb100_dvk.dts new file mode 100644 index 0000000000..6d346de5e3 --- /dev/null +++ b/arch/arc/boot/dts/abilis_tb100_dvk.dts @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Abilis Systems TB100 Development Kit PCB device tree + * + * Copyright (C) Abilis Systems 2013 + * + * Author: Christian Ruppert + */ + +/dts-v1/; + +/include/ "abilis_tb100.dtsi" + +/ { + model = "abilis,tb100"; + chosen { + bootargs = "earlycon=uart8250,mmio32,0xff100000,9600n8 console=ttyS0,9600n8"; + }; + + aliases { }; + + memory { + device_type = "memory"; + reg = <0x80000000 0x08000000>; /* 128M */ + }; + + soc100 { + uart@ff100000 { + pinctrl-names = "default"; + pinctrl-0 = <&pctl_uart0>; + }; + ethernet@fe100000 { + phy-mode = "rgmii"; + }; + + i2c0: i2c@ff120000 { + i2c-sda-hold-time-ns = <432>; + }; + i2c1: i2c@ff121000 { + i2c-sda-hold-time-ns = <432>; + }; + i2c2: i2c@ff122000 { + i2c-sda-hold-time-ns = <432>; + }; + i2c3: i2c@ff123000 { + i2c-sda-hold-time-ns = <432>; + }; + i2c4: i2c@ff124000 { + i2c-sda-hold-time-ns = <432>; + }; + + leds { + compatible = "gpio-leds"; + power { + label = "Power"; + gpios = <&gpioi 0 0>; + linux,default-trigger = "default-on"; + }; + heartbeat { + label = "Heartbeat"; + gpios = <&gpioi 1 0>; + linux,default-trigger = "heartbeat"; + }; + led2 { + label = "LED2"; + gpios = <&gpioi 2 0>; + default-state = "off"; + }; + led3 { + label = "LED3"; + gpios = <&gpioi 3 0>; + default-state = "off"; + }; + led4 { + label = "LED4"; + gpios = <&gpioi 4 0>; + default-state = "off"; + }; + led5 { + label = "LED5"; + gpios = <&gpioi 5 0>; + default-state = "off"; + }; + led6 { + label = "LED6"; + gpios = <&gpioi 6 0>; + default-state = "off"; + }; + led7 { + label = "LED7"; + gpios = <&gpioi 7 0>; + default-state = "off"; + }; + led8 { + label = "LED8"; + gpios = <&gpioi 8 0>; + default-state = "off"; + }; + led9 { + label = "LED9"; + gpios = <&gpioi 9 0>; + default-state = "off"; + }; + led10 { + label = "LED10"; + gpios = <&gpioi 10 0>; + default-state = "off"; + }; + led11 { + label = "LED11"; + gpios = <&gpioi 11 0>; + 
default-state = "off"; + }; + }; + }; +}; diff --git a/arch/arc/boot/dts/abilis_tb101.dtsi b/arch/arc/boot/dts/abilis_tb101.dtsi new file mode 100644 index 0000000000..041ab1ba02 --- /dev/null +++ b/arch/arc/boot/dts/abilis_tb101.dtsi @@ -0,0 +1,345 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Abilis Systems TB101 SOC device tree + * + * Copyright (C) Abilis Systems 2013 + * + * Author: Christian Ruppert + */ + +/include/ "abilis_tb10x.dtsi" + + +/ { + soc100 { + bus-frequency = <166666666>; + + pll0: oscillator { + clock-frequency = <1000000000>; + }; + cpu_clk: clkdiv_cpu { + clock-mult = <1>; + clock-div = <2>; + }; + ahb_clk: clkdiv_ahb { + clock-mult = <1>; + clock-div = <6>; + }; + + iomux: iomux@ff10601c { + /* Port 1 */ + pctl_tsin_s0: pctl-tsin-s0 { /* Serial TS-in 0 */ + abilis,function = "mis0"; + }; + pctl_tsin_s1: pctl-tsin-s1 { /* Serial TS-in 1 */ + abilis,function = "mis1"; + }; + pctl_gpio_a: pctl-gpio-a { /* GPIO bank A */ + abilis,function = "gpioa"; + }; + pctl_tsin_p1: pctl-tsin-p1 { /* Parallel TS-in 1 */ + abilis,function = "mip1"; + }; + /* Port 2 */ + pctl_tsin_s2: pctl-tsin-s2 { /* Serial TS-in 2 */ + abilis,function = "mis2"; + }; + pctl_tsin_s3: pctl-tsin-s3 { /* Serial TS-in 3 */ + abilis,function = "mis3"; + }; + pctl_gpio_c: pctl-gpio-c { /* GPIO bank C */ + abilis,function = "gpioc"; + }; + pctl_tsin_p3: pctl-tsin-p3 { /* Parallel TS-in 3 */ + abilis,function = "mip3"; + }; + /* Port 3 */ + pctl_tsin_s4: pctl-tsin-s4 { /* Serial TS-in 4 */ + abilis,function = "mis4"; + }; + pctl_tsin_s5: pctl-tsin-s5 { /* Serial TS-in 5 */ + abilis,function = "mis5"; + }; + pctl_gpio_e: pctl-gpio-e { /* GPIO bank E */ + abilis,function = "gpioe"; + }; + pctl_tsin_p5: pctl-tsin-p5 { /* Parallel TS-in 5 */ + abilis,function = "mip5"; + }; + /* Port 4 */ + pctl_tsin_s6: pctl-tsin-s6 { /* Serial TS-in 6 */ + abilis,function = "mis6"; + }; + pctl_tsin_s7: pctl-tsin-s7 { /* Serial TS-in 7 */ + abilis,function = "mis7"; + }; + pctl_gpio_g: pctl-gpio-g { /* GPIO bank G */ + abilis,function = "gpiog"; + }; + pctl_tsin_p7: pctl-tsin-p7 { /* Parallel TS-in 7 */ + abilis,function = "mip7"; + }; + /* Port 5 */ + pctl_gpio_j: pctl-gpio-j { /* GPIO bank J */ + abilis,function = "gpioj"; + }; + pctl_gpio_k: pctl-gpio-k { /* GPIO bank K */ + abilis,function = "gpiok"; + }; + pctl_ciplus: pctl-ciplus { /* CI+ interface */ + abilis,function = "ciplus"; + }; + pctl_mcard: pctl-mcard { /* M-Card interface */ + abilis,function = "mcard"; + }; + pctl_stc0: pctl-stc0 { /* Smart card I/F 0 */ + abilis,function = "stc0"; + }; + pctl_stc1: pctl-stc1 { /* Smart card I/F 1 */ + abilis,function = "stc1"; + }; + /* Port 6 */ + pctl_tsout_p: pctl-tsout-p { /* Parallel TS-out */ + abilis,function = "mop"; + }; + pctl_tsout_s0: pctl-tsout-s0 { /* Serial TS-out 0 */ + abilis,function = "mos0"; + }; + pctl_tsout_s1: pctl-tsout-s1 { /* Serial TS-out 1 */ + abilis,function = "mos1"; + }; + pctl_tsout_s2: pctl-tsout-s2 { /* Serial TS-out 2 */ + abilis,function = "mos2"; + }; + pctl_tsout_s3: pctl-tsout-s3 { /* Serial TS-out 3 */ + abilis,function = "mos3"; + }; + /* Port 7 */ + pctl_uart0: pctl-uart0 { /* UART 0 */ + abilis,function = "uart0"; + }; + pctl_uart1: pctl-uart1 { /* UART 1 */ + abilis,function = "uart1"; + }; + pctl_gpio_l: pctl-gpio-l { /* GPIO bank L */ + abilis,function = "gpiol"; + }; + pctl_gpio_m: pctl-gpio-m { /* GPIO bank M */ + abilis,function = "gpiom"; + }; + /* Port 8 */ + pctl_spi3: pctl-spi3 { + abilis,function = "spi3"; + }; + pctl_jtag: pctl-jtag { + abilis,function = "jtag"; + 
}; + /* Port 9 */ + pctl_spi1: pctl-spi1 { + abilis,function = "spi1"; + }; + pctl_gpio_n: pctl-gpio-n { + abilis,function = "gpion"; + }; + /* Unmuxed GPIOs */ + pctl_gpio_b: pctl-gpio-b { + abilis,function = "gpiob"; + }; + pctl_gpio_d: pctl-gpio-d { + abilis,function = "gpiod"; + }; + pctl_gpio_f: pctl-gpio-f { + abilis,function = "gpiof"; + }; + pctl_gpio_h: pctl-gpio-h { + abilis,function = "gpioh"; + }; + pctl_gpio_i: pctl-gpio-i { + abilis,function = "gpioi"; + }; + }; + + gpioa: gpio@ff140000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff140000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <3>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpioa"; + }; + gpiob: gpio@ff141000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff141000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <2>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpiob"; + }; + gpioc: gpio@ff142000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff142000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <3>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpioc"; + }; + gpiod: gpio@ff143000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff143000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <2>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpiod"; + }; + gpioe: gpio@ff144000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff144000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <3>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpioe"; + }; + gpiof: gpio@ff145000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff145000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <2>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpiof"; + }; + gpiog: gpio@ff146000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff146000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <3>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpiog"; + }; + gpioh: gpio@ff147000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff147000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <2>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpioh"; + }; + gpioi: gpio@ff148000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff148000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <12>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpioi"; + }; + gpioj: gpio@ff149000 { + compatible = 
"abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff149000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <32>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpioj"; + }; + gpiok: gpio@ff14a000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff14a000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <22>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpiok"; + }; + gpiol: gpio@ff14b000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff14b000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <4>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpiol"; + }; + gpiom: gpio@ff14c000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff14c000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <4>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpiom"; + }; + gpion: gpio@ff14d000 { + compatible = "abilis,tb10x-gpio"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <27 2>; + reg = <0xff14d000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + abilis,ngpio = <5>; + gpio-ranges = <&iomux 0 0 0>; + gpio-ranges-group-names = "gpion"; + }; + }; +}; diff --git a/arch/arc/boot/dts/abilis_tb101_dvk.dts b/arch/arc/boot/dts/abilis_tb101_dvk.dts new file mode 100644 index 0000000000..d11b790f8b --- /dev/null +++ b/arch/arc/boot/dts/abilis_tb101_dvk.dts @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Abilis Systems TB101 Development Kit PCB device tree + * + * Copyright (C) Abilis Systems 2013 + * + * Author: Christian Ruppert + */ + +/dts-v1/; + +/include/ "abilis_tb101.dtsi" + +/ { + model = "abilis,tb101"; + chosen { + bootargs = "earlycon=uart8250,mmio32,0xff100000,9600n8 console=ttyS0,9600n8"; + }; + + aliases { }; + + memory { + device_type = "memory"; + reg = <0x80000000 0x08000000>; /* 128M */ + }; + + soc100 { + uart@ff100000 { + pinctrl-names = "default"; + pinctrl-0 = <&pctl_uart0>; + }; + ethernet@fe100000 { + phy-mode = "rgmii"; + }; + + i2c0: i2c@ff120000 { + i2c-sda-hold-time-ns = <432>; + }; + i2c1: i2c@ff121000 { + i2c-sda-hold-time-ns = <432>; + }; + i2c2: i2c@ff122000 { + i2c-sda-hold-time-ns = <432>; + }; + i2c3: i2c@ff123000 { + i2c-sda-hold-time-ns = <432>; + }; + i2c4: i2c@ff124000 { + i2c-sda-hold-time-ns = <432>; + }; + + leds { + compatible = "gpio-leds"; + power { + label = "Power"; + gpios = <&gpioi 0 0>; + linux,default-trigger = "default-on"; + }; + heartbeat { + label = "Heartbeat"; + gpios = <&gpioi 1 0>; + linux,default-trigger = "heartbeat"; + }; + led2 { + label = "LED2"; + gpios = <&gpioi 2 0>; + default-state = "off"; + }; + led3 { + label = "LED3"; + gpios = <&gpioi 3 0>; + default-state = "off"; + }; + led4 { + label = "LED4"; + gpios = <&gpioi 4 0>; + default-state = "off"; + }; + led5 { + label = "LED5"; + gpios = <&gpioi 5 0>; + default-state = "off"; + }; + led6 { + label = "LED6"; + gpios = <&gpioi 6 0>; + default-state = "off"; + }; + led7 { + label = "LED7"; + gpios = <&gpioi 7 0>; + default-state = "off"; + }; + led8 { + label = "LED8"; + gpios = <&gpioi 8 0>; 
+ default-state = "off"; + }; + led9 { + label = "LED9"; + gpios = <&gpioi 9 0>; + default-state = "off"; + }; + led10 { + label = "LED10"; + gpios = <&gpioi 10 0>; + default-state = "off"; + }; + led11 { + label = "LED11"; + gpios = <&gpioi 11 0>; + default-state = "off"; + }; + }; + }; +}; diff --git a/arch/arc/boot/dts/abilis_tb10x.dtsi b/arch/arc/boot/dts/abilis_tb10x.dtsi new file mode 100644 index 0000000000..aa62619f21 --- /dev/null +++ b/arch/arc/boot/dts/abilis_tb10x.dtsi @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Abilis Systems TB10X SOC device tree + * + * Copyright (C) Abilis Systems 2013 + * + * Author: Christian Ruppert + */ + + +/ { + compatible = "abilis,arc-tb10x"; + #address-cells = <1>; + #size-cells = <1>; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + cpu@0 { + device_type = "cpu"; + compatible = "snps,arc770d"; + reg = <0>; + }; + }; + + /* TIMER0 with interrupt for clockevent */ + timer0 { + compatible = "snps,arc-timer"; + interrupts = <3>; + interrupt-parent = <&intc>; + clocks = <&cpu_clk>; + }; + + /* TIMER1 for free running clocksource */ + timer1 { + compatible = "snps,arc-timer"; + clocks = <&cpu_clk>; + }; + + soc100 { + #address-cells = <1>; + #size-cells = <1>; + device_type = "soc"; + ranges = <0xfe000000 0xfe000000 0x02000000 + 0x000f0000 0x000f0000 0x00010000>; + compatible = "abilis,tb10x", "simple-bus"; + + pll0: oscillator { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-output-names = "pll0"; + }; + cpu_clk: clkdiv_cpu { + compatible = "fixed-factor-clock"; + #clock-cells = <0>; + clocks = <&pll0>; + clock-output-names = "cpu_clk"; + }; + ahb_clk: clkdiv_ahb { + compatible = "fixed-factor-clock"; + #clock-cells = <0>; + clocks = <&pll0>; + clock-output-names = "ahb_clk"; + }; + + iomux: iomux@ff10601c { + compatible = "abilis,tb10x-iomux"; + #gpio-range-cells = <3>; + reg = <0xff10601c 0x4>; + }; + + intc: interrupt-controller { + compatible = "snps,arc700-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + tb10x_ictl: pic@fe002000 { + compatible = "abilis,tb10x-ictl"; + reg = <0xfe002000 0x20>; + interrupt-controller; + #interrupt-cells = <2>; + interrupt-parent = <&intc>; + interrupts = <5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 + 20 21 22 23 24 25 26 27 28 29 30 31>; + }; + + uart@ff100000 { + compatible = "snps,dw-apb-uart"; + reg = <0xff100000 0x100>; + clock-frequency = <166666666>; + interrupts = <25 8>; + reg-shift = <2>; + reg-io-width = <4>; + interrupt-parent = <&tb10x_ictl>; + }; + ethernet@fe100000 { + compatible = "snps,dwmac-3.70a","snps,dwmac"; + reg = <0xfe100000 0x1058>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <6 8>; + interrupt-names = "macirq"; + clocks = <&ahb_clk>; + clock-names = "stmmaceth"; + }; + dma@fe000000 { + compatible = "snps,dma-spear1340"; + reg = <0xfe000000 0x400>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <14 8>; + dma-channels = <6>; + dma-requests = <0>; + dma-masters = <1>; + #dma-cells = <3>; + chan_allocation_order = <0>; + chan_priority = <1>; + block_size = <0x7ff>; + data-width = <4>; + clocks = <&ahb_clk>; + clock-names = "hclk"; + multi-block = <1 1 1 1 1 1>; + }; + + i2c0: i2c@ff120000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "snps,designware-i2c"; + reg = <0xff120000 0x1000>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <12 8>; + clocks = <&ahb_clk>; + }; + i2c1: i2c@ff121000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "snps,designware-i2c"; + reg = <0xff121000 0x1000>; + 
interrupt-parent = <&tb10x_ictl>; + interrupts = <12 8>; + clocks = <&ahb_clk>; + }; + i2c2: i2c@ff122000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "snps,designware-i2c"; + reg = <0xff122000 0x1000>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <12 8>; + clocks = <&ahb_clk>; + }; + i2c3: i2c@ff123000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "snps,designware-i2c"; + reg = <0xff123000 0x1000>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <12 8>; + clocks = <&ahb_clk>; + }; + i2c4: i2c@ff124000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "snps,designware-i2c"; + reg = <0xff124000 0x1000>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <12 8>; + clocks = <&ahb_clk>; + }; + + spi0: spi@fe010000 { + #address-cells = <1>; + #size-cells = <0>; + cell-index = <0>; + compatible = "abilis,tb100-spi"; + num-cs = <1>; + reg = <0xfe010000 0x20>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <26 8>; + clocks = <&ahb_clk>; + }; + spi1: spi@fe011000 { + #address-cells = <1>; + #size-cells = <0>; + cell-index = <1>; + compatible = "abilis,tb100-spi"; + num-cs = <2>; + reg = <0xfe011000 0x20>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <10 8>; + clocks = <&ahb_clk>; + }; + + tb10x_tsm: tb10x-tsm@ff316000 { + compatible = "abilis,tb100-tsm"; + reg = <0xff316000 0x400>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <17 8>; + output-clkdiv = <4>; + global-packet-delay = <0x21>; + port-packet-delay = <0>; + }; + tb10x_stream_proc: tb10x-stream-proc { + compatible = "abilis,tb100-streamproc"; + reg = <0xfff00000 0x200>, + <0x000f0000 0x10000>, + <0xfff00200 0x105>, + <0xff10600c 0x1>, + <0xfe001018 0x1>; + reg-names = "mbox", + "sp_iccm", + "mbox_irq", + "cpuctrl", + "a6it_int_force"; + interrupt-parent = <&tb10x_ictl>; + interrupts = <20 2>, <19 2>; + interrupt-names = "cmd_irq", "event_irq"; + }; + tb10x_mdsc0: tb10x-mdscr@ff300000 { + compatible = "abilis,tb100-mdscr"; + reg = <0xff300000 0x7000>; + tb100-mdscr-manage-tsin; + }; + tb10x_mscr0: tb10x-mdscr@ff307000 { + compatible = "abilis,tb100-mdscr"; + reg = <0xff307000 0x7000>; + }; + tb10x_scr0: tb10x-mdscr@ff30e000 { + compatible = "abilis,tb100-mdscr"; + reg = <0xff30e000 0x4000>; + tb100-mdscr-manage-tsin; + }; + tb10x_scr1: tb10x-mdscr@ff312000 { + compatible = "abilis,tb100-mdscr"; + reg = <0xff312000 0x4000>; + tb100-mdscr-manage-tsin; + }; + tb10x_wfb: tb10x-wfb@ff319000 { + compatible = "abilis,tb100-wfb"; + reg = <0xff319000 0x1000>; + interrupt-parent = <&tb10x_ictl>; + interrupts = <16 8>; + }; + }; +}; diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi new file mode 100644 index 0000000000..2a151607b0 --- /dev/null +++ b/arch/arc/boot/dts/axc001.dtsi @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2013-15 Synopsys, Inc. 
(www.synopsys.com) + */ + +/* + * Device tree for AXC001 770D/EM6/AS221 CPU card + * Note that this file only supports the 770D CPU + */ + +/include/ "skeleton.dtsi" + +/ { + compatible = "snps,arc"; + #address-cells = <2>; + #size-cells = <2>; + + cpu_card { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + ranges = <0x00000000 0x0 0xf0000000 0x10000000>; + + core_clk: core_clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <750000000>; + }; + + input_clk: input-clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <33333333>; + }; + + core_intc: arc700-intc@cpu { + compatible = "snps,arc700-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + /* + * this GPIO block ORs all interrupts on CPU card (creg,..) + * to uplink only 1 IRQ to ARC core intc + */ + dw-apb-gpio@2000 { + compatible = "snps,dw-apb-gpio"; + reg = < 0x2000 0x80 >; + #address-cells = <1>; + #size-cells = <0>; + + ictl_intc: gpio-controller@0 { + compatible = "snps,dw-apb-gpio-port"; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <30>; + reg = <0>; + interrupt-controller; + #interrupt-cells = <2>; + interrupt-parent = <&core_intc>; + interrupts = <15>; + }; + }; + + debug_uart: dw-apb-uart@5000 { + compatible = "snps,dw-apb-uart"; + reg = <0x5000 0x100>; + clock-frequency = <33333000>; + interrupt-parent = <&ictl_intc>; + interrupts = <19 4>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + + arcpct0: pct { + compatible = "snps,arc700-pct"; + }; + }; + + /* + * This INTC is actually connected to DW APB GPIO + * which acts as a wire between MB INTC and CPU INTC. + * GPIO INTC is configured in platform init code + * and here we mimic direct connection from MB INTC to + * CPU INTC, thus we set "interrupts = <7>" instead of + * "interrupts = <12>" + * + * This intc actually resides on MB, but we move it here to + * avoid duplicating the MB dtsi file given that IRQ from + * this intc to cpu intc are different for axs101 and axs103 + */ + mb_intc: interrupt-controller@e0012000 { + #interrupt-cells = <1>; + compatible = "snps,dw-apb-ictl"; + reg = < 0x0 0xe0012000 0x0 0x200 >; + interrupt-controller; + interrupt-parent = <&core_intc>; + interrupts = < 7 >; + }; + + memory { + device_type = "memory"; + /* CONFIG_LINUX_RAM_BASE needs to match low mem start */ + reg = <0x0 0x80000000 0x0 0x1b000000>; /* (512 - 32) MiB */ + }; + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + /* + * We just move frame buffer area to the very end of + * available DDR. And even though in case of ARC770 there's + * no strict requirement for a frame-buffer to be in any + * particular location it allows us to use the same + * base board's DT node for ARC PGU as for ARc HS38. + */ + frame_buffer: frame_buffer@9e000000 { + compatible = "shared-dma-pool"; + reg = <0x0 0x9e000000 0x0 0x2000000>; + no-map; + }; + }; +}; diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi new file mode 100644 index 0000000000..cd1edcf4f9 --- /dev/null +++ b/arch/arc/boot/dts/axc003.dtsi @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2014-15 Synopsys, Inc. 
(www.synopsys.com) + */ + +/* + * Device tree for AXC003 CPU card: HS38x UP configuration + */ + +/include/ "skeleton_hs.dtsi" + +/ { + compatible = "snps,arc"; + #address-cells = <2>; + #size-cells = <2>; + + cpu_card { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + ranges = <0x00000000 0x0 0xf0000000 0x10000000>; + + input_clk: input-clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <33333333>; + }; + + core_clk: core-clk@80 { + compatible = "snps,axs10x-arc-pll-clock"; + reg = <0x80 0x10>, <0x100 0x10>; + #clock-cells = <0>; + clocks = <&input_clk>; + + /* + * Set initial core pll output frequency to 90MHz. + * It will be applied at the core pll driver probing + * on early boot. + */ + assigned-clocks = <&core_clk>; + assigned-clock-rates = <90000000>; + }; + + core_intc: archs-intc@cpu { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + /* + * this GPIO block ORs all interrupts on CPU card (creg,..) + * to uplink only 1 IRQ to ARC core intc + */ + dw-apb-gpio@2000 { + compatible = "snps,dw-apb-gpio"; + reg = < 0x2000 0x80 >; + #address-cells = <1>; + #size-cells = <0>; + + ictl_intc: gpio-controller@0 { + compatible = "snps,dw-apb-gpio-port"; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <30>; + reg = <0>; + interrupt-controller; + #interrupt-cells = <2>; + interrupt-parent = <&core_intc>; + interrupts = <25>; + }; + }; + + debug_uart: dw-apb-uart@5000 { + compatible = "snps,dw-apb-uart"; + reg = <0x5000 0x100>; + clock-frequency = <33333000>; + interrupt-parent = <&ictl_intc>; + interrupts = <2 4>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + + arcpct0: pct { + compatible = "snps,archs-pct"; + #interrupt-cells = <1>; + interrupt-parent = <&core_intc>; + interrupts = <20>; + }; + }; + + /* + * Mark DMA peripherals connected via IOC port as dma-coherent. We do + * it via overlay because peripherals defined in axs10x_mb.dtsi are + * used for both AXS101 and AXS103 boards and only AXS103 has IOC (so + * only AXS103 board has HW-coherent DMA peripherals) + * We don't need to mark pgu@17000 as dma-coherent because it uses + * external DMA buffer located outside of IOC aperture. + */ + axs10x_mb { + ethernet@18000 { + dma-coherent; + }; + + ehci@40000 { + dma-coherent; + }; + + ohci@60000 { + dma-coherent; + }; + + mmc@15000 { + dma-coherent; + }; + }; + + /* + * The DW APB ICTL intc on MB is connected to CPU intc via a + * DT "invisible" DW APB GPIO block, configured to simply pass thru + * interrupts - setup accordingly in platform init (plat-axs10x/axs10x.c) + * + * So here we mimic a direct connection between them, ignoring the + * APB GPIO.
Thus set "interrupts = <24>" (DW APB GPIO to core) + * instead of "interrupts = <12>" (DW APB ICTL to DW APB GPIO) + * + * This intc actually resides on MB, but we move it here to + * avoid duplicating the MB dtsi file given that IRQ from + * this intc to cpu intc are different for axs101 and axs103 + */ + mb_intc: interrupt-controller@e0012000 { + #interrupt-cells = <1>; + compatible = "snps,dw-apb-ictl"; + reg = < 0x0 0xe0012000 0x0 0x200 >; + interrupt-controller; + interrupt-parent = <&core_intc>; + interrupts = < 24 >; + }; + + memory { + device_type = "memory"; + /* CONFIG_LINUX_RAM_BASE needs to match low mem start */ + reg = <0x0 0x80000000 0x0 0x20000000 /* 512 MiB low mem */ + 0x1 0xc0000000 0x0 0x40000000>; /* 1 GiB highmem */ + }; + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + /* + * Move frame buffer out of IOC aperture (0x8z-0xaz). + */ + frame_buffer: frame_buffer@be000000 { + compatible = "shared-dma-pool"; + reg = <0x0 0xbe000000 0x0 0x2000000>; + no-map; + }; + }; +}; diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi new file mode 100644 index 0000000000..70779386ca --- /dev/null +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2014, 2015 Synopsys, Inc. (www.synopsys.com) + */ + +/* + * Device tree for AXC003 CPU card: HS38x2 (Dual Core) with IDU intc + */ + +/include/ "skeleton_hs_idu.dtsi" + +/ { + compatible = "snps,arc"; + #address-cells = <2>; + #size-cells = <2>; + + cpu_card { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + ranges = <0x00000000 0x0 0xf0000000 0x10000000>; + + input_clk: input-clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <33333333>; + }; + + core_clk: core-clk@80 { + compatible = "snps,axs10x-arc-pll-clock"; + reg = <0x80 0x10>, <0x100 0x10>; + #clock-cells = <0>; + clocks = <&input_clk>; + + /* + * Set initial core pll output frequency to 100MHz. + * It will be applied at the core pll driver probing + * on early boot. + */ + assigned-clocks = <&core_clk>; + assigned-clock-rates = <100000000>; + }; + + core_intc: archs-intc@cpu { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + idu_intc: idu-interrupt-controller { + compatible = "snps,archs-idu-intc"; + interrupt-controller; + interrupt-parent = <&core_intc>; + #interrupt-cells = <1>; + }; + + /* + * this GPIO block ORs all interrupts on CPU card (creg,..) + * to uplink only 1 IRQ to ARC core intc + */ + dw-apb-gpio@2000 { + compatible = "snps,dw-apb-gpio"; + reg = < 0x2000 0x80 >; + #address-cells = <1>; + #size-cells = <0>; + + ictl_intc: gpio-controller@0 { + compatible = "snps,dw-apb-gpio-port"; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <30>; + reg = <0>; + interrupt-controller; + #interrupt-cells = <2>; + interrupt-parent = <&idu_intc>; + interrupts = <1>; + }; + }; + + debug_uart: dw-apb-uart@5000 { + compatible = "snps,dw-apb-uart"; + reg = <0x5000 0x100>; + clock-frequency = <33333000>; + interrupt-parent = <&ictl_intc>; + interrupts = <2 4>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + + arcpct0: pct { + compatible = "snps,archs-pct"; + #interrupt-cells = <1>; + interrupt-parent = <&core_intc>; + interrupts = <20>; + }; + }; + + /* + * Mark DMA peripherals connected via IOC port as dma-coherent. 
We do + * it via overlay because peripherals defined in axs10x_mb.dtsi are + * used for both AXS101 and AXS103 boards and only AXS103 has IOC (so + * only AXS103 board has HW-coherent DMA peripherals) + * We don't need to mark pgu@17000 as dma-coherent because it uses + * external DMA buffer located outside of IOC aperture. + */ + axs10x_mb { + ethernet@18000 { + dma-coherent; + }; + + ehci@40000 { + dma-coherent; + }; + + ohci@60000 { + dma-coherent; + }; + + mmc@15000 { + dma-coherent; + }; + }; + + /* + * This INTC is actually connected to DW APB GPIO + * which acts as a wire between MB INTC and CPU INTC. + * GPIO INTC is configured in platform init code + * and here we mimic direct connection from MB INTC to + * CPU INTC, thus we set "interrupts = <0 1>" instead of + * "interrupts = <12>" + * + * This intc actually resides on MB, but we move it here to + * avoid duplicating the MB dtsi file given that IRQ from + * this intc to cpu intc are different for axs101 and axs103 + */ + mb_intc: interrupt-controller@e0012000 { + #interrupt-cells = <1>; + compatible = "snps,dw-apb-ictl"; + reg = < 0x0 0xe0012000 0x0 0x200 >; + interrupt-controller; + interrupt-parent = <&idu_intc>; + interrupts = <0>; + }; + + memory { + device_type = "memory"; + /* CONFIG_LINUX_RAM_BASE needs to match low mem start */ + reg = <0x0 0x80000000 0x0 0x20000000 /* 512 MiB low mem */ + 0x1 0xc0000000 0x0 0x40000000>; /* 1 GiB highmem */ + }; + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + /* + * Move frame buffer out of IOC aperture (0x8z-0xaz). + */ + frame_buffer: frame_buffer@be000000 { + compatible = "shared-dma-pool"; + reg = <0x0 0xbe000000 0x0 0x2000000>; + no-map; + }; + }; +}; diff --git a/arch/arc/boot/dts/axs101.dts b/arch/arc/boot/dts/axs101.dts new file mode 100644 index 0000000000..c4cfc5f4f4 --- /dev/null +++ b/arch/arc/boot/dts/axs101.dts @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com) + * + * ARC AXS101 S/W development platform + */ +/dts-v1/; + +/include/ "axc001.dtsi" +/include/ "axs10x_mb.dtsi" + +/ { + model = "snps,axs101"; + compatible = "snps,axs101", "snps,arc-sdp"; + + chosen { + bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0 print-fatal-signals=1"; + }; +}; diff --git a/arch/arc/boot/dts/axs103.dts b/arch/arc/boot/dts/axs103.dts new file mode 100644 index 0000000000..16ccb7ba7a --- /dev/null +++ b/arch/arc/boot/dts/axs103.dts @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + */ + +/* + * Device Tree for AXS103 SDP with AXS10X Main Board and + * AXC003 FPGA Card (with UP bitfile) + */ +/dts-v1/; + +/include/ "axc003.dtsi" +/include/ "axs10x_mb.dtsi" + +/ { + model = "snps,axs103"; + compatible = "snps,axs103", "snps,arc-sdp"; + + chosen { + bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=ttyS3,115200n8 debug print-fatal-signals=1"; + }; +}; diff --git a/arch/arc/boot/dts/axs103_idu.dts b/arch/arc/boot/dts/axs103_idu.dts new file mode 100644 index 0000000000..a934b92a8c --- /dev/null +++ b/arch/arc/boot/dts/axs103_idu.dts @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2014-15 Synopsys, Inc. 
(www.synopsys.com) + */ + +/* + * Device Tree for AXS103 SDP with AXS10X Main Board and + * AXC003 FPGA Card (with SMP bitfile) + */ +/dts-v1/; + +/include/ "axc003_idu.dtsi" +/include/ "axs10x_mb.dtsi" + +/ { + model = "snps,axs103-smp"; + compatible = "snps,axs103", "snps,arc-sdp"; + + chosen { + bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 print-fatal-signals=1 consoleblank=0"; + }; +}; diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi new file mode 100644 index 0000000000..99d3e7175b --- /dev/null +++ b/arch/arc/boot/dts/axs10x_mb.dtsi @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Support for peripherals on the AXS10x mainboard + * + * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com) + */ + +/ { + aliases { + ethernet = &gmac; + }; + + axs10x_mb { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x00000000 0x0 0xe0000000 0x10000000>; + interrupt-parent = <&mb_intc>; + + creg_rst: reset-controller@11220 { + compatible = "snps,axs10x-reset"; + #reset-cells = <1>; + reg = <0x11220 0x4>; + }; + + i2sclk: i2sclk@100a0 { + compatible = "snps,axs10x-i2s-pll-clock"; + reg = <0x100a0 0x10>; + clocks = <&i2spll_clk>; + #clock-cells = <0>; + }; + + clocks { + i2spll_clk: i2spll_clk { + compatible = "fixed-clock"; + clock-frequency = <27000000>; + #clock-cells = <0>; + }; + + i2cclk: i2cclk { + compatible = "fixed-clock"; + clock-frequency = <50000000>; + #clock-cells = <0>; + }; + + apbclk: apbclk { + compatible = "fixed-clock"; + clock-frequency = <50000000>; + #clock-cells = <0>; + }; + + mmcclk: mmcclk { + compatible = "fixed-clock"; + /* + * DW sdio controller has external ciu clock divider + * controlled via register in SDIO IP. It divides + * sdio_ref_clk (which comes from CGU) by 16 for + * default. So default mmcclk clock (which comes + * to sdk_in) is 25000000 Hz. + */ + clock-frequency = <25000000>; + #clock-cells = <0>; + }; + }; + + pguclk: pguclk@10080 { + compatible = "snps,axs10x-pgu-pll-clock"; + reg = <0x10080 0x10>, <0x110 0x10>; + #clock-cells = <0>; + clocks = <&input_clk>; + }; + + gmac: ethernet@18000 { + #interrupt-cells = <1>; + compatible = "snps,dwmac"; + reg = < 0x18000 0x2000 >; + interrupts = < 4 >; + interrupt-names = "macirq"; + phy-mode = "rgmii"; + snps,pbl = < 32 >; + snps,multicast-filter-bins = <256>; + clocks = <&apbclk>; + clock-names = "stmmaceth"; + max-speed = <100>; + resets = <&creg_rst 5>; + reset-names = "stmmaceth"; + mac-address = [00 00 00 00 00 00]; /* Filled in by U-Boot */ + }; + + ehci@40000 { + compatible = "generic-ehci"; + reg = < 0x40000 0x100 >; + interrupts = < 8 >; + }; + + ohci@60000 { + compatible = "generic-ohci"; + reg = < 0x60000 0x100 >; + interrupts = < 8 >; + }; + + /* + * According to DW Mobile Storage databook it is required + * to use "Hold Register" if card is enumerated in SDR12 or + * SDR25 modes. + * + * Utilization of "Hold Register" is already implemented via + * dw_mci_pltfm_prepare_command() which in its turn gets + * used through dw_mci_drv_data->prepare_command call-back. + * This call-back is used in Altera Socfpga platform and so + * we may reuse it saying that we're compatible with their + * "altr,socfpga-dw-mshc". + * + * Most probably "Hold Register" utilization is platform- + * independent requirement which means that single unified + * "snps,dw-mshc" should be enough for all users of DW MMC once + * dw_mci_pltfm_prepare_command() is used in generic platform + * code. 
+ */ + mmc@15000 { + compatible = "altr,socfpga-dw-mshc"; + reg = < 0x15000 0x400 >; + fifo-depth = < 16 >; + card-detect-delay = < 200 >; + clocks = <&apbclk>, <&mmcclk>; + clock-names = "biu", "ciu"; + interrupts = < 7 >; + bus-width = < 4 >; + }; + + uart@20000 { + compatible = "snps,dw-apb-uart"; + reg = <0x20000 0x100>; + clock-frequency = <33333333>; + interrupts = <17>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + + uart@21000 { + compatible = "snps,dw-apb-uart"; + reg = <0x21000 0x100>; + clock-frequency = <33333333>; + interrupts = <18>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + + /* UART muxed with USB data port (ttyS3) */ + uart@22000 { + compatible = "snps,dw-apb-uart"; + reg = <0x22000 0x100>; + clock-frequency = <33333333>; + interrupts = <19>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + + i2c@1d000 { + compatible = "snps,designware-i2c"; + reg = <0x1d000 0x100>; + clock-frequency = <400000>; + clocks = <&i2cclk>; + interrupts = <14>; + }; + + i2s: i2s@1e000 { + compatible = "snps,designware-i2s"; + reg = <0x1e000 0x100>; + clocks = <&i2sclk 0>; + clock-names = "i2sclk"; + interrupts = <15>; + #sound-dai-cells = <0>; + }; + + i2c@1f000 { + compatible = "snps,designware-i2c"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x1f000 0x100>; + clock-frequency = <400000>; + clocks = <&i2cclk>; + interrupts = <16>; + + adv7511:adv7511@39{ + compatible="adi,adv7511"; + reg = <0x39>; + interrupts = <23>; + adi,input-depth = <8>; + adi,input-colorspace = "rgb"; + adi,input-clock = "1x"; + adi,clock-delay = <0x03>; + #sound-dai-cells = <0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + /* RGB/YUV input */ + port@0 { + reg = <0>; + adv7511_input:endpoint { + remote-endpoint = <&pgu_output>; + }; + }; + + /* HDMI output */ + port@1 { + reg = <1>; + adv7511_output: endpoint { + remote-endpoint = <&hdmi_connector_in>; + }; + }; + }; + }; + + eeprom@54{ + compatible = "atmel,24c01"; + reg = <0x54>; + pagesize = <0x8>; + }; + + eeprom@57{ + compatible = "atmel,24c04"; + reg = <0x57>; + pagesize = <0x8>; + }; + }; + + hdmi0: connector { + compatible = "hdmi-connector"; + type = "a"; + port { + hdmi_connector_in: endpoint { + remote-endpoint = <&adv7511_output>; + }; + }; + }; + + gpio0:gpio@13000 { + compatible = "snps,dw-apb-gpio"; + reg = <0x13000 0x1000>; + #address-cells = <1>; + #size-cells = <0>; + + gpio0_banka: gpio-controller@0 { + compatible = "snps,dw-apb-gpio-port"; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <32>; + reg = <0>; + }; + + gpio0_bankb: gpio-controller@1 { + compatible = "snps,dw-apb-gpio-port"; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <8>; + reg = <1>; + }; + + gpio0_bankc: gpio-controller@2 { + compatible = "snps,dw-apb-gpio-port"; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <8>; + reg = <2>; + }; + }; + + gpio1:gpio@14000 { + compatible = "snps,dw-apb-gpio"; + reg = <0x14000 0x1000>; + #address-cells = <1>; + #size-cells = <0>; + + gpio1_banka: gpio-controller@0 { + compatible = "snps,dw-apb-gpio-port"; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <30>; + reg = <0>; + }; + + gpio1_bankb: gpio-controller@1 { + compatible = "snps,dw-apb-gpio-port"; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <10>; + reg = <1>; + }; + + gpio1_bankc: gpio-controller@2 { + compatible = "snps,dw-apb-gpio-port"; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <8>; + reg = <2>; + }; + }; + + pgu@17000 { + compatible = 
"snps,arcpgu"; + reg = <0x17000 0x400>; + clocks = <&pguclk>; + clock-names = "pxlclk"; + memory-region = <&frame_buffer>; + port { + pgu_output: endpoint { + remote-endpoint = <&adv7511_input>; + }; + }; + }; + + sound_playback { + compatible = "simple-audio-card"; + simple-audio-card,name = "AXS10x HDMI Audio"; + simple-audio-card,format = "i2s"; + simple-audio-card,cpu { + sound-dai = <&i2s>; + }; + simple-audio-card,codec { + sound-dai = <&adv7511>; + }; + }; + }; +}; diff --git a/arch/arc/boot/dts/haps_hs.dts b/arch/arc/boot/dts/haps_hs.dts new file mode 100644 index 0000000000..76ad527a08 --- /dev/null +++ b/arch/arc/boot/dts/haps_hs.dts @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2016-2014 Synopsys, Inc. (www.synopsys.com) + */ +/dts-v1/; + +/include/ "skeleton_hs.dtsi" + +/ { + model = "snps,zebu_hs"; + compatible = "snps,zebu_hs"; + #address-cells = <2>; + #size-cells = <2>; + interrupt-parent = <&core_intc>; + + memory { + device_type = "memory"; + /* CONFIG_LINUX_RAM_BASE needs to match low mem start */ + reg = <0x0 0x80000000 0x0 0x40000000 /* 1 GB low mem */ + 0x1 0x00000000 0x0 0x40000000>; /* 1 GB highmem */ + }; + + chosen { + bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1"; + }; + + aliases { + serial0 = &uart0; + }; + + fpga { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + /* only perip space at end of low mem accessible + bus addr, parent bus addr, size */ + ranges = <0x80000000 0x0 0x80000000 0x80000000>; + + core_clk: core_clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <50000000>; + }; + + core_intc: interrupt-controller { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + uart0: serial@f0000000 { + compatible = "ns16550a"; + reg = <0xf0000000 0x2000>; + interrupts = <24>; + clock-frequency = <50000000>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + no-loopback-test = <1>; + }; + + arcpct0: pct { + compatible = "snps,archs-pct"; + #interrupt-cells = <1>; + interrupts = <20>; + }; + + virtio0: virtio@f0100000 { + compatible = "virtio,mmio"; + reg = <0xf0100000 0x2000>; + interrupts = <31>; + }; + + virtio1: virtio@f0102000 { + compatible = "virtio,mmio"; + reg = <0xf0102000 0x2000>; + interrupts = <32>; + }; + + virtio2: virtio@f0104000 { + compatible = "virtio,mmio"; + reg = <0xf0104000 0x2000>; + interrupts = <33>; + }; + + virtio3: virtio@f0106000 { + compatible = "virtio,mmio"; + reg = <0xf0106000 0x2000>; + interrupts = <34>; + }; + + virtio4: virtio@f0108000 { + compatible = "virtio,mmio"; + reg = <0xf0108000 0x2000>; + interrupts = <35>; + }; + }; +}; diff --git a/arch/arc/boot/dts/haps_hs_idu.dts b/arch/arc/boot/dts/haps_hs_idu.dts new file mode 100644 index 0000000000..738c76cd07 --- /dev/null +++ b/arch/arc/boot/dts/haps_hs_idu.dts @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2016-2014 Synopsys, Inc. 
(www.synopsys.com) + */ +/dts-v1/; + +/include/ "skeleton_hs_idu.dtsi" + +/ { + model = "snps,zebu_hs-smp"; + compatible = "snps,zebu_hs"; + #address-cells = <1>; + #size-cells = <1>; + interrupt-parent = <&core_intc>; + + memory { + device_type = "memory"; + reg = <0x80000000 0x20000000>; /* 512 */ + }; + + chosen { + bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1"; + }; + + aliases { + serial0 = &uart0; + }; + + fpga { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + /* child and parent address space 1:1 mapped */ + ranges; + + core_clk: core_clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <50000000>; /* 50 MHZ */ + }; + + core_intc: interrupt-controller { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + idu_intc: idu-interrupt-controller { + compatible = "snps,archs-idu-intc"; + interrupt-controller; + interrupt-parent = <&core_intc>; + #interrupt-cells = <1>; + }; + + uart0: serial@f0000000 { + compatible = "ns16550a"; + reg = <0xf0000000 0x2000>; + interrupt-parent = <&idu_intc>; + interrupts = <0>; + clock-frequency = <50000000>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + no-loopback-test = <1>; + }; + + arcpct0: pct { + compatible = "snps,archs-pct"; + #interrupt-cells = <1>; + interrupts = <20>; + }; + }; +}; diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts new file mode 100644 index 0000000000..dcaa44e408 --- /dev/null +++ b/arch/arc/boot/dts/hsdk.dts @@ -0,0 +1,351 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2017 Synopsys, Inc. (www.synopsys.com) + */ + +/* + * Device Tree for ARC HS Development Kit + */ +/dts-v1/; + +#include <dt-bindings/gpio/gpio.h> +#include <dt-bindings/reset/snps,hsdk-reset.h> + +/ { + model = "snps,hsdk"; + compatible = "snps,hsdk"; + + #address-cells = <2>; + #size-cells = <2>; + + chosen { + bootargs = "earlycon=uart8250,mmio32,0xf0005000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1"; + }; + + aliases { + ethernet = &gmac; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "snps,archs38"; + reg = <0>; + clocks = <&core_clk>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "snps,archs38"; + reg = <1>; + clocks = <&core_clk>; + }; + + cpu@2 { + device_type = "cpu"; + compatible = "snps,archs38"; + reg = <2>; + clocks = <&core_clk>; + }; + + cpu@3 { + device_type = "cpu"; + compatible = "snps,archs38"; + reg = <3>; + clocks = <&core_clk>; + }; + }; + + input_clk: input-clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <33333333>; + }; + + reg_5v0: regulator-5v0 { + compatible = "regulator-fixed"; + + regulator-name = "5v0-supply"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + }; + + cpu_intc: cpu-interrupt-controller { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + idu_intc: idu-interrupt-controller { + compatible = "snps,archs-idu-intc"; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&cpu_intc>; + }; + + arcpct: pct { + compatible = "snps,archs-pct"; + interrupt-parent = <&cpu_intc>; + interrupts = <20>; + }; + + /* TIMER0 with interrupt for clockevent */ + timer { + compatible = "snps,arc-timer"; + interrupts = <16>; + interrupt-parent = <&cpu_intc>; + clocks = <&core_clk>; + }; + + /* 64-bit Global Free Running Counter */ + gfrc { + compatible = "snps,archs-timer-gfrc"; + clocks = 
<&core_clk>; + }; + + soc { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + interrupt-parent = <&idu_intc>; + + ranges = <0x00000000 0x0 0xf0000000 0x10000000>; + + cgu_rst: reset-controller@8a0 { + compatible = "snps,hsdk-reset"; + #reset-cells = <1>; + reg = <0x8a0 0x4>, <0xff0 0x4>; + }; + + core_clk: core-clk@0 { + compatible = "snps,hsdk-core-pll-clock"; + reg = <0x00 0x10>, <0x14b8 0x4>; + #clock-cells = <0>; + clocks = <&input_clk>; + + /* + * Set initial core pll output frequency to 1GHz. + * It will be applied at the core pll driver probing + * on early boot. + */ + assigned-clocks = <&core_clk>; + assigned-clock-rates = <1000000000>; + }; + + serial: serial@5000 { + compatible = "snps,dw-apb-uart"; + reg = <0x5000 0x100>; + clock-frequency = <33330000>; + interrupts = <6>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + + gmacclk: gmacclk { + compatible = "fixed-clock"; + clock-frequency = <400000000>; + #clock-cells = <0>; + }; + + mmcclk_ciu: mmcclk-ciu { + compatible = "fixed-clock"; + /* + * DW sdio controller has external ciu clock divider + * controlled via register in SDIO IP. Due to its + * unexpected default value (it should divide by 1 + * but it divides by 8) SDIO IP uses wrong clock and + * works unstable (see STAR 9001204800) + * We switched to the minimum possible value of the + * divisor (div-by-2) in HSDK platform code. + * So add temporary fix and change clock frequency + * to 50000000 Hz until we fix dw sdio driver itself. + */ + clock-frequency = <50000000>; + #clock-cells = <0>; + }; + + mmcclk_biu: mmcclk-biu { + compatible = "fixed-clock"; + clock-frequency = <400000000>; + #clock-cells = <0>; + }; + + gpu_core_clk: gpu-core-clk { + compatible = "fixed-clock"; + clock-frequency = <400000000>; + #clock-cells = <0>; + }; + + gpu_dma_clk: gpu-dma-clk { + compatible = "fixed-clock"; + clock-frequency = <400000000>; + #clock-cells = <0>; + }; + + gpu_cfg_clk: gpu-cfg-clk { + compatible = "fixed-clock"; + clock-frequency = <200000000>; + #clock-cells = <0>; + }; + + dmac_core_clk: dmac-core-clk { + compatible = "fixed-clock"; + clock-frequency = <400000000>; + #clock-cells = <0>; + }; + + dmac_cfg_clk: dmac-gpu-cfg-clk { + compatible = "fixed-clock"; + clock-frequency = <200000000>; + #clock-cells = <0>; + }; + + gmac: ethernet@8000 { + #interrupt-cells = <1>; + compatible = "snps,dwmac"; + reg = <0x8000 0x2000>; + interrupts = <10>; + interrupt-names = "macirq"; + phy-mode = "rgmii-id"; + snps,pbl = <32>; + snps,multicast-filter-bins = <256>; + clocks = <&gmacclk>; + clock-names = "stmmaceth"; + phy-handle = <&phy0>; + resets = <&cgu_rst HSDK_ETH_RESET>; + reset-names = "stmmaceth"; + mac-address = [00 00 00 00 00 00]; /* Filled in by U-Boot */ + dma-coherent; + + tx-fifo-depth = <4096>; + rx-fifo-depth = <4096>; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + compatible = "snps,dwmac-mdio"; + phy0: ethernet-phy@0 { /* Micrel KSZ9031 */ + reg = <0>; + }; + }; + }; + + ohci@60000 { + compatible = "snps,hsdk-v1.0-ohci", "generic-ohci"; + reg = <0x60000 0x100>; + interrupts = <15>; + resets = <&cgu_rst HSDK_USB_RESET>; + dma-coherent; + }; + + ehci@40000 { + compatible = "snps,hsdk-v1.0-ehci", "generic-ehci"; + reg = <0x40000 0x100>; + interrupts = <15>; + resets = <&cgu_rst HSDK_USB_RESET>; + dma-coherent; + }; + + mmc@a000 { + compatible = "altr,socfpga-dw-mshc"; + reg = <0xa000 0x400>; + num-slots = <1>; + fifo-depth = <16>; + card-detect-delay = <200>; + clocks = <&mmcclk_biu>, <&mmcclk_ciu>; + clock-names 
= "biu", "ciu"; + interrupts = <12>; + bus-width = <4>; + dma-coherent; + }; + + spi0: spi@20000 { + compatible = "snps,dw-apb-ssi"; + reg = <0x20000 0x100>; + #address-cells = <1>; + #size-cells = <0>; + interrupts = <16>; + num-cs = <2>; + reg-io-width = <4>; + clocks = <&input_clk>; + cs-gpios = <&creg_gpio 0 GPIO_ACTIVE_LOW>, + <&creg_gpio 1 GPIO_ACTIVE_LOW>; + + spi-flash@0 { + compatible = "sst26wf016b", "jedec,spi-nor"; + reg = <0>; + #address-cells = <1>; + #size-cells = <1>; + spi-max-frequency = <4000000>; + }; + + adc@1 { + compatible = "ti,adc108s102"; + reg = <1>; + vref-supply = <®_5v0>; + spi-max-frequency = <1000000>; + }; + }; + + creg_gpio: gpio@14b0 { + compatible = "snps,creg-gpio-hsdk"; + reg = <0x14b0 0x4>; + gpio-controller; + #gpio-cells = <2>; + ngpios = <2>; + }; + + gpio: gpio@3000 { + compatible = "snps,dw-apb-gpio"; + reg = <0x3000 0x20>; + #address-cells = <1>; + #size-cells = <0>; + + gpio_port_a: gpio-controller@0 { + compatible = "snps,dw-apb-gpio-port"; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <24>; + reg = <0>; + }; + }; + + gpu_3d: gpu@90000 { + compatible = "vivante,gc"; + reg = <0x90000 0x4000>; + clocks = <&gpu_dma_clk>, + <&gpu_cfg_clk>, + <&gpu_core_clk>, + <&gpu_core_clk>; + clock-names = "bus", "reg", "core", "shader"; + interrupts = <28>; + }; + + dmac: dmac@80000 { + compatible = "snps,axi-dma-1.01a"; + reg = <0x80000 0x400>; + interrupts = <27>; + clocks = <&dmac_core_clk>, <&dmac_cfg_clk>; + clock-names = "core-clk", "cfgr-clk"; + + dma-channels = <4>; + snps,dma-masters = <2>; + snps,data-width = <3>; + snps,block-size = <4096 4096 4096 4096>; + snps,priority = <0 1 2 3>; + snps,axi-max-burst-len = <16>; + }; + }; + + memory@80000000 { + #address-cells = <2>; + #size-cells = <2>; + device_type = "memory"; + reg = <0x0 0x80000000 0x0 0x40000000>; /* 1 GB lowmem */ + /* 0x1 0x00000000 0x0 0x40000000>; 1 GB highmem */ + }; +}; diff --git a/arch/arc/boot/dts/nsim_700.dts b/arch/arc/boot/dts/nsim_700.dts new file mode 100644 index 0000000000..f8832a15e1 --- /dev/null +++ b/arch/arc/boot/dts/nsim_700.dts @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com) + */ +/dts-v1/; + +/include/ "skeleton.dtsi" + +/ { + model = "snps,nsim"; + compatible = "snps,nsim"; + #address-cells = <1>; + #size-cells = <1>; + interrupt-parent = <&core_intc>; + + chosen { + bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=ttyS0,115200n8 print-fatal-signals=1"; + }; + + aliases { + serial0 = &uart0; + }; + + fpga { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + /* child and parent address space 1:1 mapped */ + ranges; + + core_clk: core_clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <80000000>; + }; + + core_intc: interrupt-controller { + compatible = "snps,arc700-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + uart0: serial@f0000000 { + compatible = "ns16550a"; + reg = <0xf0000000 0x2000>; + interrupts = <24>; + clock-frequency = <50000000>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + no-loopback-test = <1>; + }; + + arcpct0: pct { + compatible = "snps,arc700-pct"; + }; + }; +}; diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts new file mode 100644 index 0000000000..fc207c4a4e --- /dev/null +++ b/arch/arc/boot/dts/nsimosci.dts @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2013 Synopsys, Inc. 
(www.synopsys.com) + */ +/dts-v1/; + +/include/ "skeleton.dtsi" + +/ { + model = "snps,nsimosci"; + compatible = "snps,nsimosci"; + #address-cells = <1>; + #size-cells = <1>; + interrupt-parent = <&core_intc>; + + chosen { + /* this is for console on PGU */ + /* bootargs = "console=tty0 consoleblank=0"; */ + /* this is for console on serial */ + bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24 print-fatal-signals=1"; + }; + + aliases { + serial0 = &uart0; + }; + + fpga { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + /* child and parent address space 1:1 mapped */ + ranges; + + core_clk: core_clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <20000000>; + }; + + core_intc: interrupt-controller { + compatible = "snps,arc700-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + uart0: serial@f0000000 { + compatible = "ns8250"; + reg = <0xf0000000 0x2000>; + interrupts = <11>; + clock-frequency = <3686400>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + no-loopback-test = <1>; + }; + + pguclk: pguclk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <25175000>; + }; + + pgu@f9000000 { + compatible = "snps,arcpgu"; + reg = <0xf9000000 0x400>; + clocks = <&pguclk>; + clock-names = "pxlclk"; + }; + + ps2: ps2@f9001000 { + compatible = "snps,arc_ps2"; + reg = <0xf9000400 0x14>; + interrupts = <13>; + interrupt-names = "arc_ps2_irq"; + }; + + eth0: ethernet@f0003000 { + compatible = "ezchip,nps-mgt-enet"; + reg = <0xf0003000 0x44>; + interrupts = <7>; + }; + + arcpct0: pct { + compatible = "snps,arc700-pct"; + }; + }; +}; diff --git a/arch/arc/boot/dts/nsimosci_hs.dts b/arch/arc/boot/dts/nsimosci_hs.dts new file mode 100644 index 0000000000..71f1f84161 --- /dev/null +++ b/arch/arc/boot/dts/nsimosci_hs.dts @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2014-15 Synopsys, Inc. 
(www.synopsys.com) + */ +/dts-v1/; + +/include/ "skeleton_hs.dtsi" + +/ { + model = "snps,nsimosci_hs"; + compatible = "snps,nsimosci_hs"; + #address-cells = <1>; + #size-cells = <1>; + interrupt-parent = <&core_intc>; + + chosen { + /* this is for console on PGU */ + /* bootargs = "console=tty0 consoleblank=0"; */ + /* this is for console on serial */ + bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24 print-fatal-signals=1"; + }; + + aliases { + serial0 = &uart0; + }; + + fpga { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + /* child and parent address space 1:1 mapped */ + ranges; + + core_clk: core_clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <20000000>; + }; + + core_intc: core-interrupt-controller { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + uart0: serial@f0000000 { + compatible = "ns8250"; + reg = <0xf0000000 0x2000>; + interrupts = <24>; + clock-frequency = <3686400>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + no-loopback-test = <1>; + }; + + pguclk: pguclk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <25175000>; + }; + + pgu@f9000000 { + compatible = "snps,arcpgu"; + reg = <0xf9000000 0x400>; + clocks = <&pguclk>; + clock-names = "pxlclk"; + }; + + ps2: ps2@f9001000 { + compatible = "snps,arc_ps2"; + reg = <0xf9000400 0x14>; + interrupts = <27>; + interrupt-names = "arc_ps2_irq"; + }; + + eth0: ethernet@f0003000 { + compatible = "ezchip,nps-mgt-enet"; + reg = <0xf0003000 0x44>; + interrupts = <25>; + }; + + arcpct0: pct { + compatible = "snps,archs-pct"; + #interrupt-cells = <1>; + interrupts = <20>; + }; + }; +}; diff --git a/arch/arc/boot/dts/nsimosci_hs_idu.dts b/arch/arc/boot/dts/nsimosci_hs_idu.dts new file mode 100644 index 0000000000..69d794c59d --- /dev/null +++ b/arch/arc/boot/dts/nsimosci_hs_idu.dts @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2014-15 Synopsys, Inc. 
(www.synopsys.com) + */ +/dts-v1/; + +/include/ "skeleton_hs_idu.dtsi" + +/ { + model = "snps,nsimosci_hs-smp"; + compatible = "snps,nsimosci_hs"; + #address-cells = <1>; + #size-cells = <1>; + interrupt-parent = <&core_intc>; + + chosen { + /* this is for console on serial */ + bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24 print-fatal-signals=1"; + }; + + aliases { + serial0 = &uart0; + }; + + fpga { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + /* child and parent address space 1:1 mapped */ + ranges; + + core_clk: core_clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <5000000>; + }; + + core_intc: core-interrupt-controller { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + idu_intc: idu-interrupt-controller { + compatible = "snps,archs-idu-intc"; + interrupt-controller; + interrupt-parent = <&core_intc>; + #interrupt-cells = <1>; + }; + + uart0: serial@f0000000 { + compatible = "ns8250"; + reg = <0xf0000000 0x2000>; + interrupt-parent = <&idu_intc>; + interrupts = <0>; + clock-frequency = <3686400>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + no-loopback-test = <1>; + }; + + pguclk: pguclk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <25175000>; + }; + + pgu@f9000000 { + compatible = "snps,arcpgu"; + reg = <0xf9000000 0x400>; + clocks = <&pguclk>; + clock-names = "pxlclk"; + }; + + ps2: ps2@f9001000 { + compatible = "snps,arc_ps2"; + reg = <0xf9000400 0x14>; + interrupts = <3>; + interrupt-parent = <&idu_intc>; + interrupt-names = "arc_ps2_irq"; + }; + + eth0: ethernet@f0003000 { + compatible = "ezchip,nps-mgt-enet"; + reg = <0xf0003000 0x44>; + interrupt-parent = <&idu_intc>; + interrupts = <1>; + }; + + arcpct0: pct { + compatible = "snps,archs-pct"; + #interrupt-cells = <1>; + interrupts = <20>; + }; + }; +}; diff --git a/arch/arc/boot/dts/skeleton.dtsi b/arch/arc/boot/dts/skeleton.dtsi new file mode 100644 index 0000000000..ba86b8036a --- /dev/null +++ b/arch/arc/boot/dts/skeleton.dtsi @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com) + */ + +/* + * Skeleton device tree; the bare minimum needed to boot; just include and + * add a compatible value. + */ + +/ { + compatible = "snps,arc"; + #address-cells = <1>; + #size-cells = <1>; + chosen { }; + aliases { }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "snps,arc770d"; + reg = <0>; + clocks = <&core_clk>; + }; + }; + + /* TIMER0 with interrupt for clockevent */ + timer0 { + compatible = "snps,arc-timer"; + interrupts = <3>; + interrupt-parent = <&core_intc>; + clocks = <&core_clk>; + }; + + /* TIMER1 for free running clocksource */ + timer1 { + compatible = "snps,arc-timer"; + clocks = <&core_clk>; + }; + + memory { + device_type = "memory"; + reg = <0x80000000 0x10000000>; /* 256M */ + }; +}; diff --git a/arch/arc/boot/dts/skeleton_hs.dtsi b/arch/arc/boot/dts/skeleton_hs.dtsi new file mode 100644 index 0000000000..8fb49890e8 --- /dev/null +++ b/arch/arc/boot/dts/skeleton_hs.dtsi @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2016 Synopsys, Inc. 
(www.synopsys.com) + */ + +/ { + compatible = "snps,arc"; + #address-cells = <1>; + #size-cells = <1>; + chosen { }; + aliases { }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "snps,archs38"; + reg = <0>; + clocks = <&core_clk>; + }; + }; + + /* TIMER0 with interrupt for clockevent */ + timer0 { + compatible = "snps,arc-timer"; + interrupts = <16>; + interrupt-parent = <&core_intc>; + clocks = <&core_clk>; + }; + + /* 64-bit Local RTC: preferred clocksource for UP */ + rtc { + compatible = "snps,archs-timer-rtc"; + clocks = <&core_clk>; + }; + + /* TIMER1 for free running clocksource: Fallback if rtc not found */ + timer1 { + compatible = "snps,arc-timer"; + clocks = <&core_clk>; + }; + + memory { + device_type = "memory"; + reg = <0x80000000 0x10000000>; /* 256M */ + }; +}; diff --git a/arch/arc/boot/dts/skeleton_hs_idu.dtsi b/arch/arc/boot/dts/skeleton_hs_idu.dtsi new file mode 100644 index 0000000000..75f5c9ecb5 --- /dev/null +++ b/arch/arc/boot/dts/skeleton_hs_idu.dtsi @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2016 Synopsys, Inc. (www.synopsys.com) + */ + +/ { + compatible = "snps,arc"; + #address-cells = <1>; + #size-cells = <1>; + chosen { }; + aliases { }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "snps,archs38"; + reg = <0>; + clocks = <&core_clk>; + }; + cpu@1 { + device_type = "cpu"; + compatible = "snps,archs38"; + reg = <1>; + clocks = <&core_clk>; + }; + cpu@2 { + device_type = "cpu"; + compatible = "snps,archs38"; + reg = <2>; + clocks = <&core_clk>; + }; + cpu@3 { + device_type = "cpu"; + compatible = "snps,archs38"; + reg = <3>; + clocks = <&core_clk>; + }; + }; + + /* TIMER0 with interrupt for clockevent */ + timer0 { + compatible = "snps,arc-timer"; + interrupts = <16>; + interrupt-parent = <&core_intc>; + clocks = <&core_clk>; + }; + + /* 64-bit Global Free Running Counter */ + gfrc { + compatible = "snps,archs-timer-gfrc"; + clocks = <&core_clk>; + }; + + memory { + device_type = "memory"; + reg = <0x80000000 0x10000000>; /* 256M */ + }; +}; diff --git a/arch/arc/boot/dts/vdk_axc003.dtsi b/arch/arc/boot/dts/vdk_axc003.dtsi new file mode 100644 index 0000000000..c21d0eb07b --- /dev/null +++ b/arch/arc/boot/dts/vdk_axc003.dtsi @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2013, 2014 Synopsys, Inc. 
(www.synopsys.com) + */ + +/* + * Device tree for AXC003 CPU card: HS38x UP configuration (VDK version) + */ + +/include/ "skeleton_hs.dtsi" + +/ { + compatible = "snps,arc"; + #address-cells = <1>; + #size-cells = <1>; + + cpu_card { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + ranges = <0x00000000 0xf0000000 0x10000000>; + + core_clk: core_clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <50000000>; + }; + + core_intc: archs-intc@cpu { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + debug_uart: dw-apb-uart@5000 { + compatible = "snps,dw-apb-uart"; + reg = <0x5000 0x100>; + clock-frequency = <2403200>; + interrupt-parent = <&core_intc>; + interrupts = <19>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + + }; + + mb_intc: interrupt-controller@e0012000 { + #interrupt-cells = <1>; + compatible = "snps,dw-apb-ictl"; + reg = < 0xe0012000 0x200 >; + interrupt-controller; + interrupt-parent = <&core_intc>; + interrupts = < 18 >; + }; + + memory { + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x00000000 0x80000000 0x40000000>; + device_type = "memory"; + reg = <0x80000000 0x20000000>; /* 512MiB */ + }; +}; diff --git a/arch/arc/boot/dts/vdk_axc003_idu.dtsi b/arch/arc/boot/dts/vdk_axc003_idu.dtsi new file mode 100644 index 0000000000..4d348853ac --- /dev/null +++ b/arch/arc/boot/dts/vdk_axc003_idu.dtsi @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2014, 2015 Synopsys, Inc. (www.synopsys.com) + */ + +/* + * Device tree for AXC003 CPU card: + * HS38x2 (Dual Core) with IDU intc (VDK version) + */ + +/include/ "skeleton_hs_idu.dtsi" + +/ { + compatible = "snps,arc"; + #address-cells = <1>; + #size-cells = <1>; + + cpu_card { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + ranges = <0x00000000 0xf0000000 0x10000000>; + + core_clk: core_clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <50000000>; + }; + + core_intc: archs-intc@cpu { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + idu_intc: idu-interrupt-controller { + compatible = "snps,archs-idu-intc"; + interrupt-controller; + interrupt-parent = <&core_intc>; + #interrupt-cells = <1>; + }; + + debug_uart: dw-apb-uart@5000 { + compatible = "snps,dw-apb-uart"; + reg = <0x5000 0x100>; + clock-frequency = <2403200>; + interrupt-parent = <&idu_intc>; + interrupts = <2>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + + }; + + mb_intc: interrupt-controller@e0012000 { + #interrupt-cells = <1>; + compatible = "snps,dw-apb-ictl"; + reg = < 0xe0012000 0x200 >; + interrupt-controller; + interrupt-parent = <&idu_intc>; + interrupts = <0>; + }; + + memory { + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x00000000 0x80000000 0x40000000>; + device_type = "memory"; + reg = <0x80000000 0x20000000>; /* 512MiB */ + }; +}; diff --git a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi new file mode 100644 index 0000000000..cbb1797702 --- /dev/null +++ b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Support for peripherals on the AXS10x mainboard (VDK version) + * + * Copyright (C) 2013-15 Synopsys, Inc. 
(www.synopsys.com) + */ + +/ { + axs10x_mb_vdk { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x00000000 0xe0000000 0x10000000>; + interrupt-parent = <&mb_intc>; + + clocks { + apbclk: apbclk { + compatible = "fixed-clock"; + clock-frequency = <50000000>; + #clock-cells = <0>; + }; + + mmcclk: mmcclk { + compatible = "fixed-clock"; + clock-frequency = <50000000>; + #clock-cells = <0>; + }; + + pguclk: pguclk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <25175000>; + }; + }; + + ethernet@18000 { + #interrupt-cells = <1>; + compatible = "snps,dwmac"; + reg = < 0x18000 0x2000 >; + interrupts = < 4 >; + interrupt-names = "macirq"; + phy-mode = "rgmii"; + snps,phy-addr = < 0 >; // VDK model phy address is 0 + snps,pbl = < 32 >; + clocks = <&apbclk>; + clock-names = "stmmaceth"; + }; + + ehci@40000 { + compatible = "generic-ehci"; + reg = < 0x40000 0x100 >; + interrupts = < 8 >; + }; + + uart@20000 { + compatible = "snps,dw-apb-uart"; + reg = <0x20000 0x100>; + clock-frequency = <2403200>; + interrupts = <17>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + + uart@21000 { + compatible = "snps,dw-apb-uart"; + reg = <0x21000 0x100>; + clock-frequency = <2403200>; + interrupts = <18>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + + uart@22000 { + compatible = "snps,dw-apb-uart"; + reg = <0x22000 0x100>; + clock-frequency = <2403200>; + interrupts = <19>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + +/* PGU output directly sent to virtual LCD screen; hdmi controller not modelled */ + pgu@17000 { + compatible = "snps,arcpgu"; + reg = <0x17000 0x400>; + clocks = <&pguclk>; + clock-names = "pxlclk"; + }; + +/* VDK has additional ps2 keyboard/mouse interface integrated in LCD screen model */ + ps2: ps2@e0017400 { + compatible = "snps,arc_ps2"; + reg = <0x17400 0x14>; + interrupts = <5>; + interrupt-names = "arc_ps2_irq"; + }; + + mmc@15000 { + compatible = "snps,dw-mshc"; + reg = <0x15000 0x400>; + fifo-depth = <1024>; + card-detect-delay = <200>; + clocks = <&apbclk>, <&mmcclk>; + clock-names = "biu", "ciu"; + interrupts = <7>; + bus-width = <4>; + }; + }; + + /* + * Embedded Vision subsystem UIO mappings; only relevant for EV VDK + * + * This node is intentionally put outside of MB above because + * it maps areas outside of MB's 0xez-0xfz. + */ + uio_ev: uio@d0000000 { + compatible = "generic-uio"; + reg = <0xd0000000 0x2000 0xd1000000 0x2000 0x90000000 0x10000000 0xc0000000 0x10000000>; + reg-names = "ev_gsa", "ev_ctrl", "ev_shared_mem", "ev_code_mem"; + interrupt-parent = <&mb_intc>; + interrupts = <23>; + }; +}; diff --git a/arch/arc/boot/dts/vdk_hs38.dts b/arch/arc/boot/dts/vdk_hs38.dts new file mode 100644 index 0000000000..cddea7eaca --- /dev/null +++ b/arch/arc/boot/dts/vdk_hs38.dts @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2013 Synopsys, Inc. 
(www.synopsys.com) + * + * ARC HS38 Virtual Development Kit (VDK) + */ +/dts-v1/; + +/include/ "vdk_axc003.dtsi" +/include/ "vdk_axs10x_mb.dtsi" + +/ { + model = "snps,vdk_archs"; + compatible = "snps,axs103"; + + chosen { + bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0"; + }; +}; diff --git a/arch/arc/boot/dts/vdk_hs38_smp.dts b/arch/arc/boot/dts/vdk_hs38_smp.dts new file mode 100644 index 0000000000..f57d1922ee --- /dev/null +++ b/arch/arc/boot/dts/vdk_hs38_smp.dts @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + * + * ARC HS38 Virtual Development Kit, SMP version (VDK) + */ +/dts-v1/; + +/include/ "vdk_axc003_idu.dtsi" +/include/ "vdk_axs10x_mb.dtsi" + +/ { + model = "snps,vdk_archs-smp"; + compatible = "snps,axs103"; + + chosen { + bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0 video=640x480-24"; + }; +}; diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig new file mode 100644 index 0000000000..0016149f95 --- /dev/null +++ b/arch/arc/configs/axs101_defconfig @@ -0,0 +1,109 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y +CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_SLUB_DEBUG is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y +CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_ARC_PLAT_AXS10X=y +CONFIG_AXS101=y +CONFIG_ARC_CACHE_LINE_SHIFT=5 +CONFIG_ARC_BUILTIN_DTB_NAME="axs101" +CONFIG_PREEMPT=y +# CONFIG_COMPACTION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_IPV6 is not set +CONFIG_DEVTMPFS=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_NETDEVICES=y +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_SEEQ is not set +CONFIG_STMMAC_ETH=y +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WIZNET is not set +CONFIG_NATIONAL_PHY=y +# CONFIG_USB_NET_DRIVERS is not set +CONFIG_INPUT_EVDEV=y +CONFIG_MOUSE_PS2_TOUCHKIT=y +CONFIG_MOUSE_SERIAL=y +CONFIG_MOUSE_SYNAPTICS_USB=y +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +CONFIG_I2C=y +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_DESIGNWARE_PLATFORM=y +# CONFIG_HWMON is not set +CONFIG_DRM=m +CONFIG_DRM_I2C_ADV7511=m +CONFIG_DRM_ARCPGU=m +CONFIG_FB=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +# CONFIG_LOGO_LINUX_CLUT224 is not set +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_HCD_PLATFORM=y 
+CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_STORAGE=y +CONFIG_MMC=y +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_PLTFM=y +CONFIG_MMC_DW=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT3_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_NTFS_FS=y +CONFIG_TMPFS=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_STRIP_ASM_SYMS=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 +# CONFIG_SCHED_DEBUG is not set +# CONFIG_DEBUG_PREEMPT is not set +# CONFIG_FTRACE is not set diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig new file mode 100644 index 0000000000..5b031582a1 --- /dev/null +++ b/arch/arc/configs/axs103_defconfig @@ -0,0 +1,107 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y +CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_SLUB_DEBUG is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_ARC_PLAT_AXS10X=y +CONFIG_AXS103=y +CONFIG_ISA_ARCV2=y +CONFIG_ARC_BUILTIN_DTB_NAME="axs103" +CONFIG_PREEMPT=y +# CONFIG_COMPACTION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_IPV6 is not set +CONFIG_DEVTMPFS=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +CONFIG_BLK_DEV_LOOP=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_NETDEVICES=y +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_SEEQ is not set +CONFIG_STMMAC_ETH=y +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WIZNET is not set +CONFIG_NATIONAL_PHY=y +# CONFIG_USB_NET_DRIVERS is not set +CONFIG_INPUT_EVDEV=y +CONFIG_MOUSE_PS2_TOUCHKIT=y +CONFIG_MOUSE_SERIAL=y +CONFIG_MOUSE_SYNAPTICS_USB=y +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +CONFIG_I2C=y +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_DESIGNWARE_PLATFORM=y +# CONFIG_HWMON is not set +CONFIG_FB=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +# CONFIG_LOGO_LINUX_CLUT224 is not set +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_HCD_PLATFORM=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_STORAGE=y +CONFIG_MMC=y +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_PLTFM=y +CONFIG_MMC_DW=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT3_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_NTFS_FS=y +CONFIG_TMPFS=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_STRIP_ASM_SYMS=y 
+CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 +# CONFIG_SCHED_DEBUG is not set +# CONFIG_DEBUG_PREEMPT is not set +# CONFIG_FTRACE is not set diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig new file mode 100644 index 0000000000..d4eec39e01 --- /dev/null +++ b/arch/arc/configs/axs103_smp_defconfig @@ -0,0 +1,110 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y +CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_SLAB=y +CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_ARC_PLAT_AXS10X=y +CONFIG_AXS103=y +CONFIG_ISA_ARCV2=y +CONFIG_SMP=y +CONFIG_ARC_BUILTIN_DTB_NAME="axs103_idu" +CONFIG_PREEMPT=y +# CONFIG_COMPACTION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_IPV6 is not set +CONFIG_DEVTMPFS=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +CONFIG_BLK_DEV_LOOP=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_NETDEVICES=y +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_SEEQ is not set +CONFIG_STMMAC_ETH=y +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WIZNET is not set +CONFIG_NATIONAL_PHY=y +# CONFIG_USB_NET_DRIVERS is not set +CONFIG_INPUT_EVDEV=y +CONFIG_MOUSE_PS2_TOUCHKIT=y +CONFIG_MOUSE_SERIAL=y +CONFIG_MOUSE_SYNAPTICS_USB=y +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +CONFIG_I2C=y +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_DESIGNWARE_PLATFORM=y +# CONFIG_HWMON is not set +CONFIG_DRM=m +CONFIG_DRM_I2C_ADV7511=m +CONFIG_DRM_ARCPGU=m +CONFIG_FB=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +# CONFIG_LOGO_LINUX_CLUT224 is not set +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_HCD_PLATFORM=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_STORAGE=y +CONFIG_MMC=y +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_PLTFM=y +CONFIG_MMC_DW=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT3_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_NTFS_FS=y +CONFIG_TMPFS=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_STRIP_ASM_SYMS=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 +# CONFIG_SCHED_DEBUG is not set +# CONFIG_DEBUG_PREEMPT is not set +# CONFIG_FTRACE is not set diff --git a/arch/arc/configs/haps_hs_defconfig b/arch/arc/configs/haps_hs_defconfig new file mode 100644 index 0000000000..7337cdf4ff --- /dev/null +++ b/arch/arc/configs/haps_hs_defconfig @@ -0,0 +1,65 @@ +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y 
+CONFIG_POSIX_MQUEUE=y +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_PREEMPT=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y +CONFIG_EXPERT=y +CONFIG_PERF_EVENTS=y +# CONFIG_COMPAT_BRK is not set +CONFIG_SLAB=y +CONFIG_ARC_BUILTIN_DTB_NAME="haps_hs" +CONFIG_MODULES=y +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_COMPACTION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_PACKET_DIAG=y +CONFIG_UNIX=y +CONFIG_UNIX_DIAG=y +CONFIG_NET_KEY=y +CONFIG_INET=y +# CONFIG_IPV6 is not set +# CONFIG_WIRELESS is not set +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +CONFIG_VIRTIO_BLK=y +CONFIG_NETDEVICES=y +CONFIG_VIRTIO_NET=y +# CONFIG_ETHERNET is not set +# CONFIG_WLAN is not set +CONFIG_INPUT_EVDEV=y +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=1 +CONFIG_SERIAL_8250_RUNTIME_UARTS=1 +CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_VIRTIO_MMIO=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_NFS_FS=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_DEBUG_MEMORY_INIT=y +# CONFIG_DEBUG_PREEMPT is not set diff --git a/arch/arc/configs/haps_hs_smp_defconfig b/arch/arc/configs/haps_hs_smp_defconfig new file mode 100644 index 0000000000..bc927221af --- /dev/null +++ b/arch/arc/configs/haps_hs_smp_defconfig @@ -0,0 +1,65 @@ +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_PREEMPT=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y +CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_SLAB=y +CONFIG_SMP=y +CONFIG_ARC_BUILTIN_DTB_NAME="haps_hs_idu" +CONFIG_KPROBES=y +CONFIG_MODULES=y +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_COMPACTION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_PACKET_DIAG=y +CONFIG_UNIX=y +CONFIG_UNIX_DIAG=y +CONFIG_NET_KEY=y +CONFIG_INET=y +# CONFIG_IPV6 is not set +# CONFIG_WIRELESS is not set +CONFIG_DEVTMPFS=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_BLK_DEV is not set +CONFIG_NETDEVICES=y +# CONFIG_ETHERNET is not set +# CONFIG_WLAN is not set +CONFIG_INPUT_EVDEV=y +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=1 +CONFIG_SERIAL_8250_RUNTIME_UARTS=1 +CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_NFS_FS=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_SOFTLOCKUP_DETECTOR=y +# 
CONFIG_DEBUG_PREEMPT is not set diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig new file mode 100644 index 0000000000..aa000075a5 --- /dev/null +++ b/arch/arc/configs/hsdk_defconfig @@ -0,0 +1,96 @@ +CONFIG_SYSVIPC=y +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_BLK_DEV_RAM=y +CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y +CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_SLAB=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_ARC_SOC_HSDK=y +CONFIG_ISA_ARCV2=y +CONFIG_SMP=y +CONFIG_LINUX_LINK_BASE=0x90000000 +CONFIG_LINUX_RAM_BASE=0x80000000 +CONFIG_ARC_BUILTIN_DTB_NAME="hsdk" +CONFIG_PREEMPT=y +# CONFIG_COMPACTION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_DEVTMPFS=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +CONFIG_MTD=y +CONFIG_MTD_SPI_NOR=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_NETDEVICES=y +CONFIG_STMMAC_ETH=y +CONFIG_MICREL_PHY=y +CONFIG_INPUT_EVDEV=y +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +CONFIG_SPI=y +CONFIG_SPI_DESIGNWARE=y +CONFIG_SPI_DW_MMIO=y +CONFIG_GPIOLIB=y +CONFIG_GPIO_SYSFS=y +CONFIG_GPIO_DWAPB=y +CONFIG_GPIO_SNPS_CREG=y +# CONFIG_HWMON is not set +CONFIG_REGULATOR=y +CONFIG_REGULATOR_FIXED_VOLTAGE=y +CONFIG_DRM=y +# CONFIG_DRM_FBDEV_EMULATION is not set +CONFIG_DRM_UDL=y +CONFIG_DRM_ETNAVIV=y +CONFIG_FB=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_USB=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_HCD_PLATFORM=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_STORAGE=y +CONFIG_MMC=y +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_PLTFM=y +CONFIG_MMC_DW=y +CONFIG_DMADEVICES=y +CONFIG_DW_AXI_DMAC=y +CONFIG_IIO=y +CONFIG_TI_ADC108S102=y +CONFIG_EXT3_FS=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_STRIP_ASM_SYMS=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 +# CONFIG_SCHED_DEBUG is not set +# CONFIG_DEBUG_PREEMPT is not set +# CONFIG_FTRACE is not set +CONFIG_CRYPTO_ECHAINIV=y diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig new file mode 100644 index 0000000000..326f6cde78 --- /dev/null +++ b/arch/arc/configs/nsim_700_defconfig @@ -0,0 +1,61 @@ +# CONFIG_LOCALVERSION_AUTO is not set +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_HIGH_RES_TIMERS=y +CONFIG_PREEMPT=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y +# CONFIG_SLUB_DEBUG is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y +CONFIG_ARC_BUILTIN_DTB_NAME="nsim_700" +CONFIG_KPROBES=y +CONFIG_MODULES=y +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_COMPACTION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_UNIX_DIAG=y +CONFIG_NET_KEY=y +CONFIG_INET=y +# 
CONFIG_IPV6 is not set +CONFIG_DEVTMPFS=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_BLK_DEV is not set +CONFIG_NETDEVICES=y +# CONFIG_ETHERNET is not set +# CONFIG_WLAN is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=1 +CONFIG_SERIAL_8250_RUNTIME_UARTS=1 +CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_NFS_FS=y +# CONFIG_ENABLE_MUST_CHECK is not set +# CONFIG_DEBUG_PREEMPT is not set diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig new file mode 100644 index 0000000000..bf39a00916 --- /dev/null +++ b/arch/arc/configs/nsimosci_defconfig @@ -0,0 +1,69 @@ +# CONFIG_LOCALVERSION_AUTO is not set +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y +# CONFIG_SLUB_DEBUG is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y +CONFIG_KPROBES=y +CONFIG_MODULES=y +# CONFIG_BLK_DEV_BSG is not set +CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci" +# CONFIG_COMPACTION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_UNIX_DIAG=y +CONFIG_NET_KEY=y +CONFIG_INET=y +# CONFIG_IPV6 is not set +CONFIG_DEVTMPFS=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_BLK_DEV is not set +CONFIG_NETDEVICES=y +CONFIG_EZCHIP_NPS_MANAGEMENT_ENET=y +# CONFIG_INPUT_MOUSEDEV is not set +CONFIG_INPUT_EVDEV=y +# CONFIG_MOUSE_PS2_ALPS is not set +# CONFIG_MOUSE_PS2_LOGIPS2PP is not set +# CONFIG_MOUSE_PS2_SYNAPTICS is not set +# CONFIG_MOUSE_PS2_CYPRESS is not set +# CONFIG_MOUSE_PS2_TRACKPOINT is not set +CONFIG_MOUSE_PS2_TOUCHKIT=y +# CONFIG_SERIO_SERPORT is not set +CONFIG_SERIO_ARC_PS2=y +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=1 +CONFIG_SERIAL_8250_RUNTIME_UARTS=1 +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +CONFIG_DRM=y +CONFIG_DRM_ARCPGU=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_LOGO=y +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y +# CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig new file mode 100644 index 0000000000..7121bd71c5 --- /dev/null +++ b/arch/arc/configs/nsimosci_hs_defconfig @@ -0,0 +1,67 @@ +# CONFIG_LOCALVERSION_AUTO is not set +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y +CONFIG_KALLSYMS_ALL=y 
+CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y +# CONFIG_SLUB_DEBUG is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_KPROBES=y +CONFIG_MODULES=y +# CONFIG_BLK_DEV_BSG is not set +CONFIG_ISA_ARCV2=y +CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs" +# CONFIG_COMPACTION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_UNIX_DIAG=y +CONFIG_NET_KEY=y +CONFIG_INET=y +# CONFIG_IPV6 is not set +CONFIG_DEVTMPFS=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_BLK_DEV is not set +CONFIG_NETDEVICES=y +CONFIG_EZCHIP_NPS_MANAGEMENT_ENET=y +CONFIG_INPUT_EVDEV=y +# CONFIG_MOUSE_PS2_ALPS is not set +# CONFIG_MOUSE_PS2_LOGIPS2PP is not set +# CONFIG_MOUSE_PS2_SYNAPTICS is not set +# CONFIG_MOUSE_PS2_TRACKPOINT is not set +CONFIG_MOUSE_PS2_TOUCHKIT=y +# CONFIG_SERIO_SERPORT is not set +CONFIG_SERIO_ARC_PS2=y +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=1 +CONFIG_SERIAL_8250_RUNTIME_UARTS=1 +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +CONFIG_DRM=y +CONFIG_DRM_ARCPGU=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_LOGO=y +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y +# CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig new file mode 100644 index 0000000000..f9863b294a --- /dev/null +++ b/arch/arc/configs/nsimosci_hs_smp_defconfig @@ -0,0 +1,79 @@ +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y +CONFIG_PERF_EVENTS=y +# CONFIG_COMPAT_BRK is not set +CONFIG_KPROBES=y +CONFIG_MODULES=y +# CONFIG_BLK_DEV_BSG is not set +CONFIG_ISA_ARCV2=y +CONFIG_SMP=y +# CONFIG_ARC_TIMERS_64BIT is not set +CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs_idu" +CONFIG_PREEMPT=y +# CONFIG_COMPACTION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_PACKET_DIAG=y +CONFIG_UNIX=y +CONFIG_UNIX_DIAG=y +CONFIG_NET_KEY=y +CONFIG_INET=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_IPV6 is not set +# CONFIG_WIRELESS is not set +CONFIG_DEVTMPFS=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_BLK_DEV is not set +CONFIG_NETDEVICES=y +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_CADENCE is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +CONFIG_EZCHIP_NPS_MANAGEMENT_ENET=y +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_SEEQ is not set +# CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_WLAN is not set +CONFIG_INPUT_EVDEV=y +CONFIG_MOUSE_PS2_TOUCHKIT=y +# CONFIG_SERIO_SERPORT is not set +CONFIG_SERIO_ARC_PS2=y +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=1 +CONFIG_SERIAL_8250_RUNTIME_UARTS=1 +CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set 
+CONFIG_DRM=y +CONFIG_DRM_ARCPGU=y +CONFIG_LOGO=y +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FTRACE=y diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig new file mode 100644 index 0000000000..a12656ec00 --- /dev/null +++ b/arch/arc/configs/tb10x_defconfig @@ -0,0 +1,106 @@ +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_DEFAULT_HOSTNAME="tb10x" +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=16 +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="../tb10x-rootfs.cpio" +CONFIG_INITRAMFS_ROOT_UID=2100 +CONFIG_INITRAMFS_ROOT_GID=501 +# CONFIG_RD_GZIP is not set +CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y +CONFIG_KALLSYMS_ALL=y +# CONFIG_AIO is not set +CONFIG_EMBEDDED=y +# CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y +CONFIG_SLAB=y +CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_BLOCK is not set +CONFIG_ARC_PLAT_TB10X=y +CONFIG_ARC_CACHE_LINE_SHIFT=5 +CONFIG_HZ=250 +CONFIG_ARC_BUILTIN_DTB_NAME="abilis_tb100_dvk" +CONFIG_PREEMPT_VOLUNTARY=y +# CONFIG_COMPACTION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_DIAG is not set +# CONFIG_IPV6 is not set +# CONFIG_WIRELESS is not set +CONFIG_DEVTMPFS=y +CONFIG_NETDEVICES=y +# CONFIG_NET_CADENCE is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_SEEQ is not set +CONFIG_STMMAC_ETH=y +# CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_WLAN is not set +# CONFIG_INPUT is not set +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=1 +CONFIG_SERIAL_8250_RUNTIME_UARTS=1 +CONFIG_SERIAL_8250_DW=y +# CONFIG_HW_RANDOM is not set +CONFIG_I2C=y +# CONFIG_I2C_COMPAT is not set +CONFIG_I2C_DESIGNWARE_PLATFORM=y +CONFIG_GPIO_SYSFS=y +# CONFIG_HWMON is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y +CONFIG_LEDS_GPIO=y +CONFIG_LEDS_TRIGGERS=y +CONFIG_LEDS_TRIGGER_TIMER=y +CONFIG_LEDS_TRIGGER_ONESHOT=y +CONFIG_LEDS_TRIGGER_HEARTBEAT=y +CONFIG_LEDS_TRIGGER_CPU=y +CONFIG_LEDS_TRIGGER_GPIO=y +CONFIG_LEDS_TRIGGER_DEFAULT_ON=y +CONFIG_LEDS_TRIGGER_TRANSIENT=y +CONFIG_DMADEVICES=y +CONFIG_DW_DMAC=y +CONFIG_ASYNC_TX_DMA=y +# CONFIG_IOMMU_SUPPORT is not set +# CONFIG_DNOTIFY is not set +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_CONFIGFS_FS=y +# CONFIG_MISC_FILESYSTEMS is not set +# CONFIG_NETWORK_FILESYSTEMS is not set +CONFIG_DEBUG_INFO=y +CONFIG_STRIP_ASM_SYMS=y +CONFIG_DEBUG_FS=y +CONFIG_HEADERS_INSTALL=y +CONFIG_HEADERS_CHECK=y +CONFIG_DEBUG_SECTION_MISMATCH=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_DEBUG_STACKOVERFLOW=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +# CONFIG_CRYPTO_HW is not set diff --git a/arch/arc/configs/vdk_hs38_defconfig 
b/arch/arc/configs/vdk_hs38_defconfig new file mode 100644 index 0000000000..d7c858df52 --- /dev/null +++ b/arch/arc/configs/vdk_hs38_defconfig @@ -0,0 +1,98 @@ +# CONFIG_LOCALVERSION_AUTO is not set +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y +CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_SLUB_DEBUG is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_ARC_PLAT_AXS10X=y +CONFIG_AXS103=y +CONFIG_ISA_ARCV2=y +CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38" +CONFIG_PREEMPT=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +# CONFIG_IPV6 is not set +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +CONFIG_MTD=y +CONFIG_MTD_CMDLINE_PARTS=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_SLRAM=y +CONFIG_BLK_DEV_RAM=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_NETDEVICES=y +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_SEEQ is not set +CONFIG_STMMAC_ETH=y +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WIZNET is not set +CONFIG_NATIONAL_PHY=y +CONFIG_MOUSE_PS2_TOUCHKIT=y +CONFIG_SERIO_ARC_PS2=y +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +CONFIG_FB=y +CONFIG_ARCPGU_RGB888=y +CONFIG_ARCPGU_DISPTYPE=0 +# CONFIG_VGA_CONSOLE is not set +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +# CONFIG_LOGO_LINUX_CLUT224 is not set +CONFIG_USB=y +CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_TT_NEWSCHED is not set +CONFIG_USB_EHCI_HCD_PLATFORM=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_STORAGE=y +CONFIG_USB_SERIAL=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_NTFS_FS=y +CONFIG_TMPFS=y +CONFIG_JFFS2_FS=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_STRIP_ASM_SYMS=y +CONFIG_DEBUG_SHIRQ=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 +# CONFIG_SCHED_DEBUG is not set +# CONFIG_DEBUG_PREEMPT is not set +# CONFIG_FTRACE is not set diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig new file mode 100644 index 0000000000..015c1d4388 --- /dev/null +++ b/arch/arc/configs/vdk_hs38_smp_defconfig @@ -0,0 +1,102 @@ +# CONFIG_LOCALVERSION_AUTO is not set +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y +CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_SLUB_DEBUG is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_ARC_PLAT_AXS10X=y +CONFIG_AXS103=y +CONFIG_ISA_ARCV2=y +CONFIG_SMP=y +# CONFIG_ARC_TIMERS_64BIT is not set 
+CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38_smp" +CONFIG_PREEMPT=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +# CONFIG_IPV6 is not set +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +CONFIG_MTD=y +CONFIG_MTD_CMDLINE_PARTS=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_SLRAM=y +CONFIG_BLK_DEV_RAM=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_NETDEVICES=y +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_SEEQ is not set +CONFIG_STMMAC_ETH=y +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WIZNET is not set +CONFIG_NATIONAL_PHY=y +CONFIG_MOUSE_PS2_TOUCHKIT=y +CONFIG_SERIO_ARC_PS2=y +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +CONFIG_DRM=y +CONFIG_DRM_ARCPGU=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +# CONFIG_LOGO_LINUX_CLUT224 is not set +CONFIG_USB=y +CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_TT_NEWSCHED is not set +CONFIG_USB_EHCI_HCD_PLATFORM=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_STORAGE=y +CONFIG_USB_SERIAL=y +CONFIG_MMC=y +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_PLTFM=y +CONFIG_MMC_DW=y +CONFIG_UIO=y +CONFIG_UIO_PDRV_GENIRQ=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT3_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_NTFS_FS=y +CONFIG_TMPFS=y +CONFIG_JFFS2_FS=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_STRIP_ASM_SYMS=y +CONFIG_DEBUG_SHIRQ=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 +# CONFIG_SCHED_DEBUG is not set +# CONFIG_DEBUG_PREEMPT is not set +# CONFIG_FTRACE is not set diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild new file mode 100644 index 0000000000..3c1afa524b --- /dev/null +++ b/arch/arc/include/asm/Kbuild @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +generic-y += extable.h +generic-y += kvm_para.h +generic-y += mcs_spinlock.h +generic-y += parport.h +generic-y += user.h diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h new file mode 100644 index 0000000000..2162023195 --- /dev/null +++ b/arch/arc/include/asm/arcregs.h @@ -0,0 +1,359 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + */ + +#ifndef _ASM_ARC_ARCREGS_H +#define _ASM_ARC_ARCREGS_H + +/* Build Configuration Registers */ +#define ARC_REG_AUX_DCCM 0x18 /* DCCM Base Addr ARCv2 */ +#define ARC_REG_ERP_CTRL 0x3F /* ARCv2 Error protection control */ +#define ARC_REG_DCCM_BASE_BUILD 0x61 /* DCCM Base Addr ARCompact */ +#define ARC_REG_CRC_BCR 0x62 +#define ARC_REG_VECBASE_BCR 0x68 +#define ARC_REG_PERIBASE_BCR 0x69 +#define ARC_REG_FP_BCR 0x6B /* ARCompact: Single-Precision FPU */ +#define ARC_REG_DPFP_BCR 0x6C /* ARCompact: Dbl Precision FPU */ +#define ARC_REG_ERP_BUILD 0xc7 /* ARCv2 Error protection Build: ECC/Parity */ +#define ARC_REG_FP_V2_BCR 0xc8 /* ARCv2 FPU */ +#define ARC_REG_SLC_BCR 0xce +#define ARC_REG_DCCM_BUILD 0x74 /* DCCM size (common) */ +#define ARC_REG_AP_BCR 0x76 +#define ARC_REG_ICCM_BUILD 0x78 /* ICCM size (common) */ +#define ARC_REG_XY_MEM_BCR 0x79 +#define ARC_REG_MAC_BCR 0x7a +#define ARC_REG_MUL_BCR 0x7b +#define ARC_REG_SWAP_BCR 0x7c +#define ARC_REG_NORM_BCR 0x7d +#define ARC_REG_MIXMAX_BCR 0x7e +#define ARC_REG_BARREL_BCR 0x7f +#define ARC_REG_D_UNCACH_BCR 0x6A +#define ARC_REG_BPU_BCR 0xc0 +#define ARC_REG_ISA_CFG_BCR 0xc1 +#define ARC_REG_LPB_BUILD 0xE9 /* ARCv2 Loop Buffer Build */ +#define ARC_REG_RTT_BCR 0xF2 +#define ARC_REG_IRQ_BCR 0xF3 +#define ARC_REG_MICRO_ARCH_BCR 0xF9 /* ARCv2 Product revision */ +#define ARC_REG_SMART_BCR 0xFF +#define ARC_REG_CLUSTER_BCR 0xcf +#define ARC_REG_AUX_ICCM 0x208 /* ICCM Base Addr (ARCv2) */ +#define ARC_REG_LPB_CTRL 0x488 /* ARCv2 Loop Buffer control */ +#define ARC_REG_FPU_CTRL 0x300 +#define ARC_REG_FPU_STATUS 0x301 + +/* Common for ARCompact and ARCv2 status register */ +#define ARC_REG_STATUS32 0x0A + +/* status32 Bits Positions */ +#define STATUS_AE_BIT 5 /* Exception active */ +#define STATUS_DE_BIT 6 /* PC is in delay slot */ +#define STATUS_U_BIT 7 /* User/Kernel mode */ +#define STATUS_Z_BIT 11 +#define STATUS_L_BIT 12 /* Loop inhibit */ + +/* These masks correspond to the status word(STATUS_32) bits */ +#define STATUS_AE_MASK (1< + +/* Helpers */ +#define TO_KB(bytes) ((bytes) >> 10) +#define TO_MB(bytes) (TO_KB(bytes) >> 10) +#define PAGES_TO_KB(n_pages) ((n_pages) << (PAGE_SHIFT - 10)) +#define PAGES_TO_MB(n_pages) (PAGES_TO_KB(n_pages) >> 10) + + +/* + *************************************************************** + * Build Configuration Registers, with encoded hardware config + */ +struct bcr_identity { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int chip_id:16, cpu_id:8, family:8; +#else + unsigned int family:8, cpu_id:8, chip_id:16; +#endif +}; + +struct bcr_isa_arcv2 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int div_rem:4, pad2:4, ldd:1, unalign:1, atomic:1, be:1, + pad1:12, ver:8; +#else + unsigned int ver:8, pad1:12, be:1, atomic:1, unalign:1, + ldd:1, pad2:4, div_rem:4; +#endif +}; + +struct bcr_uarch_build_arcv2 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:8, prod:8, maj:8, min:8; +#else + unsigned int min:8, maj:8, prod:8, pad:8; +#endif +}; + +struct bcr_mpy { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:8, x1616:8, dsp:4, cycles:2, type:2, ver:8; +#else + unsigned int ver:8, type:2, cycles:2, dsp:4, x1616:8, pad:8; +#endif +}; + +struct bcr_iccm_arcompact { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int base:16, pad:5, sz:3, ver:8; +#else + unsigned int ver:8, sz:3, pad:5, base:16; +#endif +}; + +struct bcr_iccm_arcv2 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:8, sz11:4, sz01:4, sz10:4, sz00:4, ver:8; +#else + unsigned int ver:8, sz00:4, sz10:4, sz01:4, sz11:4, pad:8; +#endif +}; + 
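/*
 * A minimal, standalone sketch of how one of the bcr_* layouts above decodes a
 * raw 32-bit Build Configuration Register word. It assumes a little-endian
 * build, GCC's usual low-bits-first bit-field allocation, and a made-up
 * register value; the kernel reads the word from an auxiliary register
 * instead of a constant, so treat this only as an illustration of the
 * bit-field overlay technique.
 */
#include <stdio.h>

struct isa_bcr_view {			/* mirrors the !CONFIG_CPU_BIG_ENDIAN bcr_isa_arcv2 layout */
	unsigned int ver:8, pad1:12, be:1, atomic:1, unalign:1,
		     ldd:1, pad2:4, div_rem:4;
};

union bcr_word {
	unsigned int raw;		/* word as it would be read from the aux register */
	struct isa_bcr_view f;		/* bit-field view of the same word */
};

int main(void)
{
	union bcr_word isa = { .raw = 0x00a44002u };	/* hypothetical BCR contents */

	printf("ISA ver=%u atomic=%u unalign=%u ldd=%u div_rem=%u\n",
	       (unsigned)isa.f.ver, (unsigned)isa.f.atomic,
	       (unsigned)isa.f.unalign, (unsigned)isa.f.ldd,
	       (unsigned)isa.f.div_rem);
	return 0;
}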
+struct bcr_dccm_arcompact { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int res:21, sz:3, ver:8; +#else + unsigned int ver:8, sz:3, res:21; +#endif +}; + +struct bcr_dccm_arcv2 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad2:12, cyc:3, pad1:1, sz1:4, sz0:4, ver:8; +#else + unsigned int ver:8, sz0:4, sz1:4, pad1:1, cyc:3, pad2:12; +#endif +}; + +/* ARCompact: Both SP and DP FPU BCRs have same format */ +struct bcr_fp_arcompact { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int fast:1, ver:8; +#else + unsigned int ver:8, fast:1; +#endif +}; + +struct bcr_fp_arcv2 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad2:15, dp:1, pad1:7, sp:1, ver:8; +#else + unsigned int ver:8, sp:1, pad1:7, dp:1, pad2:15; +#endif +}; + +struct bcr_actionpoint { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:21, min:1, num:2, ver:8; +#else + unsigned int ver:8, num:2, min:1, pad:21; +#endif +}; + +#include + +struct bcr_bpu_arcompact { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad2:19, fam:1, pad:2, ent:2, ver:8; +#else + unsigned int ver:8, ent:2, pad:2, fam:1, pad2:19; +#endif +}; + +struct bcr_bpu_arcv2 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:6, fbe:2, tqe:2, ts:4, ft:1, rse:2, pte:3, bce:3, ver:8; +#else + unsigned int ver:8, bce:3, pte:3, rse:2, ft:1, ts:4, tqe:2, fbe:2, pad:6; +#endif +}; + +/* Error Protection Build: ECC/Parity */ +struct bcr_erp { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad3:5, mmu:3, pad2:4, ic:3, dc:3, pad1:6, ver:8; +#else + unsigned int ver:8, pad1:6, dc:3, ic:3, pad2:4, mmu:3, pad3:5; +#endif +}; + +/* Error Protection Control */ +struct ctl_erp { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad2:27, mpd:1, pad1:2, dpd:1, dpi:1; +#else + unsigned int dpi:1, dpd:1, pad1:2, mpd:1, pad2:27; +#endif +}; + +struct bcr_lpb { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:16, entries:8, ver:8; +#else + unsigned int ver:8, entries:8, pad:16; +#endif +}; + +struct bcr_generic { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int info:24, ver:8; +#else + unsigned int ver:8, info:24; +#endif +}; + +/* + ******************************************************************* + * Generic structures to hold build configuration used at runtime + */ + +struct cpuinfo_arc_mmu { + unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, pad:10, sasid:1, pae:1; + unsigned int sets:12, ways:4, u_dtlb:8, u_itlb:8; +}; + +struct cpuinfo_arc_cache { + unsigned int sz_k:14, line_len:8, assoc:4, alias:1, vipt:1, pad:4; +}; + +struct cpuinfo_arc_bpu { + unsigned int ver, full, num_cache, num_pred, ret_stk; +}; + +struct cpuinfo_arc_ccm { + unsigned int base_addr, sz; +}; + +struct cpuinfo_arc { + struct cpuinfo_arc_cache icache, dcache, slc; + struct cpuinfo_arc_mmu mmu; + struct cpuinfo_arc_bpu bpu; + struct bcr_identity core; + struct bcr_isa_arcv2 isa; + const char *release, *name; + unsigned int vec_base; + struct cpuinfo_arc_ccm iccm, dccm; + struct { + unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, swape:1, pad1:2, + fpu_sp:1, fpu_dp:1, dual:1, dual_enb:1, pad2:4, + ap_num:4, ap_full:1, smart:1, rtt:1, pad3:1, + timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4; + } extn; + struct bcr_mpy extn_mpy; +}; + +extern struct cpuinfo_arc cpuinfo_arc700[]; + +static inline int is_isa_arcv2(void) +{ + return IS_ENABLED(CONFIG_ISA_ARCV2); +} + +static inline int is_isa_arcompact(void) +{ + return IS_ENABLED(CONFIG_ISA_ARCOMPACT); +} + +#endif /* __ASEMBLY__ */ + +#endif /* _ASM_ARC_ARCREGS_H */ diff --git a/arch/arc/include/asm/asm-offsets.h b/arch/arc/include/asm/asm-offsets.h new file mode 100644 index 
0000000000..32a1d3d518 --- /dev/null +++ b/arch/arc/include/asm/asm-offsets.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#include diff --git a/arch/arc/include/asm/asserts.h b/arch/arc/include/asm/asserts.h new file mode 100644 index 0000000000..108f33be6a --- /dev/null +++ b/arch/arc/include/asm/asserts.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Synopsys, Inc. (www.synopsys.com) + * + * Author: Eugeniy Paltsev + */ +#ifndef __ASM_ARC_ASSERTS_H +#define __ASM_ARC_ASSERTS_H + +/* Helpers to sanitize config options. */ + +void chk_opt_strict(char *opt_name, bool hw_exists, bool opt_ena); +void chk_opt_weak(char *opt_name, bool hw_exists, bool opt_ena); + +/* + * Check required config option: + * - panic in case of OPT enabled but corresponding HW absent. + * - warn in case of OPT disabled but corresponding HW exists. +*/ +#define CHK_OPT_STRICT(opt_name, hw_exists) \ +({ \ + chk_opt_strict(#opt_name, hw_exists, IS_ENABLED(opt_name)); \ +}) + +/* + * Check optional config option: + * - panic in case of OPT enabled but corresponding HW absent. +*/ +#define CHK_OPT_WEAK(opt_name, hw_exists) \ +({ \ + chk_opt_weak(#opt_name, hw_exists, IS_ENABLED(opt_name)); \ +}) + +#endif /* __ASM_ARC_ASSERTS_H */ diff --git a/arch/arc/include/asm/atomic-llsc.h b/arch/arc/include/asm/atomic-llsc.h new file mode 100644 index 0000000000..088d348781 --- /dev/null +++ b/arch/arc/include/asm/atomic-llsc.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_ARC_ATOMIC_LLSC_H +#define _ASM_ARC_ATOMIC_LLSC_H + +#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) + +#define ATOMIC_OP(op, c_op, asm_op) \ +static inline void arch_atomic_##op(int i, atomic_t *v) \ +{ \ + unsigned int val; \ + \ + __asm__ __volatile__( \ + "1: llock %[val], [%[ctr]] \n" \ + " " #asm_op " %[val], %[val], %[i] \n" \ + " scond %[val], [%[ctr]] \n" \ + " bnz 1b \n" \ + : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \ + : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \ + [i] "ir" (i) \ + : "cc"); \ +} \ + +#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ +static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \ +{ \ + unsigned int val; \ + \ + __asm__ __volatile__( \ + "1: llock %[val], [%[ctr]] \n" \ + " " #asm_op " %[val], %[val], %[i] \n" \ + " scond %[val], [%[ctr]] \n" \ + " bnz 1b \n" \ + : [val] "=&r" (val) \ + : [ctr] "r" (&v->counter), \ + [i] "ir" (i) \ + : "cc"); \ + \ + return val; \ +} + +#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed +#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed + +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ +{ \ + unsigned int val, orig; \ + \ + __asm__ __volatile__( \ + "1: llock %[orig], [%[ctr]] \n" \ + " " #asm_op " %[val], %[orig], %[i] \n" \ + " scond %[val], [%[ctr]] \n" \ + " bnz 1b \n" \ + : [val] "=&r" (val), \ + [orig] "=&r" (orig) \ + : [ctr] "r" (&v->counter), \ + [i] "ir" (i) \ + : "cc"); \ + \ + return orig; \ +} + +#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed +#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed + +#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed +#define arch_atomic_fetch_andnot_relaxed arch_atomic_fetch_andnot_relaxed +#define 
arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed +#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed + +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(add, +=, add) +ATOMIC_OPS(sub, -=, sub) + +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(andnot, &= ~, bic) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, xor) + +#define arch_atomic_andnot arch_atomic_andnot + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP + +#endif diff --git a/arch/arc/include/asm/atomic-spinlock.h b/arch/arc/include/asm/atomic-spinlock.h new file mode 100644 index 0000000000..2c830347bf --- /dev/null +++ b/arch/arc/include/asm/atomic-spinlock.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_ARC_ATOMIC_SPLOCK_H +#define _ASM_ARC_ATOMIC_SPLOCK_H + +/* + * Non hardware assisted Atomic-R-M-W + * Locking would change to irq-disabling only (UP) and spinlocks (SMP) + */ + +static inline void arch_atomic_set(atomic_t *v, int i) +{ + /* + * Independent of hardware support, all of the atomic_xxx() APIs need + * to follow the same locking rules to make sure that a "hardware" + * atomic insn (e.g. LD) doesn't clobber an "emulated" atomic insn + * sequence + * + * Thus atomic_set() despite being 1 insn (and seemingly atomic) + * requires the locking. + */ + unsigned long flags; + + atomic_ops_lock(flags); + WRITE_ONCE(v->counter, i); + atomic_ops_unlock(flags); +} + +#define arch_atomic_set_release(v, i) arch_atomic_set((v), (i)) + +#define ATOMIC_OP(op, c_op, asm_op) \ +static inline void arch_atomic_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + \ + atomic_ops_lock(flags); \ + v->counter c_op i; \ + atomic_ops_unlock(flags); \ +} + +#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ +static inline int arch_atomic_##op##_return(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + unsigned int temp; \ + \ + /* \ + * spin lock/unlock provides the needed smp_mb() before/after \ + */ \ + atomic_ops_lock(flags); \ + temp = v->counter; \ + temp c_op i; \ + v->counter = temp; \ + atomic_ops_unlock(flags); \ + \ + return temp; \ +} + +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + unsigned int orig; \ + \ + /* \ + * spin lock/unlock provides the needed smp_mb() before/after \ + */ \ + atomic_ops_lock(flags); \ + orig = v->counter; \ + v->counter c_op i; \ + atomic_ops_unlock(flags); \ + \ + return orig; \ +} + +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(add, +=, add) +ATOMIC_OPS(sub, -=, sub) + +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(andnot, &= ~, bic) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, xor) + +#define arch_atomic_andnot arch_atomic_andnot +#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP + +#endif diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h new file mode 100644 index 0000000000..52ee51e1ff --- /dev/null +++ b/arch/arc/include/asm/atomic.h @@ 
-0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef _ASM_ARC_ATOMIC_H +#define _ASM_ARC_ATOMIC_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include +#include + +#define arch_atomic_read(v) READ_ONCE((v)->counter) + +#ifdef CONFIG_ARC_HAS_LLSC +#include +#else +#include +#endif + +#define arch_atomic_cmpxchg(v, o, n) \ +({ \ + arch_cmpxchg(&((v)->counter), (o), (n)); \ +}) + +#ifdef arch_cmpxchg_relaxed +#define arch_atomic_cmpxchg_relaxed(v, o, n) \ +({ \ + arch_cmpxchg_relaxed(&((v)->counter), (o), (n)); \ +}) +#endif + +#define arch_atomic_xchg(v, n) \ +({ \ + arch_xchg(&((v)->counter), (n)); \ +}) + +#ifdef arch_xchg_relaxed +#define arch_atomic_xchg_relaxed(v, n) \ +({ \ + arch_xchg_relaxed(&((v)->counter), (n)); \ +}) +#endif + +/* + * 64-bit atomics + */ +#ifdef CONFIG_GENERIC_ATOMIC64 +#include +#else +#include +#endif + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h new file mode 100644 index 0000000000..c5a8010fdc --- /dev/null +++ b/arch/arc/include/asm/atomic64-arcv2.h @@ -0,0 +1,250 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* + * ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD) + * - The address HAS to be 64-bit aligned + */ + +#ifndef _ASM_ARC_ATOMIC64_ARCV2_H +#define _ASM_ARC_ATOMIC64_ARCV2_H + +typedef struct { + s64 __aligned(8) counter; +} atomic64_t; + +#define ATOMIC64_INIT(a) { (a) } + +static inline s64 arch_atomic64_read(const atomic64_t *v) +{ + s64 val; + + __asm__ __volatile__( + " ldd %0, [%1] \n" + : "=r"(val) + : "r"(&v->counter)); + + return val; +} + +static inline void arch_atomic64_set(atomic64_t *v, s64 a) +{ + /* + * This could have been a simple assignment in "C" but would need + * explicit volatile. Otherwise gcc optimizers could elide the store + * which borked atomic64 self-test + * In the inline asm version, memory clobber needed for exact same + * reason, to tell gcc about the store. + * + * This however is not needed for sibling atomic64_add() etc since both + * load/store are explicitly done in inline asm. 
As long as API is used + * for each access, gcc has no way to optimize away any load/store + */ + __asm__ __volatile__( + " std %0, [%1] \n" + : + : "r"(a), "r"(&v->counter) + : "memory"); +} + +#define ATOMIC64_OP(op, op1, op2) \ +static inline void arch_atomic64_##op(s64 a, atomic64_t *v) \ +{ \ + s64 val; \ + \ + __asm__ __volatile__( \ + "1: \n" \ + " llockd %0, [%1] \n" \ + " " #op1 " %L0, %L0, %L2 \n" \ + " " #op2 " %H0, %H0, %H2 \n" \ + " scondd %0, [%1] \n" \ + " bnz 1b \n" \ + : "=&r"(val) \ + : "r"(&v->counter), "ir"(a) \ + : "cc"); \ +} \ + +#define ATOMIC64_OP_RETURN(op, op1, op2) \ +static inline s64 arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \ +{ \ + s64 val; \ + \ + __asm__ __volatile__( \ + "1: \n" \ + " llockd %0, [%1] \n" \ + " " #op1 " %L0, %L0, %L2 \n" \ + " " #op2 " %H0, %H0, %H2 \n" \ + " scondd %0, [%1] \n" \ + " bnz 1b \n" \ + : [val] "=&r"(val) \ + : "r"(&v->counter), "ir"(a) \ + : "cc"); /* memory clobber comes from smp_mb() */ \ + \ + return val; \ +} + +#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed +#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed + +#define ATOMIC64_FETCH_OP(op, op1, op2) \ +static inline s64 arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \ +{ \ + s64 val, orig; \ + \ + __asm__ __volatile__( \ + "1: \n" \ + " llockd %0, [%2] \n" \ + " " #op1 " %L1, %L0, %L3 \n" \ + " " #op2 " %H1, %H0, %H3 \n" \ + " scondd %1, [%2] \n" \ + " bnz 1b \n" \ + : "=&r"(orig), "=&r"(val) \ + : "r"(&v->counter), "ir"(a) \ + : "cc"); /* memory clobber comes from smp_mb() */ \ + \ + return orig; \ +} + +#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed +#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed + +#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed +#define arch_atomic64_fetch_andnot_relaxed arch_atomic64_fetch_andnot_relaxed +#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed +#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed + +#define ATOMIC64_OPS(op, op1, op2) \ + ATOMIC64_OP(op, op1, op2) \ + ATOMIC64_OP_RETURN(op, op1, op2) \ + ATOMIC64_FETCH_OP(op, op1, op2) + +ATOMIC64_OPS(add, add.f, adc) +ATOMIC64_OPS(sub, sub.f, sbc) + +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, op1, op2) \ + ATOMIC64_OP(op, op1, op2) \ + ATOMIC64_FETCH_OP(op, op1, op2) + +ATOMIC64_OPS(and, and, and) +ATOMIC64_OPS(andnot, bic, bic) +ATOMIC64_OPS(or, or, or) +ATOMIC64_OPS(xor, xor, xor) + +#define arch_atomic64_andnot arch_atomic64_andnot + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP +#undef ATOMIC64_OP_RETURN +#undef ATOMIC64_OP + +static inline s64 +arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new) +{ + s64 prev; + + smp_mb(); + + __asm__ __volatile__( + "1: llockd %0, [%1] \n" + " brne %L0, %L2, 2f \n" + " brne %H0, %H2, 2f \n" + " scondd %3, [%1] \n" + " bnz 1b \n" + "2: \n" + : "=&r"(prev) + : "r"(ptr), "ir"(expected), "r"(new) + : "cc"); /* memory clobber comes from smp_mb() */ + + smp_mb(); + + return prev; +} + +static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new) +{ + s64 prev; + + smp_mb(); + + __asm__ __volatile__( + "1: llockd %0, [%1] \n" + " scondd %2, [%1] \n" + " bnz 1b \n" + "2: \n" + : "=&r"(prev) + : "r"(ptr), "r"(new) + : "cc"); /* memory clobber comes from smp_mb() */ + + smp_mb(); + + return prev; +} + +/** + * arch_atomic64_dec_if_positive - decrement by 1 if old value positive + * @v: pointer of type atomic64_t + * + * The function returns the old value 
of *v minus 1, even if + * the atomic variable, v, was not decremented. + */ + +static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) +{ + s64 val; + + smp_mb(); + + __asm__ __volatile__( + "1: llockd %0, [%1] \n" + " sub.f %L0, %L0, 1 # w0 - 1, set C on borrow\n" + " sub.c %H0, %H0, 1 # if C set, w1 - 1\n" + " brlt %H0, 0, 2f \n" + " scondd %0, [%1] \n" + " bnz 1b \n" + "2: \n" + : "=&r"(val) + : "r"(&v->counter) + : "cc"); /* memory clobber comes from smp_mb() */ + + smp_mb(); + + return val; +} +#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive + +/** + * arch_atomic64_fetch_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, if it was not @u. + * Returns the old value of @v + */ +static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) +{ + s64 old, temp; + + smp_mb(); + + __asm__ __volatile__( + "1: llockd %0, [%2] \n" + " brne %L0, %L4, 2f # continue to add since v != u \n" + " breq.d %H0, %H4, 3f # return since v == u \n" + "2: \n" + " add.f %L1, %L0, %L3 \n" + " adc %H1, %H0, %H3 \n" + " scondd %1, [%2] \n" + " bnz 1b \n" + "3: \n" + : "=&r"(old), "=&r" (temp) + : "r"(&v->counter), "r"(a), "r"(u) + : "cc"); /* memory clobber comes from smp_mb() */ + + smp_mb(); + + return old; +} +#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless + +#endif diff --git a/arch/arc/include/asm/barrier.h b/arch/arc/include/asm/barrier.h new file mode 100644 index 0000000000..4637de9e02 --- /dev/null +++ b/arch/arc/include/asm/barrier.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef __ASM_BARRIER_H +#define __ASM_BARRIER_H + +#ifdef CONFIG_ISA_ARCV2 + +/* + * ARCv2 based HS38 cores are in-order issue, but still weakly ordered + * due to micro-arch buffering/queuing of load/store, cache hit vs. miss ... + * + * Explicit barrier provided by DMB instruction + * - Operand supports fine grained load/store/load+store semantics + * - Ensures that selected memory operation issued before it will complete + * before any subsequent memory operation of same type + * - DMB guarantees SMP as well as local barrier semantics + * (asm-generic/barrier.h ensures sane smp_*mb if not defined here, i.e. + * UP: barrier(), SMP: smp_*mb == *mb) + * - DSYNC provides DMB+completion_of_cache_bpu_maintenance_ops hence not needed + * in the general case. Plus it only provides full barrier. + */ + +#define mb() asm volatile("dmb 3\n" : : : "memory") +#define rmb() asm volatile("dmb 1\n" : : : "memory") +#define wmb() asm volatile("dmb 2\n" : : : "memory") + +#else + +/* + * ARCompact based cores (ARC700) only have SYNC instruction which is super + * heavy weight as it flushes the pipeline as well. + * There are no real SMP implementations of such cores. + */ + +#define mb() asm volatile("sync\n" : : : "memory") + +#endif + +#include + +#endif diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h new file mode 100644 index 0000000000..bdb7e190a2 --- /dev/null +++ b/arch/arc/include/asm/bitops.h @@ -0,0 +1,197 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + */ + +#ifndef _ASM_BITOPS_H +#define _ASM_BITOPS_H + +#ifndef _LINUX_BITOPS_H +#error only can be included directly +#endif + +#ifndef __ASSEMBLY__ + +#include +#include + +#ifdef CONFIG_ISA_ARCOMPACT + +/* + * Count the number of zeros, starting from MSB + * Helper for fls( ) friends + * This is a pure count, so (1-32) or (0-31) doesn't apply + * It could be 0 to 32, based on num of 0's in there + * clz(0x8000_0000) = 0, clz(0xFFFF_FFFF)=0, clz(0) = 32, clz(1) = 31 + */ +static inline __attribute__ ((const)) int clz(unsigned int x) +{ + unsigned int res; + + __asm__ __volatile__( + " norm.f %0, %1 \n" + " mov.n %0, 0 \n" + " add.p %0, %0, 1 \n" + : "=r"(res) + : "r"(x) + : "cc"); + + return res; +} + +static inline int constant_fls(unsigned int x) +{ + int r = 32; + + if (!x) + return 0; + if (!(x & 0xffff0000u)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xff000000u)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xf0000000u)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xc0000000u)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000u)) + r -= 1; + return r; +} + +/* + * fls = Find Last Set in word + * @result: [1-32] + * fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0 + */ +static inline __attribute__ ((const)) int fls(unsigned int x) +{ + if (__builtin_constant_p(x)) + return constant_fls(x); + + return 32 - clz(x); +} + +/* + * __fls: Similar to fls, but zero based (0-31) + */ +static inline __attribute__ ((const)) int __fls(unsigned long x) +{ + if (!x) + return 0; + else + return fls(x) - 1; +} + +/* + * ffs = Find First Set in word (LSB to MSB) + * @result: [1-32], 0 if all 0's + */ +#define ffs(x) ({ unsigned long __t = (x); fls(__t & -__t); }) + +/* + * __ffs: Similar to ffs, but zero based (0-31) + */ +static inline __attribute__ ((const)) unsigned long __ffs(unsigned long word) +{ + if (!word) + return word; + + return ffs(word) - 1; +} + +#else /* CONFIG_ISA_ARCV2 */ + +/* + * fls = Find Last Set in word + * @result: [1-32] + * fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0 + */ +static inline __attribute__ ((const)) int fls(unsigned int x) +{ + int n; + + asm volatile( + " fls.f %0, %1 \n" /* 0:31; 0(Z) if src 0 */ + " add.nz %0, %0, 1 \n" /* 0:31 -> 1:32 */ + : "=r"(n) /* Early clobber not needed */ + : "r"(x) + : "cc"); + + return n; +} + +/* + * __fls: Similar to fls, but zero based (0-31). Also 0 if no bit set + */ +static inline __attribute__ ((const)) int __fls(unsigned long x) +{ + /* FLS insn has exactly same semantics as the API */ + return __builtin_arc_fls(x); +} + +/* + * ffs = Find First Set in word (LSB to MSB) + * @result: [1-32], 0 if all 0's + */ +static inline __attribute__ ((const)) int ffs(unsigned int x) +{ + int n; + + asm volatile( + " ffs.f %0, %1 \n" /* 0:31; 31(Z) if src 0 */ + " add.nz %0, %0, 1 \n" /* 0:31 -> 1:32 */ + " mov.z %0, 0 \n" /* 31(Z)-> 0 */ + : "=r"(n) /* Early clobber not needed */ + : "r"(x) + : "cc"); + + return n; +} + +/* + * __ffs: Similar to ffs, but zero based (0-31) + */ +static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x) +{ + unsigned long n; + + asm volatile( + " ffs.f %0, %1 \n" /* 0:31; 31(Z) if src 0 */ + " mov.z %0, 0 \n" /* 31(Z)-> 0 */ + : "=r"(n) + : "r"(x) + : "cc"); + + return n; + +} + +#endif /* CONFIG_ISA_ARCOMPACT */ + +/* + * ffz = Find First Zero in word. 
+ * @return:[0-31], 32 if all 1's + */ +#define ffz(x) __ffs(~(x)) + +#include +#include +#include +#include +#include +#include + +#include +#include + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/arch/arc/include/asm/bug.h b/arch/arc/include/asm/bug.h new file mode 100644 index 0000000000..4c453ba96c --- /dev/null +++ b/arch/arc/include/asm/bug.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef _ASM_ARC_BUG_H +#define _ASM_ARC_BUG_H + +#ifndef __ASSEMBLY__ + +#include + +struct task_struct; + +void show_regs(struct pt_regs *regs); +void show_stacktrace(struct task_struct *tsk, struct pt_regs *regs, + const char *loglvl); +void show_kernel_fault_diag(const char *str, struct pt_regs *regs, + unsigned long address); +void die(const char *str, struct pt_regs *regs, unsigned long address); + +#define BUG() do { \ + pr_warn("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \ + barrier_before_unreachable(); \ + __builtin_trap(); \ +} while (0) + +#define HAVE_ARCH_BUG + +#include + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h new file mode 100644 index 0000000000..f0f1fc5d62 --- /dev/null +++ b/arch/arc/include/asm/cache.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef __ARC_ASM_CACHE_H +#define __ARC_ASM_CACHE_H + +/* In case $$ not config, setup a dummy number for rest of kernel */ +#ifndef CONFIG_ARC_CACHE_LINE_SHIFT +#define L1_CACHE_SHIFT 6 +#else +#define L1_CACHE_SHIFT CONFIG_ARC_CACHE_LINE_SHIFT +#endif + +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#define CACHE_LINE_MASK (~(L1_CACHE_BYTES - 1)) + +/* + * ARC700 doesn't cache any access in top 1G (0xc000_0000 to 0xFFFF_FFFF) + * Ideal for wiring memory mapped peripherals as we don't need to do + * explicit uncached accesses (LD.di/ST.di) hence more portable drivers + */ +#define ARC_UNCACHED_ADDR_SPACE 0xc0000000 + +#ifndef __ASSEMBLY__ + +#include + +/* Uncached access macros */ +#define arc_read_uncached_32(ptr) \ +({ \ + unsigned int __ret; \ + __asm__ __volatile__( \ + " ld.di %0, [%1] \n" \ + : "=r"(__ret) \ + : "r"(ptr)); \ + __ret; \ +}) + +#define arc_write_uncached_32(ptr, data)\ +({ \ + __asm__ __volatile__( \ + " st.di %0, [%1] \n" \ + : \ + : "r"(data), "r"(ptr)); \ +}) + +/* Largest line length for either L1 or L2 is 128 bytes */ +#define SMP_CACHE_BYTES 128 +#define cache_line_size() SMP_CACHE_BYTES +#define ARCH_DMA_MINALIGN SMP_CACHE_BYTES + +/* + * Make sure slab-allocated buffers are 64-bit aligned when atomic64_t uses + * ARCv2 64-bit atomics (LLOCKD/SCONDD). This guarantess runtime 64-bit + * alignment for any atomic64_t embedded in buffer. + * Default ARCH_SLAB_MINALIGN is __alignof__(long long) which has a relaxed + * value of 4 (and not 8) in ARC ABI. 
+ */ +#if defined(CONFIG_ARC_HAS_LL64) && defined(CONFIG_ARC_HAS_LLSC) +#define ARCH_SLAB_MINALIGN 8 +#endif + +extern int ioc_enable; +extern unsigned long perip_base, perip_end; + +#endif /* !__ASSEMBLY__ */ + +/* Instruction cache related Auxiliary registers */ +#define ARC_REG_IC_BCR 0x77 /* Build Config reg */ +#define ARC_REG_IC_IVIC 0x10 +#define ARC_REG_IC_CTRL 0x11 +#define ARC_REG_IC_IVIR 0x16 +#define ARC_REG_IC_ENDR 0x17 +#define ARC_REG_IC_IVIL 0x19 +#define ARC_REG_IC_PTAG 0x1E +#define ARC_REG_IC_PTAG_HI 0x1F + +/* Bit val in IC_CTRL */ +#define IC_CTRL_DIS 0x1 + +/* Data cache related Auxiliary registers */ +#define ARC_REG_DC_BCR 0x72 /* Build Config reg */ +#define ARC_REG_DC_IVDC 0x47 +#define ARC_REG_DC_CTRL 0x48 +#define ARC_REG_DC_IVDL 0x4A +#define ARC_REG_DC_FLSH 0x4B +#define ARC_REG_DC_FLDL 0x4C +#define ARC_REG_DC_STARTR 0x4D +#define ARC_REG_DC_ENDR 0x4E +#define ARC_REG_DC_PTAG 0x5C +#define ARC_REG_DC_PTAG_HI 0x5F + +/* Bit val in DC_CTRL */ +#define DC_CTRL_DIS 0x001 +#define DC_CTRL_INV_MODE_FLUSH 0x040 +#define DC_CTRL_FLUSH_STATUS 0x100 +#define DC_CTRL_RGN_OP_INV 0x200 +#define DC_CTRL_RGN_OP_MSK 0x200 + +/*System-level cache (L2 cache) related Auxiliary registers */ +#define ARC_REG_SLC_CFG 0x901 +#define ARC_REG_SLC_CTRL 0x903 +#define ARC_REG_SLC_FLUSH 0x904 +#define ARC_REG_SLC_INVALIDATE 0x905 +#define ARC_AUX_SLC_IVDL 0x910 +#define ARC_AUX_SLC_FLDL 0x912 +#define ARC_REG_SLC_RGN_START 0x914 +#define ARC_REG_SLC_RGN_START1 0x915 +#define ARC_REG_SLC_RGN_END 0x916 +#define ARC_REG_SLC_RGN_END1 0x917 + +/* Bit val in SLC_CONTROL */ +#define SLC_CTRL_DIS 0x001 +#define SLC_CTRL_IM 0x040 +#define SLC_CTRL_BUSY 0x100 +#define SLC_CTRL_RGN_OP_INV 0x200 + +/* IO coherency related Auxiliary registers */ +#define ARC_REG_IO_COH_ENABLE 0x500 +#define ARC_IO_COH_ENABLE_BIT BIT(0) +#define ARC_REG_IO_COH_PARTIAL 0x501 +#define ARC_IO_COH_PARTIAL_BIT BIT(0) +#define ARC_REG_IO_COH_AP0_BASE 0x508 +#define ARC_REG_IO_COH_AP0_SIZE 0x509 + +#endif /* _ASM_CACHE_H */ diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h new file mode 100644 index 0000000000..e201b4b165 --- /dev/null +++ b/arch/arc/include/asm/cacheflush.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * vineetg: May 2011: for Non-aliasing VIPT D-cache following can be NOPs + * -flush_cache_dup_mm (fork) + * -likewise for flush_cache_mm (exit/execve) + * -likewise for flush_cache_{range,page} (munmap, exit, COW-break) + * + * vineetg: April 2008 + * -Added a critical CacheLine flush to copy_to_user_page( ) which + * was causing gdbserver to not setup breakpoints consistently + */ + +#ifndef _ASM_CACHEFLUSH_H +#define _ASM_CACHEFLUSH_H + +#include +#include + +/* + * Semantically we need this because icache doesn't snoop dcache/dma. + * However ARC Cache flush requires paddr as well as vaddr, latter not available + * in the flush_icache_page() API. 
So we no-op it but do the equivalent work + * in update_mmu_cache() + */ +#define flush_icache_page(vma, page) + +void flush_cache_all(void); + +void flush_icache_range(unsigned long kstart, unsigned long kend); +void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr, int len); +void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr); +void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr); + +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 + +void flush_dcache_page(struct page *page); + +void dma_cache_wback_inv(phys_addr_t start, unsigned long sz); +void dma_cache_inv(phys_addr_t start, unsigned long sz); +void dma_cache_wback(phys_addr_t start, unsigned long sz); + +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) + +/* TBD: optimize this */ +#define flush_cache_vmap(start, end) flush_cache_all() +#define flush_cache_vunmap(start, end) flush_cache_all() + +#define flush_cache_dup_mm(mm) /* called on fork (VIVT only) */ + +#ifndef CONFIG_ARC_CACHE_VIPT_ALIASING + +#define flush_cache_mm(mm) /* called on munmap/exit */ +#define flush_cache_range(mm, u_vstart, u_vend) +#define flush_cache_page(vma, u_vaddr, pfn) /* PF handling/COW-break */ + +#else /* VIPT aliasing dcache */ + +/* To clear out stale userspace mappings */ +void flush_cache_mm(struct mm_struct *mm); +void flush_cache_range(struct vm_area_struct *vma, + unsigned long start,unsigned long end); +void flush_cache_page(struct vm_area_struct *vma, + unsigned long user_addr, unsigned long page); + +/* + * To make sure that userspace mapping is flushed to memory before + * get_user_pages() uses a kernel mapping to access the page + */ +#define ARCH_HAS_FLUSH_ANON_PAGE +void flush_anon_page(struct vm_area_struct *vma, + struct page *page, unsigned long u_vaddr); + +#endif /* CONFIG_ARC_CACHE_VIPT_ALIASING */ + +/* + * A new pagecache page has PG_arch_1 clear - thus dcache dirty by default + * This works around some PIO based drivers which don't call flush_dcache_page + * to record that they dirtied the dcache + */ +#define PG_dc_clean PG_arch_1 + +#define CACHE_COLORS_NUM 4 +#define CACHE_COLORS_MSK (CACHE_COLORS_NUM - 1) +#define CACHE_COLOR(addr) (((unsigned long)(addr) >> (PAGE_SHIFT)) & CACHE_COLORS_MSK) + +/* + * Simple wrapper over config option + * Bootup code ensures that hardware matches kernel configuration + */ +static inline int cache_is_vipt_aliasing(void) +{ + return IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); +} + +/* + * checks if two addresses (after page aligning) index into same cache set + */ +#define addr_not_cache_congruent(addr1, addr2) \ +({ \ + cache_is_vipt_aliasing() ? \ + (CACHE_COLOR(addr1) != CACHE_COLOR(addr2)) : 0; \ +}) + +#define copy_to_user_page(vma, page, vaddr, dst, src, len) \ +do { \ + memcpy(dst, src, len); \ + if (vma->vm_flags & VM_EXEC) \ + __sync_icache_dcache((unsigned long)(dst), vaddr, len); \ +} while (0) + +#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ + memcpy(dst, src, len); \ + +#endif diff --git a/arch/arc/include/asm/checksum.h b/arch/arc/include/asm/checksum.h new file mode 100644 index 0000000000..0b485800a3 --- /dev/null +++ b/arch/arc/include/asm/checksum.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * Joern Rennecke : Jan 2012 + * -Insn Scheduling improvements to csum core routines. + * = csum_fold( ) largely derived from ARM version. 
+ * = ip_fast_cum( ) to have module scheduling + * -gcc 4.4.x broke networking. Alias analysis needed to be primed. + * worked around by adding memory clobber to ip_fast_csum( ) + * + * vineetg: May 2010 + * -Rewrote ip_fast_cscum( ) and csum_fold( ) with fast inline asm + */ + +#ifndef _ASM_ARC_CHECKSUM_H +#define _ASM_ARC_CHECKSUM_H + +/* + * Fold a partial checksum + * + * The 2 swords comprising the 32bit sum are added, any carry to 16th bit + * added back and final sword result inverted. + */ +static inline __sum16 csum_fold(__wsum s) +{ + unsigned int r = s << 16 | s >> 16; /* ror */ + s = ~s; + s -= r; + return s >> 16; +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + */ +static inline __sum16 +ip_fast_csum(const void *iph, unsigned int ihl) +{ + const void *ptr = iph; + unsigned int tmp, tmp2, sum; + + __asm__( + " ld.ab %0, [%3, 4] \n" + " ld.ab %2, [%3, 4] \n" + " sub %1, %4, 2 \n" + " lsr.f lp_count, %1, 1 \n" + " bcc 0f \n" + " add.f %0, %0, %2 \n" + " ld.ab %2, [%3, 4] \n" + "0: lp 1f \n" + " ld.ab %1, [%3, 4] \n" + " adc.f %0, %0, %2 \n" + " ld.ab %2, [%3, 4] \n" + " adc.f %0, %0, %1 \n" + "1: adc.f %0, %0, %2 \n" + " add.cs %0,%0,1 \n" + : "=&r"(sum), "=r"(tmp), "=&r"(tmp2), "+&r" (ptr) + : "r"(ihl) + : "cc", "lp_count", "memory"); + + return csum_fold(sum); +} + +/* + * TCP pseudo Header is 12 bytes: + * SA [4], DA [4], zeroes [1], Proto[1], TCP Seg(hdr+data) Len [2] + */ +static inline __wsum +csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) +{ + __asm__ __volatile__( + " add.f %0, %0, %1 \n" + " adc.f %0, %0, %2 \n" + " adc.f %0, %0, %3 \n" + " adc.f %0, %0, %4 \n" + " adc %0, %0, 0 \n" + : "+&r"(sum) + : "r"(saddr), "r"(daddr), +#ifdef CONFIG_CPU_BIG_ENDIAN + "r"(len), +#else + "r"(len << 8), +#endif + "r"(htons(proto)) + : "cc"); + + return sum; +} + +#define csum_fold csum_fold +#define ip_fast_csum ip_fast_csum +#define csum_tcpudp_nofold csum_tcpudp_nofold + +#include + +#endif /* _ASM_ARC_CHECKSUM_H */ diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h new file mode 100644 index 0000000000..c5b544a5fe --- /dev/null +++ b/arch/arc/include/asm/cmpxchg.h @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + */ + +#ifndef __ASM_ARC_CMPXCHG_H +#define __ASM_ARC_CMPXCHG_H + +#include +#include + +#include +#include + +#ifdef CONFIG_ARC_HAS_LLSC + +/* + * if (*ptr == @old) + * *ptr = @new + */ +#define __cmpxchg(ptr, old, new) \ +({ \ + __typeof__(*(ptr)) _prev; \ + \ + __asm__ __volatile__( \ + "1: llock %0, [%1] \n" \ + " brne %0, %2, 2f \n" \ + " scond %3, [%1] \n" \ + " bnz 1b \n" \ + "2: \n" \ + : "=&r"(_prev) /* Early clobber prevent reg reuse */ \ + : "r"(ptr), /* Not "m": llock only supports reg */ \ + "ir"(old), \ + "r"(new) /* Not "ir": scond can't take LIMM */ \ + : "cc", \ + "memory"); /* gcc knows memory is clobbered */ \ + \ + _prev; \ +}) + +#define arch_cmpxchg_relaxed(ptr, old, new) \ +({ \ + __typeof__(ptr) _p_ = (ptr); \ + __typeof__(*(ptr)) _o_ = (old); \ + __typeof__(*(ptr)) _n_ = (new); \ + __typeof__(*(ptr)) _prev_; \ + \ + switch(sizeof((_p_))) { \ + case 4: \ + _prev_ = __cmpxchg(_p_, _o_, _n_); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + _prev_; \ +}) + +#else + +#define arch_cmpxchg(ptr, old, new) \ +({ \ + volatile __typeof__(ptr) _p_ = (ptr); \ + __typeof__(*(ptr)) _o_ = (old); \ + __typeof__(*(ptr)) _n_ = (new); \ + __typeof__(*(ptr)) _prev_; \ + unsigned long __flags; \ + \ + BUILD_BUG_ON(sizeof(_p_) != 4); \ + \ + /* \ + * spin lock/unlock provide the needed smp_mb() before/after \ + */ \ + atomic_ops_lock(__flags); \ + _prev_ = *_p_; \ + if (_prev_ == _o_) \ + *_p_ = _n_; \ + atomic_ops_unlock(__flags); \ + _prev_; \ +}) + +#endif + +/* + * xchg + */ +#ifdef CONFIG_ARC_HAS_LLSC + +#define __xchg(ptr, val) \ +({ \ + __asm__ __volatile__( \ + " ex %0, [%1] \n" /* set new value */ \ + : "+r"(val) \ + : "r"(ptr) \ + : "memory"); \ + _val_; /* get old value */ \ +}) + +#define arch_xchg_relaxed(ptr, val) \ +({ \ + __typeof__(ptr) _p_ = (ptr); \ + __typeof__(*(ptr)) _val_ = (val); \ + \ + switch(sizeof(*(_p_))) { \ + case 4: \ + _val_ = __xchg(_p_, _val_); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + _val_; \ +}) + +#else /* !CONFIG_ARC_HAS_LLSC */ + +/* + * EX instructions is baseline and present in !LLSC too. But in this + * regime it still needs use @atomic_ops_lock spinlock to allow interop + * with cmpxchg() which uses spinlock in !LLSC + * (llist.h use xchg and cmpxchg on sama data) + */ + +#define arch_xchg(ptr, val) \ +({ \ + __typeof__(ptr) _p_ = (ptr); \ + __typeof__(*(ptr)) _val_ = (val); \ + \ + unsigned long __flags; \ + \ + atomic_ops_lock(__flags); \ + \ + __asm__ __volatile__( \ + " ex %0, [%1] \n" \ + : "+r"(_val_) \ + : "r"(_p_) \ + : "memory"); \ + \ + atomic_ops_unlock(__flags); \ + _val_; \ +}) + +#endif + +#endif diff --git a/arch/arc/include/asm/current.h b/arch/arc/include/asm/current.h new file mode 100644 index 0000000000..9b9bdd3e65 --- /dev/null +++ b/arch/arc/include/asm/current.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * Vineetg: May 16th, 2008 + * - Current macro is now implemented as "global register" r25 + */ + +#ifndef _ASM_ARC_CURRENT_H +#define _ASM_ARC_CURRENT_H + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_ARC_CURR_IN_REG + +register struct task_struct *curr_arc asm("r25"); +#define current (curr_arc) + +#else +#include +#endif /* ! CONFIG_ARC_CURR_IN_REG */ + +#endif /* ! 
__ASSEMBLY__ */ + +#endif /* _ASM_ARC_CURRENT_H */ diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h new file mode 100644 index 0000000000..54db798f0a --- /dev/null +++ b/arch/arc/include/asm/delay.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * Delay routines using pre computed loops_per_jiffy value. + * + * vineetg: Feb 2012 + * -Rewrote in "C" to avoid dealing with availability of H/w MPY + * -Also reduced the num of MPY operations from 3 to 2 + * + * Amit Bhor: Codito Technologies 2004 + */ + +#ifndef __ASM_ARC_UDELAY_H +#define __ASM_ARC_UDELAY_H + +#include +#include /* HZ */ + +extern unsigned long loops_per_jiffy; + +static inline void __delay(unsigned long loops) +{ + __asm__ __volatile__( + " mov lp_count, %0 \n" + " lp 1f \n" + " nop \n" + "1: \n" + : + : "r"(loops) + : "lp_count"); +} + +extern void __bad_udelay(void); + +/* + * Normal Math for computing loops in "N" usecs + * -we have precomputed @loops_per_jiffy + * -1 sec has HZ jiffies + * loops per "N" usecs = ((loops_per_jiffy * HZ / 1000000) * N) + * + * Approximate Division by multiplication: + * -Mathematically if we multiply and divide a number by same value the + * result remains unchanged: In this case, we use 2^32 + * -> (loops_per_N_usec * 2^32 ) / 2^32 + * -> (((loops_per_jiffy * HZ / 1000000) * N) * 2^32) / 2^32 + * -> (loops_per_jiffy * HZ * N * 4295) / 2^32 + * + * -Divide by 2^32 is very simply right shift by 32 + * -We simply need to ensure that the multiply per above eqn happens in + * 64-bit precision (if CPU doesn't support it - gcc can emaulate it) + */ + +static inline void __udelay(unsigned long usecs) +{ + unsigned long loops; + + /* (u64) cast ensures 64 bit MPY - real or emulated + * HZ * 4295 is pre-evaluated by gcc - hence only 2 mpy ops + */ + loops = ((u64) usecs * 4295 * HZ * loops_per_jiffy) >> 32; + + __delay(loops); +} + +#define udelay(n) (__builtin_constant_p(n) ? ((n) > 20000 ? __bad_udelay() \ + : __udelay(n)) : __udelay(n)) + +#endif /* __ASM_ARC_UDELAY_H */ diff --git a/arch/arc/include/asm/disasm.h b/arch/arc/include/asm/disasm.h new file mode 100644 index 0000000000..61fb4d7aff --- /dev/null +++ b/arch/arc/include/asm/disasm.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * several functions that help interpret ARC instructions + * used for unaligned accesses, kprobes and kgdb + * + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + */ + +#ifndef __ARC_DISASM_H__ +#define __ARC_DISASM_H__ + +enum { + op_Bcc = 0, op_BLcc = 1, op_LD = 2, op_ST = 3, op_MAJOR_4 = 4, + op_MAJOR_5 = 5, op_LD_ADD = 12, op_ADD_SUB_SHIFT = 13, + op_ADD_MOV_CMP = 14, op_S = 15, op_LD_S = 16, op_LDB_S = 17, + op_LDW_S = 18, op_LDWX_S = 19, op_ST_S = 20, op_STB_S = 21, + op_STW_S = 22, op_Su5 = 23, op_SP = 24, op_GP = 25, + op_Pcl = 26, op_MOV_S = 27, op_ADD_CMP = 28, op_BR_S = 29, + op_B_S = 30, op_BL_S = 31 +}; + +enum flow { + noflow, + direct_jump, + direct_call, + indirect_jump, + indirect_call, + invalid_instr +}; + +#define IS_BIT(word, n) ((word) & (1<> (s)) & (~((-2) << ((e) - (s))))) + +#define MAJOR_OPCODE(word) (BITS((word), 27, 31)) +#define MINOR_OPCODE(word) (BITS((word), 16, 21)) +#define FIELD_A(word) (BITS((word), 0, 5)) +#define FIELD_B(word) ((BITS((word), 12, 14)<<3) | \ + (BITS((word), 24, 26))) +#define FIELD_C(word) (BITS((word), 6, 11)) +#define FIELD_u6(word) FIELDC(word) +#define FIELD_s12(word) sign_extend(((BITS((word), 0, 5) << 6) | \ + BITS((word), 6, 11)), 12) + +/* note that for BL/BRcc these two macro's need another AND statement to mask + * out bit 1 (make the result a multiple of 4) */ +#define FIELD_s9(word) sign_extend(((BITS(word, 15, 15) << 8) | \ + BITS(word, 16, 23)), 9) +#define FIELD_s21(word) sign_extend(((BITS(word, 6, 15) << 11) | \ + (BITS(word, 17, 26) << 1)), 12) +#define FIELD_s25(word) sign_extend(((BITS(word, 0, 3) << 21) | \ + (BITS(word, 6, 15) << 11) | \ + (BITS(word, 17, 26) << 1)), 12) + +/* note: these operate on 16 bits! */ +#define FIELD_S_A(word) ((BITS((word), 2, 2)<<3) | BITS((word), 0, 2)) +#define FIELD_S_B(word) ((BITS((word), 10, 10)<<3) | \ + BITS((word), 8, 10)) +#define FIELD_S_C(word) ((BITS((word), 7, 7)<<3) | BITS((word), 5, 7)) +#define FIELD_S_H(word) ((BITS((word), 0, 2)<<3) | BITS((word), 5, 8)) +#define FIELD_S_u5(word) (BITS((word), 0, 4)) +#define FIELD_S_u6(word) (BITS((word), 0, 4) << 1) +#define FIELD_S_u7(word) (BITS((word), 0, 4) << 2) +#define FIELD_S_u10(word) (BITS((word), 0, 7) << 2) +#define FIELD_S_s7(word) sign_extend(BITS((word), 0, 5) << 1, 9) +#define FIELD_S_s8(word) sign_extend(BITS((word), 0, 7) << 1, 9) +#define FIELD_S_s9(word) sign_extend(BITS((word), 0, 8), 9) +#define FIELD_S_s10(word) sign_extend(BITS((word), 0, 8) << 1, 10) +#define FIELD_S_s11(word) sign_extend(BITS((word), 0, 8) << 2, 11) +#define FIELD_S_s13(word) sign_extend(BITS((word), 0, 10) << 2, 13) + +#define STATUS32_L 0x00000100 +#define REG_LIMM 62 + +struct disasm_state { + /* generic info */ + unsigned long words[2]; + int instr_len; + int major_opcode; + /* info for branch/jump */ + int is_branch; + int target; + int delay_slot; + enum flow flow; + /* info for load/store */ + int src1, src2, src3, dest, wb_reg; + int zz, aa, x, pref, di; + int fault, write; +}; + +static inline int sign_extend(int value, int bits) +{ + if (IS_BIT(value, (bits - 1))) + value |= (0xffffffff << bits); + + return value; +} + +static inline int is_short_instr(unsigned long addr) +{ + uint16_t word = *((uint16_t *)addr); + int opcode = (word >> 11) & 0x1F; + return (opcode >= 0x0B); +} + +void disasm_instr(unsigned long addr, struct disasm_state *state, + int userspace, struct pt_regs *regs, struct callee_regs *cregs); +int disasm_next_pc(unsigned long pc, struct pt_regs *regs, struct callee_regs + *cregs, unsigned long *fall_thru, unsigned long *target); +long get_reg(int reg, struct pt_regs *regs, struct callee_regs *cregs); +void set_reg(int reg, long val, struct pt_regs 
*regs, + struct callee_regs *cregs); + +#endif /* __ARC_DISASM_H__ */ diff --git a/arch/arc/include/asm/dma.h b/arch/arc/include/asm/dma.h new file mode 100644 index 0000000000..5b744f4b10 --- /dev/null +++ b/arch/arc/include/asm/dma.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef ASM_ARC_DMA_H +#define ASM_ARC_DMA_H + +#define MAX_DMA_ADDRESS 0xC0000000 +#ifdef CONFIG_PCI +extern int isa_dma_bridge_buggy; +#else +#define isa_dma_bridge_buggy 0 +#endif + +#endif diff --git a/arch/arc/include/asm/dsp-impl.h b/arch/arc/include/asm/dsp-impl.h new file mode 100644 index 0000000000..cd5636dfeb --- /dev/null +++ b/arch/arc/include/asm/dsp-impl.h @@ -0,0 +1,152 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Synopsys, Inc. (www.synopsys.com) + * + * Author: Eugeniy Paltsev + */ +#ifndef __ASM_ARC_DSP_IMPL_H +#define __ASM_ARC_DSP_IMPL_H + +#include + +#define DSP_CTRL_DISABLED_ALL 0 + +#ifdef __ASSEMBLY__ + +/* clobbers r5 register */ +.macro DSP_EARLY_INIT +#ifdef CONFIG_ISA_ARCV2 + lr r5, [ARC_AUX_DSP_BUILD] + bmsk r5, r5, 7 + breq r5, 0, 1f + mov r5, DSP_CTRL_DISABLED_ALL + sr r5, [ARC_AUX_DSP_CTRL] +1: +#endif +.endm + +/* clobbers r10, r11 registers pair */ +.macro DSP_SAVE_REGFILE_IRQ +#if defined(CONFIG_ARC_DSP_KERNEL) + /* + * Drop any changes to DSP_CTRL made by userspace so userspace won't be + * able to break kernel - reset it to DSP_CTRL_DISABLED_ALL value + */ + mov r10, DSP_CTRL_DISABLED_ALL + sr r10, [ARC_AUX_DSP_CTRL] + +#elif defined(CONFIG_ARC_DSP_SAVE_RESTORE_REGS) + /* + * Save DSP_CTRL register and reset it to value suitable for kernel + * (DSP_CTRL_DISABLED_ALL) + */ + mov r10, DSP_CTRL_DISABLED_ALL + aex r10, [ARC_AUX_DSP_CTRL] + st r10, [sp, PT_DSP_CTRL] + +#endif +.endm + +/* clobbers r10, r11 registers pair */ +.macro DSP_RESTORE_REGFILE_IRQ +#if defined(CONFIG_ARC_DSP_SAVE_RESTORE_REGS) + ld r10, [sp, PT_DSP_CTRL] + sr r10, [ARC_AUX_DSP_CTRL] + +#endif +.endm + +#else /* __ASEMBLY__ */ + +#include +#include +#include + +#ifdef CONFIG_ARC_DSP_SAVE_RESTORE_REGS + +/* + * As we save new and restore old AUX register value in the same place we + * can optimize a bit and use AEX instruction (swap contents of an auxiliary + * register with a core register) instead of LR + SR pair. 
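+ *
+ * Roughly, one save/restore step per register behaves like the following
+ * C-style pseudo-code (purely illustrative; the real implementation is the
+ * inline asm in AUX_SAVE_RESTORE below, and REG stands for any of the DSP
+ * aux registers handled there):
+ *
+ *	tmp = next->thread.dsp.REG;	- value to install for incoming task
+ *	tmp = AEX(ARC_AUX_REG, tmp);	- swap: aux gets new, tmp gets old
+ *	prev->thread.dsp.REG = tmp;	- stash outgoing task's old value
+ *
+ * so a single AEX replaces the separate LR (read old) + SR (write new) pair.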
+ */ +#define AUX_SAVE_RESTORE(_saveto, _readfrom, _offt, _aux) \ +do { \ + long unsigned int _scratch; \ + \ + __asm__ __volatile__( \ + "ld %0, [%2, %4] \n" \ + "aex %0, [%3] \n" \ + "st %0, [%1, %4] \n" \ + : \ + "=&r" (_scratch) /* must be early clobber */ \ + : \ + "r" (_saveto), \ + "r" (_readfrom), \ + "Ir" (_aux), \ + "Ir" (_offt) \ + : \ + "memory" \ + ); \ +} while (0) + +#define DSP_AUX_SAVE_RESTORE(_saveto, _readfrom, _aux) \ + AUX_SAVE_RESTORE(_saveto, _readfrom, \ + offsetof(struct dsp_callee_regs, _aux), \ + ARC_AUX_##_aux) + +static inline void dsp_save_restore(struct task_struct *prev, + struct task_struct *next) +{ + long unsigned int *saveto = &prev->thread.dsp.ACC0_GLO; + long unsigned int *readfrom = &next->thread.dsp.ACC0_GLO; + + DSP_AUX_SAVE_RESTORE(saveto, readfrom, ACC0_GLO); + DSP_AUX_SAVE_RESTORE(saveto, readfrom, ACC0_GHI); + + DSP_AUX_SAVE_RESTORE(saveto, readfrom, DSP_BFLY0); + DSP_AUX_SAVE_RESTORE(saveto, readfrom, DSP_FFT_CTRL); + +#ifdef CONFIG_ARC_DSP_AGU_USERSPACE + DSP_AUX_SAVE_RESTORE(saveto, readfrom, AGU_AP0); + DSP_AUX_SAVE_RESTORE(saveto, readfrom, AGU_AP1); + DSP_AUX_SAVE_RESTORE(saveto, readfrom, AGU_AP2); + DSP_AUX_SAVE_RESTORE(saveto, readfrom, AGU_AP3); + + DSP_AUX_SAVE_RESTORE(saveto, readfrom, AGU_OS0); + DSP_AUX_SAVE_RESTORE(saveto, readfrom, AGU_OS1); + + DSP_AUX_SAVE_RESTORE(saveto, readfrom, AGU_MOD0); + DSP_AUX_SAVE_RESTORE(saveto, readfrom, AGU_MOD1); + DSP_AUX_SAVE_RESTORE(saveto, readfrom, AGU_MOD2); + DSP_AUX_SAVE_RESTORE(saveto, readfrom, AGU_MOD3); +#endif /* CONFIG_ARC_DSP_AGU_USERSPACE */ +} + +#else /* !CONFIG_ARC_DSP_SAVE_RESTORE_REGS */ +#define dsp_save_restore(p, n) +#endif /* CONFIG_ARC_DSP_SAVE_RESTORE_REGS */ + +static inline bool dsp_exist(void) +{ + struct bcr_generic bcr; + + READ_BCR(ARC_AUX_DSP_BUILD, bcr); + return !!bcr.ver; +} + +static inline bool agu_exist(void) +{ + struct bcr_generic bcr; + + READ_BCR(ARC_AUX_AGU_BUILD, bcr); + return !!bcr.ver; +} + +static inline void dsp_config_check(void) +{ + CHK_OPT_STRICT(CONFIG_ARC_DSP_HANDLED, dsp_exist()); + CHK_OPT_WEAK(CONFIG_ARC_DSP_AGU_USERSPACE, agu_exist()); +} + +#endif /* __ASEMBLY__ */ +#endif /* __ASM_ARC_DSP_IMPL_H */ diff --git a/arch/arc/include/asm/dsp.h b/arch/arc/include/asm/dsp.h new file mode 100644 index 0000000000..202c78e567 --- /dev/null +++ b/arch/arc/include/asm/dsp.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Synopsys, Inc. (www.synopsys.com) + * + * Author: Eugeniy Paltsev + */ +#ifndef __ASM_ARC_DSP_H +#define __ASM_ARC_DSP_H + +#ifndef __ASSEMBLY__ + +/* + * DSP-related saved registers - need to be saved only when you are + * scheduled out. + * structure fields name must correspond to aux register defenitions for + * automatic offset calculation in DSP_AUX_SAVE_RESTORE macros + */ +struct dsp_callee_regs { + unsigned long ACC0_GLO, ACC0_GHI, DSP_BFLY0, DSP_FFT_CTRL; +#ifdef CONFIG_ARC_DSP_AGU_USERSPACE + unsigned long AGU_AP0, AGU_AP1, AGU_AP2, AGU_AP3; + unsigned long AGU_OS0, AGU_OS1; + unsigned long AGU_MOD0, AGU_MOD1, AGU_MOD2, AGU_MOD3; +#endif +}; + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_ARC_DSP_H */ diff --git a/arch/arc/include/asm/dwarf.h b/arch/arc/include/asm/dwarf.h new file mode 100644 index 0000000000..5f4de05bd4 --- /dev/null +++ b/arch/arc/include/asm/dwarf.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2016-17 Synopsys, Inc. 
(www.synopsys.com) + */ + +#ifndef _ASM_ARC_DWARF_H +#define _ASM_ARC_DWARF_H + +#ifdef __ASSEMBLY__ + +#ifdef ARC_DW2_UNWIND_AS_CFI + +#define CFI_STARTPROC .cfi_startproc +#define CFI_ENDPROC .cfi_endproc +#define CFI_DEF_CFA .cfi_def_cfa +#define CFI_REGISTER .cfi_register +#define CFI_REL_OFFSET .cfi_rel_offset +#define CFI_UNDEFINED .cfi_undefined + +#else + +#define CFI_IGNORE # + +#define CFI_STARTPROC CFI_IGNORE +#define CFI_ENDPROC CFI_IGNORE +#define CFI_DEF_CFA CFI_IGNORE +#define CFI_REGISTER CFI_IGNORE +#define CFI_REL_OFFSET CFI_IGNORE +#define CFI_UNDEFINED CFI_IGNORE + +#endif /* !ARC_DW2_UNWIND_AS_CFI */ + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_ARC_DWARF_H */ diff --git a/arch/arc/include/asm/elf.h b/arch/arc/include/asm/elf.h new file mode 100644 index 0000000000..0284ace0e1 --- /dev/null +++ b/arch/arc/include/asm/elf.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef __ASM_ARC_ELF_H +#define __ASM_ARC_ELF_H + +#include +#include +#include + +#define EM_ARC_INUSE (IS_ENABLED(CONFIG_ISA_ARCOMPACT) ? \ + EM_ARCOMPACT : EM_ARCV2) + +/* ARC Relocations (kernel Modules only) */ +#define R_ARC_32 0x4 +#define R_ARC_32_ME 0x1B +#define R_ARC_32_PCREL 0x31 + +/*to set parameters in the core dumps */ +#define ELF_ARCH EM_ARC_INUSE +#define ELF_CLASS ELFCLASS32 + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define ELF_DATA ELFDATA2MSB +#else +#define ELF_DATA ELFDATA2LSB +#endif + +/* + * To ensure that + * -we don't load something for the wrong architecture. + * -The userspace is using the correct syscall ABI + */ +struct elf32_hdr; +extern int elf_check_arch(const struct elf32_hdr *); +#define elf_check_arch elf_check_arch + +#define CORE_DUMP_USE_REGSET + +#define ELF_EXEC_PAGESIZE PAGE_SIZE + +/* + * This is the location that an ET_DYN program is loaded if exec'ed. Typical + * use of this is to invoke "./ld.so someprog" to test out a new version of + * the loader. We need to make sure that it is out of the way of the program + * that it will "exec", and that there is sufficient room for the brk. + */ +#define ELF_ET_DYN_BASE (2UL * TASK_SIZE / 3) + +/* + * When the program starts, a1 contains a pointer to a function to be + * registered with atexit, as per the SVR4 ABI. A value of 0 means we + * have no such handler. + */ +#define ELF_PLAT_INIT(_r, load_addr) ((_r)->r0 = 0) + +/* + * This yields a mask that user programs can use to figure out what + * instruction set this cpu supports. + */ +#define ELF_HWCAP (0) + +/* + * This yields a string that ld.so will use to load implementation + * specific libraries for optimization. This is more specific in + * intent than poking at uname or /proc/cpuinfo. 
+ */ +#define ELF_PLATFORM (NULL) + +#endif diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h new file mode 100644 index 0000000000..0ff4c06105 --- /dev/null +++ b/arch/arc/include/asm/entry-arcv2.h @@ -0,0 +1,284 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_ARC_ENTRY_ARCV2_H +#define __ASM_ARC_ENTRY_ARCV2_H + +#include +#include +#include +#include /* For THREAD_SIZE */ + +/* + * Interrupt/Exception stack layout (pt_regs) for ARCv2 + * (End of struct aligned to end of page [unless nested]) + * + * INTERRUPT EXCEPTION + * + * manual --------------------- manual + * | orig_r0 | + * | event/ECR | + * | bta | + * | user_r25 | + * | gp | + * | fp | + * | sp | + * | r12 | + * | r30 | + * | r58 | + * | r59 | + * hw autosave --------------------- + * optional | r0 | + * | r1 | + * ~ ~ + * | r9 | + * | r10 | + * | r11 | + * | blink | + * | lpe | + * | lps | + * | lpc | + * | ei base | + * | ldi base | + * | jli base | + * --------------------- + * hw autosave | pc / eret | + * mandatory | stat32 / erstatus | + * --------------------- + */ + +/*------------------------------------------------------------------------*/ +.macro INTERRUPT_PROLOGUE + + ; (A) Before jumping to Interrupt Vector, hardware micro-ops did following: + ; 1. SP auto-switched to kernel mode stack + ; 2. STATUS32.Z flag set if in U mode at time of interrupt (U:1,K:0) + ; 3. Auto save: (mandatory) Push PC and STAT32 on stack + ; hardware does even if CONFIG_ARC_IRQ_NO_AUTOSAVE + ; 4. Auto save: (optional) r0-r11, blink, LPE,LPS,LPC, JLI,LDI,EI + ; + ; (B) Manually saved some regs: r12,r25,r30, sp,fp,gp, ACCL pair + +#ifdef CONFIG_ARC_IRQ_NO_AUTOSAVE + ; carve pt_regs on stack (case #3), PC/STAT32 already on stack + sub sp, sp, SZ_PT_REGS - 8 + + __SAVE_REGFILE_HARD +#else + ; carve pt_regs on stack (case #4), which grew partially already + sub sp, sp, PT_r0 +#endif + + __SAVE_REGFILE_SOFT +.endm + +/*------------------------------------------------------------------------*/ +.macro EXCEPTION_PROLOGUE + + ; (A) Before jumping to Exception Vector, hardware micro-ops did following: + ; 1. SP auto-switched to kernel mode stack + ; 2. STATUS32.Z flag set if in U mode at time of exception (U:1,K:0) + ; + ; (B) Manually save the complete reg file below + + sub sp, sp, SZ_PT_REGS ; carve pt_regs + + ; _HARD saves r10 clobbered by _SOFT as scratch hence comes first + + __SAVE_REGFILE_HARD + __SAVE_REGFILE_SOFT + + st r0, [sp] ; orig_r0 + + lr r10, [eret] + lr r11, [erstatus] + ST2 r10, r11, PT_ret + + lr r10, [ecr] + lr r11, [erbta] + ST2 r10, r11, PT_event + + ; OUTPUT: r10 has ECR expected by EV_Trap +.endm + +/*------------------------------------------------------------------------ + * This macro saves the registers manually which would normally be autosaved + * by hardware on taken interrupts. 
It is used by + * - exception handlers (which don't have autosave) + * - interrupt autosave disabled due to CONFIG_ARC_IRQ_NO_AUTOSAVE + */ +.macro __SAVE_REGFILE_HARD + + ST2 r0, r1, PT_r0 + ST2 r2, r3, PT_r2 + ST2 r4, r5, PT_r4 + ST2 r6, r7, PT_r6 + ST2 r8, r9, PT_r8 + ST2 r10, r11, PT_r10 + + st blink, [sp, PT_blink] + + lr r10, [lp_end] + lr r11, [lp_start] + ST2 r10, r11, PT_lpe + + st lp_count, [sp, PT_lpc] + + ; skip JLI, LDI, EI for now +.endm + +/*------------------------------------------------------------------------ + * This macros saves a bunch of other registers which can't be autosaved for + * various reasons: + * - r12: the last caller saved scratch reg since hardware saves in pairs so r0-r11 + * - r30: free reg, used by gcc as scratch + * - ACCL/ACCH pair when they exist + */ +.macro __SAVE_REGFILE_SOFT + + ST2 gp, fp, PT_r26 ; gp (r26), fp (r27) + + st r12, [sp, PT_sp + 4] + st r30, [sp, PT_sp + 8] + + ; Saving pt_regs->sp correctly requires some extra work due to the way + ; Auto stack switch works + ; - U mode: retrieve it from AUX_USER_SP + ; - K mode: add the offset from current SP where H/w starts auto push + ; + ; 1. Utilize the fact that Z bit is set if Intr taken in U mode + ; 2. Upon entry SP is always saved (for any inspection, unwinding etc), + ; but on return, restored only if U mode + + lr r10, [AUX_USER_SP] ; U mode SP + + ; ISA requires ADD.nz to have same dest and src reg operands + mov.nz r10, sp + add.nz r10, r10, SZ_PT_REGS ; K mode SP + + st r10, [sp, PT_sp] ; SP (pt_regs->sp) + +#ifdef CONFIG_ARC_CURR_IN_REG + st r25, [sp, PT_user_r25] + GET_CURR_TASK_ON_CPU r25 +#endif + +#ifdef CONFIG_ARC_HAS_ACCL_REGS + ST2 r58, r59, PT_r58 +#endif + + /* clobbers r10, r11 registers pair */ + DSP_SAVE_REGFILE_IRQ +.endm + +/*------------------------------------------------------------------------*/ +.macro __RESTORE_REGFILE_SOFT + + LD2 gp, fp, PT_r26 ; gp (r26), fp (r27) + + ld r12, [sp, PT_r12] + ld r30, [sp, PT_r30] + + ; Restore SP (into AUX_USER_SP) only if returning to U mode + ; - for K mode, it will be implicitly restored as stack is unwound + ; - Z flag set on K is inverse of what hardware does on interrupt entry + ; but that doesn't really matter + bz 1f + + ld r10, [sp, PT_sp] ; SP (pt_regs->sp) + sr r10, [AUX_USER_SP] +1: + +#ifdef CONFIG_ARC_CURR_IN_REG + ld r25, [sp, PT_user_r25] +#endif + + /* clobbers r10, r11 registers pair */ + DSP_RESTORE_REGFILE_IRQ + +#ifdef CONFIG_ARC_HAS_ACCL_REGS + LD2 r58, r59, PT_r58 +#endif +.endm + +/*------------------------------------------------------------------------*/ +.macro __RESTORE_REGFILE_HARD + + ld blink, [sp, PT_blink] + + LD2 r10, r11, PT_lpe + sr r10, [lp_end] + sr r11, [lp_start] + + ld r10, [sp, PT_lpc] ; lp_count can't be target of LD + mov lp_count, r10 + + LD2 r0, r1, PT_r0 + LD2 r2, r3, PT_r2 + LD2 r4, r5, PT_r4 + LD2 r6, r7, PT_r6 + LD2 r8, r9, PT_r8 + LD2 r10, r11, PT_r10 +.endm + + +/*------------------------------------------------------------------------*/ +.macro INTERRUPT_EPILOGUE + + ; INPUT: r0 has STAT32 of calling context + ; INPUT: Z flag set if returning to K mode + + ; _SOFT clobbers r10 restored by _HARD hence the order + + __RESTORE_REGFILE_SOFT + +#ifdef CONFIG_ARC_IRQ_NO_AUTOSAVE + __RESTORE_REGFILE_HARD + + ; SP points to PC/STAT32: hw restores them despite NO_AUTOSAVE + add sp, sp, SZ_PT_REGS - 8 +#else + add sp, sp, PT_r0 +#endif + +.endm + +/*------------------------------------------------------------------------*/ +.macro EXCEPTION_EPILOGUE + + ; INPUT: r0 has STAT32 of 
calling context + + btst r0, STATUS_U_BIT ; Z flag set if K, used in restoring SP + + ld r10, [sp, PT_event + 4] + sr r10, [erbta] + + LD2 r10, r11, PT_ret + sr r10, [eret] + sr r11, [erstatus] + + __RESTORE_REGFILE_SOFT + __RESTORE_REGFILE_HARD + + add sp, sp, SZ_PT_REGS +.endm + +.macro FAKE_RET_FROM_EXCPN + lr r9, [status32] + bic r9, r9, STATUS_AE_MASK + or r9, r9, STATUS_IE_MASK + kflag r9 +.endm + +/* Get thread_info of "current" tsk */ +.macro GET_CURR_THR_INFO_FROM_SP reg + bmskn \reg, sp, THREAD_SHIFT - 1 +.endm + +/* Get CPU-ID of this core */ +.macro GET_CPU_ID reg + lr \reg, [identity] + xbfu \reg, \reg, 0xE8 /* 00111 01000 */ + /* M = 8-1 N = 8 */ +.endm + +#endif diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h new file mode 100644 index 0000000000..5aab4f93ab --- /dev/null +++ b/arch/arc/include/asm/entry-compact.h @@ -0,0 +1,304 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * Vineetg: March 2009 (Supporting 2 levels of Interrupts) + * Stack switching code can no longer reliably rely on the fact that + * if we are NOT in user mode, stack is switched to kernel mode. + * e.g. L2 IRQ interrupted a L1 ISR which had not yet completed + * it's prologue including stack switching from user mode + * + * Vineetg: Aug 28th 2008: Bug #94984 + * -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap + * Normally CPU does this automatically, however when doing FAKE rtie, + * we also need to explicitly do this. The problem in macros + * FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit + * was being "CLEARED" rather then "SET". Actually "SET" clears ZOL context + * + * Vineetg: May 5th 2008 + * -Modified CALLEE_REG save/restore macros to handle the fact that + * r25 contains the kernel current task ptr + * - Defined Stack Switching Macro to be reused in all intr/excp hdlrs + * - Shaved off 11 instructions from RESTORE_ALL_INT1 by using the + * address Write back load ld.ab instead of seperate ld/add instn + * + * Amit Bhor, Sameer Dhavale: Codito Technologies 2004 + */ + +#ifndef __ASM_ARC_ENTRY_COMPACT_H +#define __ASM_ARC_ENTRY_COMPACT_H + +#include +#include +#include /* For THREAD_SIZE */ + +/*-------------------------------------------------------------- + * Switch to Kernel Mode stack if SP points to User Mode stack + * + * Entry : r9 contains pre-IRQ/exception/trap status32 + * Exit : SP set to K mode stack + * SP at the time of entry (K/U) saved @ pt_regs->sp + * Clobbers: r9 + *-------------------------------------------------------------*/ + +.macro SWITCH_TO_KERNEL_STK + + /* User Mode when this happened ? Yes: Proceed to switch stack */ + bbit1 r9, STATUS_U_BIT, 88f + + /* OK we were already in kernel mode when this event happened, thus can + * assume SP is kernel mode SP. _NO_ need to do any stack switching + */ + +#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS + /* However.... + * If Level 2 Interrupts enabled, we may end up with a corner case: + * 1. User Task executing + * 2. L1 IRQ taken, ISR starts (CPU auto-switched to KERNEL mode) + * 3. But before it could switch SP from USER to KERNEL stack + * a L2 IRQ "Interrupts" L1 + * Thay way although L2 IRQ happened in Kernel mode, stack is still + * not switched. 
+ * To handle this, we may need to switch stack even if in kernel mode + * provided SP has values in range of USER mode stack ( < 0x7000_0000 ) + */ + brlo sp, VMALLOC_START, 88f + + /* TODO: vineetg: + * We need to be a bit more cautious here. What if a kernel bug in + * L1 ISR, caused SP to go whaco (some small value which looks like + * USER stk) and then we take L2 ISR. + * Above brlo alone would treat it as a valid L1-L2 scenario + * instead of shouting around + * The only feasible way is to make sure this L2 happened in + * L1 prelogue ONLY i.e. ilink2 is less than a pre-set marker in + * L1 ISR before it switches stack + */ + +#endif + + /*------Intr/Ecxp happened in kernel mode, SP already setup ------ */ + /* save it nevertheless @ pt_regs->sp for uniformity */ + + b.d 66f + st sp, [sp, PT_sp - SZ_PT_REGS] + +88: /*------Intr/Ecxp happened in user mode, "switch" stack ------ */ + + GET_CURR_TASK_ON_CPU r9 + + /* With current tsk in r9, get it's kernel mode stack base */ + GET_TSK_STACK_BASE r9, r9 + + /* save U mode SP @ pt_regs->sp */ + st sp, [r9, PT_sp - SZ_PT_REGS] + + /* final SP switch */ + mov sp, r9 +66: +.endm + +/*------------------------------------------------------------ + * "FAKE" a rtie to return from CPU Exception context + * This is to re-enable Exceptions within exception + * Look at EV_ProtV to see how this is actually used + *-------------------------------------------------------------*/ + +.macro FAKE_RET_FROM_EXCPN + + lr r9, [status32] + bclr r9, r9, STATUS_AE_BIT + or r9, r9, (STATUS_E1_MASK|STATUS_E2_MASK) + sr r9, [erstatus] + mov r9, 55f + sr r9, [eret] + rtie +55: +.endm + +/*-------------------------------------------------------------- + * For early Exception/ISR Prologue, a core reg is temporarily needed to + * code the rest of prolog (stack switching). This is done by stashing + * it to memory (non-SMP case) or SCRATCH0 Aux Reg (SMP). + * + * Before saving the full regfile - this reg is restored back, only + * to be saved again on kernel mode stack, as part of pt_regs. 
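+ *
+ * Illustrative sequence (this mirrors what EXCEPTION_PROLOGUE further below
+ * actually does):
+ *
+ *	PROLOG_FREEUP_REG  r9, @ex_saved_reg1	; stash r9 away
+ *	lr   r9, [erstatus]			; r9 now usable as scratch
+ *	SWITCH_TO_KERNEL_STK			; stack switch needs a scratch reg
+ *	PROLOG_RESTORE_REG r9, @ex_saved_reg1	; original r9 value back
+ *	SAVE_R0_TO_R12 ...			; r9 saved again, now into pt_regs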
+ *-------------------------------------------------------------*/ +.macro PROLOG_FREEUP_REG reg, mem + st \reg, [\mem] +.endm + +.macro PROLOG_RESTORE_REG reg, mem + ld \reg, [\mem] +.endm + +/*-------------------------------------------------------------- + * Exception Entry prologue + * -Switches stack to K mode (if not already) + * -Saves the register file + * + * After this it is safe to call the "C" handlers + *-------------------------------------------------------------*/ +.macro EXCEPTION_PROLOGUE + + /* Need at least 1 reg to code the early exception prologue */ + PROLOG_FREEUP_REG r9, @ex_saved_reg1 + + /* U/K mode at time of exception (stack not switched if already K) */ + lr r9, [erstatus] + + /* ARC700 doesn't provide auto-stack switching */ + SWITCH_TO_KERNEL_STK + +#ifdef CONFIG_ARC_CURR_IN_REG + /* Treat r25 as scratch reg (save on stack) and load with "current" */ + PUSH r25 + GET_CURR_TASK_ON_CPU r25 +#else + sub sp, sp, 4 +#endif + + st.a r0, [sp, -8] /* orig_r0 needed for syscall (skip ECR slot) */ + sub sp, sp, 4 /* skip pt_regs->sp, already saved above */ + + /* Restore r9 used to code the early prologue */ + PROLOG_RESTORE_REG r9, @ex_saved_reg1 + + /* now we are ready to save the regfile */ + SAVE_R0_TO_R12 + PUSH gp + PUSH fp + PUSH blink + PUSHAX eret + PUSHAX erstatus + PUSH lp_count + PUSHAX lp_end + PUSHAX lp_start + PUSHAX erbta + + lr r10, [ecr] + st r10, [sp, PT_event] /* EV_Trap expects r10 to have ECR */ +.endm + +/*-------------------------------------------------------------- + * Restore all registers used by system call or Exceptions + * SP should always be pointing to the next free stack element + * when entering this macro. + * + * NOTE: + * + * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg + * for memory load operations. If used in that way interrupts are deffered + * by hardware and that is not good. + *-------------------------------------------------------------*/ +.macro EXCEPTION_EPILOGUE + + POPAX erbta + POPAX lp_start + POPAX lp_end + + POP r9 + mov lp_count, r9 ;LD to lp_count is not allowed + + POPAX erstatus + POPAX eret + POP blink + POP fp + POP gp + RESTORE_R12_TO_R0 + +#ifdef CONFIG_ARC_CURR_IN_REG + ld r25, [sp, 12] +#endif + ld sp, [sp] /* restore original sp */ + /* orig_r0, ECR, user_r25 skipped automatically */ +.endm + +/* Dummy ECR values for Interrupts */ +#define event_IRQ1 0x0031abcd +#define event_IRQ2 0x0032abcd + +.macro INTERRUPT_PROLOGUE LVL + + /* free up r9 as scratchpad */ + PROLOG_FREEUP_REG r9, @int\LVL\()_saved_reg + + /* Which mode (user/kernel) was the system in when intr occurred */ + lr r9, [status32_l\LVL\()] + + SWITCH_TO_KERNEL_STK + +#ifdef CONFIG_ARC_CURR_IN_REG + /* Treat r25 as scratch reg (save on stack) and load with "current" */ + PUSH r25 + GET_CURR_TASK_ON_CPU r25 +#else + sub sp, sp, 4 +#endif + + PUSH 0x003\LVL\()abcd /* Dummy ECR */ + sub sp, sp, 8 /* skip orig_r0 (not needed) + skip pt_regs->sp, already saved above */ + + /* Restore r9 used to code the early prologue */ + PROLOG_RESTORE_REG r9, @int\LVL\()_saved_reg + + SAVE_R0_TO_R12 + PUSH gp + PUSH fp + PUSH blink + PUSH ilink\LVL\() + PUSHAX status32_l\LVL\() + PUSH lp_count + PUSHAX lp_end + PUSHAX lp_start + PUSHAX bta_l\LVL\() + +.endm + +/*-------------------------------------------------------------- + * Restore all registers used by interrupt handlers. + * + * NOTE: + * + * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg + * for memory load operations. 
If used in that way interrupts are deffered + * by hardware and that is not good. + *-------------------------------------------------------------*/ +.macro INTERRUPT_EPILOGUE LVL + + POPAX bta_l\LVL\() + POPAX lp_start + POPAX lp_end + + POP r9 + mov lp_count, r9 ;LD to lp_count is not allowed + + POPAX status32_l\LVL\() + POP ilink\LVL\() + POP blink + POP fp + POP gp + RESTORE_R12_TO_R0 + +#ifdef CONFIG_ARC_CURR_IN_REG + ld r25, [sp, 12] +#endif + ld sp, [sp] /* restore original sp */ + /* orig_r0, ECR, user_r25 skipped automatically */ +.endm + +/* Get thread_info of "current" tsk */ +.macro GET_CURR_THR_INFO_FROM_SP reg + bic \reg, sp, (THREAD_SIZE - 1) +.endm + +/* Get CPU-ID of this core */ +.macro GET_CPU_ID reg + lr \reg, [identity] + lsr \reg, \reg, 8 + bmsk \reg, \reg, 7 +.endm + +#endif /* __ASM_ARC_ENTRY_COMPACT_H */ diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h new file mode 100644 index 0000000000..fcdd59d77f --- /dev/null +++ b/arch/arc/include/asm/entry.h @@ -0,0 +1,298 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef __ASM_ARC_ENTRY_H +#define __ASM_ARC_ENTRY_H + +#include /* For NR_syscalls defination */ +#include +#include +#include /* For VMALLOC_START */ +#include + +#ifdef CONFIG_ISA_ARCOMPACT +#include /* ISA specific bits */ +#else +#include +#endif + +/* Note on the LD/ST addr modes with addr reg wback + * + * LD.a same as LD.aw + * + * LD.a reg1, [reg2, x] => Pre Incr + * Eff Addr for load = [reg2 + x] + * + * LD.ab reg1, [reg2, x] => Post Incr + * Eff Addr for load = [reg2] + */ + +.macro PUSH reg + st.a \reg, [sp, -4] +.endm + +.macro PUSHAX aux + lr r9, [\aux] + PUSH r9 +.endm + +.macro POP reg + ld.ab \reg, [sp, 4] +.endm + +.macro POPAX aux + POP r9 + sr r9, [\aux] +.endm + +/*-------------------------------------------------------------- + * Helpers to save/restore Scratch Regs: + * used by Interrupt/Exception Prologue/Epilogue + *-------------------------------------------------------------*/ +.macro SAVE_R0_TO_R12 + PUSH r0 + PUSH r1 + PUSH r2 + PUSH r3 + PUSH r4 + PUSH r5 + PUSH r6 + PUSH r7 + PUSH r8 + PUSH r9 + PUSH r10 + PUSH r11 + PUSH r12 +.endm + +.macro RESTORE_R12_TO_R0 + POP r12 + POP r11 + POP r10 + POP r9 + POP r8 + POP r7 + POP r6 + POP r5 + POP r4 + POP r3 + POP r2 + POP r1 + POP r0 + +.endm + +/*-------------------------------------------------------------- + * Helpers to save/restore callee-saved regs: + * used by several macros below + *-------------------------------------------------------------*/ +.macro SAVE_R13_TO_R24 + PUSH r13 + PUSH r14 + PUSH r15 + PUSH r16 + PUSH r17 + PUSH r18 + PUSH r19 + PUSH r20 + PUSH r21 + PUSH r22 + PUSH r23 + PUSH r24 +.endm + +.macro RESTORE_R24_TO_R13 + POP r24 + POP r23 + POP r22 + POP r21 + POP r20 + POP r19 + POP r18 + POP r17 + POP r16 + POP r15 + POP r14 + POP r13 +.endm + +/*-------------------------------------------------------------- + * Collect User Mode callee regs as struct callee_regs - needed by + * fork/do_signal/unaligned-access-emulation. + * (By default only scratch regs are saved on entry to kernel) + * + * Special handling for r25 if used for caching Task Pointer. + * It would have been saved in task->thread.user_r25 already, but to keep + * the interface same it is copied into regular r25 placeholder in + * struct callee_regs. 
+ *-------------------------------------------------------------*/ +.macro SAVE_CALLEE_SAVED_USER + + mov r12, sp ; save SP as ref to pt_regs + SAVE_R13_TO_R24 + +#ifdef CONFIG_ARC_CURR_IN_REG + ; Retrieve orig r25 and save it with rest of callee_regs + ld r12, [r12, PT_user_r25] + PUSH r12 +#else + PUSH r25 +#endif + +.endm + +/*-------------------------------------------------------------- + * Save kernel Mode callee regs at the time of Contect Switch. + * + * Special handling for r25 if used for caching Task Pointer. + * Kernel simply skips saving it since it will be loaded with + * incoming task pointer anyways + *-------------------------------------------------------------*/ +.macro SAVE_CALLEE_SAVED_KERNEL + + SAVE_R13_TO_R24 + +#ifdef CONFIG_ARC_CURR_IN_REG + sub sp, sp, 4 +#else + PUSH r25 +#endif +.endm + +/*-------------------------------------------------------------- + * Opposite of SAVE_CALLEE_SAVED_KERNEL + *-------------------------------------------------------------*/ +.macro RESTORE_CALLEE_SAVED_KERNEL + +#ifdef CONFIG_ARC_CURR_IN_REG + add sp, sp, 4 /* skip usual r25 placeholder */ +#else + POP r25 +#endif + RESTORE_R24_TO_R13 +.endm + +/*-------------------------------------------------------------- + * Opposite of SAVE_CALLEE_SAVED_USER + * + * ptrace tracer or unaligned-access fixup might have changed a user mode + * callee reg which is saved back to usual r25 storage location + *-------------------------------------------------------------*/ +.macro RESTORE_CALLEE_SAVED_USER + +#ifdef CONFIG_ARC_CURR_IN_REG + POP r12 +#else + POP r25 +#endif + RESTORE_R24_TO_R13 + + ; SP is back to start of pt_regs +#ifdef CONFIG_ARC_CURR_IN_REG + st r12, [sp, PT_user_r25] +#endif +.endm + +/*-------------------------------------------------------------- + * Super FAST Restore callee saved regs by simply re-adjusting SP + *-------------------------------------------------------------*/ +.macro DISCARD_CALLEE_SAVED_USER + add sp, sp, SZ_CALLEE_REGS +.endm + +/*------------------------------------------------------------- + * given a tsk struct, get to the base of it's kernel mode stack + * tsk->thread_info is really a PAGE, whose bottom hoists stack + * which grows upwards towards thread_info + *------------------------------------------------------------*/ + +.macro GET_TSK_STACK_BASE tsk, out + + /* Get task->thread_info (this is essentially start of a PAGE) */ + ld \out, [\tsk, TASK_THREAD_INFO] + + /* Go to end of page where stack begins (grows upwards) */ + add2 \out, \out, (THREAD_SIZE)/4 + +.endm + +/* + * @reg [OUT] thread_info->flags of "current" + */ +.macro GET_CURR_THR_INFO_FLAGS reg + GET_CURR_THR_INFO_FROM_SP \reg + ld \reg, [\reg, THREAD_INFO_FLAGS] +.endm + +#ifdef CONFIG_SMP + +/*------------------------------------------------- + * Retrieve the current running task on this CPU + * 1. Determine curr CPU id. + * 2. Use it to index into _current_task[ ] + */ +.macro GET_CURR_TASK_ON_CPU reg + GET_CPU_ID \reg + ld.as \reg, [@_current_task, \reg] +.endm + +/*------------------------------------------------- + * Save a new task as the "current" task on this CPU + * 1. Determine curr CPU id. + * 2. 
Use it to index into _current_task[ ] + * + * Coded differently than GET_CURR_TASK_ON_CPU (which uses LD.AS) + * because ST r0, [r1, offset] can ONLY have s9 @offset + * while LD can take s9 (4 byte insn) or LIMM (8 byte insn) + */ + +.macro SET_CURR_TASK_ON_CPU tsk, tmp + GET_CPU_ID \tmp + add2 \tmp, @_current_task, \tmp + st \tsk, [\tmp] +#ifdef CONFIG_ARC_CURR_IN_REG + mov r25, \tsk +#endif + +.endm + + +#else /* Uniprocessor implementation of macros */ + +.macro GET_CURR_TASK_ON_CPU reg + ld \reg, [@_current_task] +.endm + +.macro SET_CURR_TASK_ON_CPU tsk, tmp + st \tsk, [@_current_task] +#ifdef CONFIG_ARC_CURR_IN_REG + mov r25, \tsk +#endif +.endm + +#endif /* SMP / UNI */ + +/* ------------------------------------------------------------------ + * Get the ptr to some field of Current Task at @off in task struct + * -Uses r25 for Current task ptr if that is enabled + */ + +#ifdef CONFIG_ARC_CURR_IN_REG + +.macro GET_CURR_TASK_FIELD_PTR off, reg + add \reg, r25, \off +.endm + +#else + +.macro GET_CURR_TASK_FIELD_PTR off, reg + GET_CURR_TASK_ON_CPU \reg + add \reg, \reg, \off +.endm + +#endif /* CONFIG_ARC_CURR_IN_REG */ + +#endif /* __ASM_ARC_ENTRY_H */ diff --git a/arch/arc/include/asm/exec.h b/arch/arc/include/asm/exec.h new file mode 100644 index 0000000000..6134175d96 --- /dev/null +++ b/arch/arc/include/asm/exec.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef __ASM_ARC_EXEC_H +#define __ASM_ARC_EXEC_H + +/* Align to 16b */ +#define arch_align_stack(p) ((unsigned long)(p) & ~0xf) + +#endif diff --git a/arch/arc/include/asm/fb.h b/arch/arc/include/asm/fb.h new file mode 100644 index 0000000000..dc2e303cdb --- /dev/null +++ b/arch/arc/include/asm/fb.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_FB_H_ +#define _ASM_FB_H_ + +#include +#include +#include + +static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma, + unsigned long off) +{ + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); +} + +static inline int fb_is_primary_device(struct fb_info *info) +{ + return 0; +} + +#endif /* _ASM_FB_H_ */ diff --git a/arch/arc/include/asm/fpu.h b/arch/arc/include/asm/fpu.h new file mode 100644 index 0000000000..006bcf88a7 --- /dev/null +++ b/arch/arc/include/asm/fpu.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Synopsys, Inc. (www.synopsys.com) + * + */ + +#ifndef _ASM_ARC_FPU_H +#define _ASM_ARC_FPU_H + +#ifdef CONFIG_ARC_FPU_SAVE_RESTORE + +#include + +#ifdef CONFIG_ISA_ARCOMPACT + +/* These DPFP regs need to be saved/restored across ctx-sw */ +struct arc_fpu { + struct { + unsigned int l, h; + } aux_dpfp[2]; +}; + +#define fpu_init_task(regs) + +#else + +/* + * ARCv2 FPU Control aux register + * - bits to enable Traps on Exceptions + * - Rounding mode + * + * ARCv2 FPU Status aux register + * - FPU exceptions flags (Inv, Div-by-Zero, overflow, underflow, inexact) + * - Flag Write Enable to clear flags explicitly (vs. 
by fpu instructions + * only + */ + +struct arc_fpu { + unsigned int ctrl, status; +}; + +extern void fpu_init_task(struct pt_regs *regs); + +#endif /* !CONFIG_ISA_ARCOMPACT */ + +struct task_struct; + +extern void fpu_save_restore(struct task_struct *p, struct task_struct *n); + +#else /* !CONFIG_ARC_FPU_SAVE_RESTORE */ + +#define fpu_save_restore(p, n) +#define fpu_init_task(regs) + +#endif /* CONFIG_ARC_FPU_SAVE_RESTORE */ + +#endif /* _ASM_ARC_FPU_H */ diff --git a/arch/arc/include/asm/futex.h b/arch/arc/include/asm/futex.h new file mode 100644 index 0000000000..607d1c16d4 --- /dev/null +++ b/arch/arc/include/asm/futex.h @@ -0,0 +1,169 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * Vineetg: August 2010: From Android kernel work + */ + +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#include +#include +#include +#include + +#ifdef CONFIG_ARC_HAS_LLSC + +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)\ + \ + smp_mb(); \ + __asm__ __volatile__( \ + "1: llock %1, [%2] \n" \ + insn "\n" \ + "2: scond %0, [%2] \n" \ + " bnz 1b \n" \ + " mov %0, 0 \n" \ + "3: \n" \ + " .section .fixup,\"ax\" \n" \ + " .align 4 \n" \ + "4: mov %0, %4 \n" \ + " j 3b \n" \ + " .previous \n" \ + " .section __ex_table,\"a\" \n" \ + " .align 4 \n" \ + " .word 1b, 4b \n" \ + " .word 2b, 4b \n" \ + " .previous \n" \ + \ + : "=&r" (ret), "=&r" (oldval) \ + : "r" (uaddr), "r" (oparg), "ir" (-EFAULT) \ + : "cc", "memory"); \ + smp_mb() \ + +#else /* !CONFIG_ARC_HAS_LLSC */ + +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)\ + \ + smp_mb(); \ + __asm__ __volatile__( \ + "1: ld %1, [%2] \n" \ + insn "\n" \ + "2: st %0, [%2] \n" \ + " mov %0, 0 \n" \ + "3: \n" \ + " .section .fixup,\"ax\" \n" \ + " .align 4 \n" \ + "4: mov %0, %4 \n" \ + " j 3b \n" \ + " .previous \n" \ + " .section __ex_table,\"a\" \n" \ + " .align 4 \n" \ + " .word 1b, 4b \n" \ + " .word 2b, 4b \n" \ + " .previous \n" \ + \ + : "=&r" (ret), "=&r" (oldval) \ + : "r" (uaddr), "r" (oparg), "ir" (-EFAULT) \ + : "cc", "memory"); \ + smp_mb() \ + +#endif + +static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, + u32 __user *uaddr) +{ + int oldval = 0, ret; + + if (!access_ok(uaddr, sizeof(u32))) + return -EFAULT; + +#ifndef CONFIG_ARC_HAS_LLSC + preempt_disable(); /* to guarantee atomic r-m-w of futex op */ +#endif + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op("mov %0, %3", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ADD: + /* oldval = *uaddr; *uaddr += oparg ; ret = *uaddr */ + __futex_atomic_op("add %0, %1, %3", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op("or %0, %1, %3", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op("bic %0, %1, %3", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op("xor %0, %1, %3", ret, oldval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } + +#ifndef CONFIG_ARC_HAS_LLSC + preempt_enable(); +#endif + + if (!ret) + *oval = oldval; + + return ret; +} + +/* + * cmpxchg of futex (pagefaults disabled by caller) + * Return 0 for success, -EFAULT otherwise + */ +static inline int +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 expval, + u32 newval) +{ + int ret = 0; + u32 existval; + + if (!access_ok(uaddr, sizeof(u32))) + return -EFAULT; + +#ifndef CONFIG_ARC_HAS_LLSC + preempt_disable(); /* to guarantee atomic r-m-w of futex op */ +#endif + smp_mb(); + + __asm__ 
__volatile__( +#ifdef CONFIG_ARC_HAS_LLSC + "1: llock %1, [%4] \n" + " brne %1, %2, 3f \n" + "2: scond %3, [%4] \n" + " bnz 1b \n" +#else + "1: ld %1, [%4] \n" + " brne %1, %2, 3f \n" + "2: st %3, [%4] \n" +#endif + "3: \n" + " .section .fixup,\"ax\" \n" + "4: mov %0, %5 \n" + " j 3b \n" + " .previous \n" + " .section __ex_table,\"a\" \n" + " .align 4 \n" + " .word 1b, 4b \n" + " .word 2b, 4b \n" + " .previous\n" + : "+&r"(ret), "=&r"(existval) + : "r"(expval), "r"(newval), "r"(uaddr), "ir"(-EFAULT) + : "cc", "memory"); + + smp_mb(); + +#ifndef CONFIG_ARC_HAS_LLSC + preempt_enable(); +#endif + *uval = existval; + return ret; +} + +#endif diff --git a/arch/arc/include/asm/highmem.h b/arch/arc/include/asm/highmem.h new file mode 100644 index 0000000000..a6b8e2c352 --- /dev/null +++ b/arch/arc/include/asm/highmem.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef _ASM_HIGHMEM_H +#define _ASM_HIGHMEM_H + +#ifdef CONFIG_HIGHMEM + +#include +#include + +#define FIXMAP_SIZE PGDIR_SIZE +#define PKMAP_SIZE PGDIR_SIZE + +/* start after vmalloc area */ +#define FIXMAP_BASE (PAGE_OFFSET - FIXMAP_SIZE - PKMAP_SIZE) + +#define FIX_KMAP_SLOTS (KM_MAX_IDX * NR_CPUS) +#define FIX_KMAP_BEGIN (0UL) +#define FIX_KMAP_END ((FIX_KMAP_BEGIN + FIX_KMAP_SLOTS) - 1) + +#define FIXADDR_TOP (FIXMAP_BASE + (FIX_KMAP_END << PAGE_SHIFT)) + +/* + * This should be converted to the asm-generic version, but of course this + * is needlessly different from all other architectures. Sigh - tglx + */ +#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) +#define __virt_to_fix(x) (((FIXADDR_TOP - ((x) & PAGE_MASK))) >> PAGE_SHIFT) + +/* start after fixmap area */ +#define PKMAP_BASE (FIXMAP_BASE + FIXMAP_SIZE) +#define LAST_PKMAP (PKMAP_SIZE >> PAGE_SHIFT) +#define LAST_PKMAP_MASK (LAST_PKMAP - 1) +#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) +#define PKMAP_NR(virt) (((virt) - PKMAP_BASE) >> PAGE_SHIFT) + +#include + +extern void kmap_init(void); + +#define arch_kmap_local_post_unmap(vaddr) \ + local_flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE) + +static inline void flush_cache_kmaps(void) +{ + flush_cache_all(); +} +#endif + +#endif diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h new file mode 100644 index 0000000000..11b0ff26b9 --- /dev/null +++ b/arch/arc/include/asm/hugepage.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2013-15 Synopsys, Inc. 
(www.synopsys.com) + */ + + +#ifndef _ASM_ARC_HUGEPAGE_H +#define _ASM_ARC_HUGEPAGE_H + +#include +#include + +static inline pte_t pmd_pte(pmd_t pmd) +{ + return __pte(pmd_val(pmd)); +} + +static inline pmd_t pte_pmd(pte_t pte) +{ + return __pmd(pte_val(pte)); +} + +#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) +#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) +#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) +#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) +#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) +#define pmd_mkhuge(pmd) pte_pmd(pte_mkhuge(pmd_pte(pmd))) +#define pmd_mkinvalid(pmd) pte_pmd(pte_mknotpresent(pmd_pte(pmd))) +#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) + +#define pmd_write(pmd) pte_write(pmd_pte(pmd)) +#define pmd_young(pmd) pte_young(pmd_pte(pmd)) +#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd)) +#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) + +#define mk_pmd(page, prot) pte_pmd(mk_pte(page, prot)) + +#define pmd_trans_huge(pmd) (pmd_val(pmd) & _PAGE_HW_SZ) + +#define pfn_pmd(pfn, prot) (__pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))) + +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + /* + * open-coded pte_modify() with additional retaining of HW_SZ bit + * so that pmd_trans_huge() remains true for this PMD + */ + return __pmd((pmd_val(pmd) & (_PAGE_CHG_MASK | _PAGE_HW_SZ)) | pgprot_val(newprot)); +} + +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + *pmdp = pmd; +} + +extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd); + +#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE +extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); + +/* We don't have hardware dirty/accessed bits, generic_pmdp_establish is fine.*/ +#define pmdp_establish generic_pmdp_establish + +#endif diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h new file mode 100644 index 0000000000..8f777d6441 --- /dev/null +++ b/arch/arc/include/asm/io.h @@ -0,0 +1,235 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + */ + +#ifndef _ASM_ARC_IO_H +#define _ASM_ARC_IO_H + +#include +#include +#include +#include + +#ifdef CONFIG_ISA_ARCV2 +#include +#define __iormb() rmb() +#define __iowmb() wmb() +#else +#define __iormb() do { } while (0) +#define __iowmb() do { } while (0) +#endif + +extern void __iomem *ioremap(phys_addr_t paddr, unsigned long size); +extern void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size, + unsigned long flags); +static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) +{ + return (void __iomem *)port; +} + +static inline void ioport_unmap(void __iomem *addr) +{ +} + +extern void iounmap(const void __iomem *addr); + +/* + * io{read,write}{16,32}be() macros + */ +#define ioread16be(p) ({ u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(); __v; }) +#define ioread32be(p) ({ u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(); __v; }) + +#define iowrite16be(v,p) ({ __iowmb(); __raw_writew((__force u16)cpu_to_be16(v), p); }) +#define iowrite32be(v,p) ({ __iowmb(); __raw_writel((__force u32)cpu_to_be32(v), p); }) + +/* Change struct page to physical address */ +#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) + +#define __raw_readb __raw_readb +static inline u8 __raw_readb(const volatile void __iomem *addr) +{ + u8 b; + + __asm__ __volatile__( + " ldb%U1 %0, %1 \n" + : "=r" (b) + : "m" (*(volatile u8 __force *)addr) + : "memory"); + + return b; +} + +#define __raw_readw __raw_readw +static inline u16 __raw_readw(const volatile void __iomem *addr) +{ + u16 s; + + __asm__ __volatile__( + " ldw%U1 %0, %1 \n" + : "=r" (s) + : "m" (*(volatile u16 __force *)addr) + : "memory"); + + return s; +} + +#define __raw_readl __raw_readl +static inline u32 __raw_readl(const volatile void __iomem *addr) +{ + u32 w; + + __asm__ __volatile__( + " ld%U1 %0, %1 \n" + : "=r" (w) + : "m" (*(volatile u32 __force *)addr) + : "memory"); + + return w; +} + +/* + * {read,write}s{b,w,l}() repeatedly access the same IO address in + * native endianness in 8-, 16-, 32-bit chunks {into,from} memory, + * @count times + */ +#define __raw_readsx(t,f) \ +static inline void __raw_reads##f(const volatile void __iomem *addr, \ + void *ptr, unsigned int count) \ +{ \ + bool is_aligned = ((unsigned long)ptr % ((t) / 8)) == 0; \ + u##t *buf = ptr; \ + \ + if (!count) \ + return; \ + \ + /* Some ARC CPU's don't support unaligned accesses */ \ + if (is_aligned) { \ + do { \ + u##t x = __raw_read##f(addr); \ + *buf++ = x; \ + } while (--count); \ + } else { \ + do { \ + u##t x = __raw_read##f(addr); \ + put_unaligned(x, buf++); \ + } while (--count); \ + } \ +} + +#define __raw_readsb __raw_readsb +__raw_readsx(8, b) +#define __raw_readsw __raw_readsw +__raw_readsx(16, w) +#define __raw_readsl __raw_readsl +__raw_readsx(32, l) + +#define __raw_writeb __raw_writeb +static inline void __raw_writeb(u8 b, volatile void __iomem *addr) +{ + __asm__ __volatile__( + " stb%U1 %0, %1 \n" + : + : "r" (b), "m" (*(volatile u8 __force *)addr) + : "memory"); +} + +#define __raw_writew __raw_writew +static inline void __raw_writew(u16 s, volatile void __iomem *addr) +{ + __asm__ __volatile__( + " stw%U1 %0, %1 \n" + : + : "r" (s), "m" (*(volatile u16 __force *)addr) + : "memory"); + +} + +#define __raw_writel __raw_writel +static inline void __raw_writel(u32 w, volatile void __iomem *addr) +{ + __asm__ __volatile__( + " st%U1 %0, %1 \n" + : + : "r" (w), "m" (*(volatile u32 __force *)addr) + : "memory"); + +} + +#define __raw_writesx(t,f) \ +static inline 
void __raw_writes##f(volatile void __iomem *addr, \ + const void *ptr, unsigned int count) \ +{ \ + bool is_aligned = ((unsigned long)ptr % ((t) / 8)) == 0; \ + const u##t *buf = ptr; \ + \ + if (!count) \ + return; \ + \ + /* Some ARC CPU's don't support unaligned accesses */ \ + if (is_aligned) { \ + do { \ + __raw_write##f(*buf++, addr); \ + } while (--count); \ + } else { \ + do { \ + __raw_write##f(get_unaligned(buf++), addr); \ + } while (--count); \ + } \ +} + +#define __raw_writesb __raw_writesb +__raw_writesx(8, b) +#define __raw_writesw __raw_writesw +__raw_writesx(16, w) +#define __raw_writesl __raw_writesl +__raw_writesx(32, l) + +/* + * MMIO can also get buffered/optimized in micro-arch, so barriers needed + * Based on ARM model for the typical use case + * + * + * + * or: + * + * + * + * http://lkml.kernel.org/r/20150622133656.GG1583@arm.com + */ +#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; }) +#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; }) +#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(); __v; }) +#define readsb(p,d,l) ({ __raw_readsb(p,d,l); __iormb(); }) +#define readsw(p,d,l) ({ __raw_readsw(p,d,l); __iormb(); }) +#define readsl(p,d,l) ({ __raw_readsl(p,d,l); __iormb(); }) + +#define writeb(v,c) ({ __iowmb(); writeb_relaxed(v,c); }) +#define writew(v,c) ({ __iowmb(); writew_relaxed(v,c); }) +#define writel(v,c) ({ __iowmb(); writel_relaxed(v,c); }) +#define writesb(p,d,l) ({ __iowmb(); __raw_writesb(p,d,l); }) +#define writesw(p,d,l) ({ __iowmb(); __raw_writesw(p,d,l); }) +#define writesl(p,d,l) ({ __iowmb(); __raw_writesl(p,d,l); }) + +/* + * Relaxed API for drivers which can handle barrier ordering themselves + * + * Also these are defined to perform little endian accesses. + * To provide the typical device register semantics of fixed endian, + * swap the byte order for Big Endian + * + * http://lkml.kernel.org/r/201603100845.30602.arnd@arndb.de + */ +#define readb_relaxed(c) __raw_readb(c) +#define readw_relaxed(c) ({ u16 __r = le16_to_cpu((__force __le16) \ + __raw_readw(c)); __r; }) +#define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32) \ + __raw_readl(c)); __r; }) + +#define writeb_relaxed(v,c) __raw_writeb(v,c) +#define writew_relaxed(v,c) __raw_writew((__force u16) cpu_to_le16(v),c) +#define writel_relaxed(v,c) __raw_writel((__force u32) cpu_to_le32(v),c) + +#include + +#endif /* _ASM_ARC_IO_H */ diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h new file mode 100644 index 0000000000..0309cb405c --- /dev/null +++ b/arch/arc/include/asm/irq.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef __ASM_ARC_IRQ_H +#define __ASM_ARC_IRQ_H + +/* + * ARCv2 can support 240 interrupts in the core interrupts controllers and + * 128 interrupts in IDU. Thus 512 virtual IRQs must be enough for most + * configurations of boards. 
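+ * (240 core + 128 IDU = 368 hardware sources, so rounding up to 512
+ * leaves headroom for additional virtual/platform IRQs.)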
+ * This doesnt affect ARCompact, but we change it to same value + */ +#define NR_IRQS 512 + +/* Platform Independent IRQs */ +#ifdef CONFIG_ISA_ARCV2 +#define IPI_IRQ 19 +#define SOFTIRQ_IRQ 21 +#define FIRST_EXT_IRQ 24 +#endif + +#include +#include + +extern void arc_init_IRQ(void); + +#endif diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h new file mode 100644 index 0000000000..fb3c21f1a2 --- /dev/null +++ b/arch/arc/include/asm/irqflags-arcv2.h @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef __ASM_IRQFLAGS_ARCV2_H +#define __ASM_IRQFLAGS_ARCV2_H + +#include + +/* status32 Bits */ +#define STATUS_AD_BIT 19 /* Disable Align chk: core supports non-aligned */ +#define STATUS_IE_BIT 31 + +#define STATUS_AD_MASK (1<> 1) & CLRI_STATUS_E_MASK); + return temp; +} + +/* + * Query IRQ state + */ +static inline int arch_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & CLRI_STATUS_IE_MASK); +} + +static inline int arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(arch_local_save_flags()); +} + +static inline void arc_softirq_trigger(int irq) +{ + write_aux_reg(AUX_IRQ_HINT, irq); +} + +static inline void arc_softirq_clear(int irq) +{ + write_aux_reg(AUX_IRQ_HINT, 0); +} + +#else + +#ifdef CONFIG_TRACE_IRQFLAGS + +.macro TRACE_ASM_IRQ_DISABLE + bl trace_hardirqs_off +.endm + +.macro TRACE_ASM_IRQ_ENABLE + bl trace_hardirqs_on +.endm + +#else + +.macro TRACE_ASM_IRQ_DISABLE +.endm + +.macro TRACE_ASM_IRQ_ENABLE +.endm + +#endif +.macro IRQ_DISABLE scratch + clri + TRACE_ASM_IRQ_DISABLE +.endm + +.macro IRQ_ENABLE scratch + TRACE_ASM_IRQ_ENABLE + seti +.endm + +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h new file mode 100644 index 0000000000..0d63e568d6 --- /dev/null +++ b/arch/arc/include/asm/irqflags-compact.h @@ -0,0 +1,201 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef __ASM_IRQFLAGS_ARCOMPACT_H +#define __ASM_IRQFLAGS_ARCOMPACT_H + +/* vineetg: March 2010 : local_irq_save( ) optimisation + * -Remove explicit mov of current status32 into reg, that is not needed + * -Use BIC insn instead of INVERTED + AND + * -Conditionally disable interrupts (if they are not enabled, don't disable) +*/ + +#include + +/* status32 Reg bits related to Interrupt Handling */ +#define STATUS_E1_BIT 1 /* Int 1 enable */ +#define STATUS_E2_BIT 2 /* Int 2 enable */ +#define STATUS_A1_BIT 3 /* Int 1 active */ +#define STATUS_A2_BIT 4 /* Int 2 active */ +#define STATUS_AE_BIT 5 /* Exception active */ + +#define STATUS_E1_MASK (1< +#else +#include +#endif + +#endif diff --git a/arch/arc/include/asm/jump_label.h b/arch/arc/include/asm/jump_label.h new file mode 100644 index 0000000000..9d96180797 --- /dev/null +++ b/arch/arc/include/asm/jump_label.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ARC_JUMP_LABEL_H +#define _ASM_ARC_JUMP_LABEL_H + +#ifndef __ASSEMBLY__ + +#include +#include + +#define JUMP_LABEL_NOP_SIZE 4 + +/* + * NOTE about '.balign 4': + * + * To make atomic update of patched instruction available we need to guarantee + * that this instruction doesn't cross L1 cache line boundary. 
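+ * (Rationale: an instruction fetch racing with the patching could otherwise
+ * observe a half-updated instruction split across the two fetch lines.)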
+ * + * As of today we simply align instruction which can be patched by 4 byte using + * ".balign 4" directive. In that case patched instruction is aligned with one + * 16-bit NOP_S if this is required. + * However 'align by 4' directive is much stricter than it actually required. + * It's enough that our 32-bit instruction don't cross L1 cache line boundary / + * L1 I$ fetch block boundary which can be achieved by using + * ".bundle_align_mode" assembler directive. That will save us from adding + * useless NOP_S padding in most of the cases. + * + * TODO: switch to ".bundle_align_mode" directive using whin it will be + * supported by ARC toolchain. + */ + +static __always_inline bool arch_static_branch(struct static_key *key, + bool branch) +{ + asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" + "1: \n" + "nop \n" + ".pushsection __jump_table, \"aw\" \n" + ".word 1b, %l[l_yes], %c0 \n" + ".popsection \n" + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, + bool branch) +{ + asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" + "1: \n" + "b %l[l_yes] \n" + ".pushsection __jump_table, \"aw\" \n" + ".word 1b, %l[l_yes], %c0 \n" + ".popsection \n" + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +typedef u32 jump_label_t; + +struct jump_entry { + jump_label_t code; + jump_label_t target; + jump_label_t key; +}; + +#endif /* __ASSEMBLY__ */ +#endif diff --git a/arch/arc/include/asm/kdebug.h b/arch/arc/include/asm/kdebug.h new file mode 100644 index 0000000000..f92049d1d3 --- /dev/null +++ b/arch/arc/include/asm/kdebug.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef _ASM_ARC_KDEBUG_H +#define _ASM_ARC_KDEBUG_H + +enum die_val { + DIE_UNUSED, + DIE_TRAP, + DIE_IERR, + DIE_OOPS +}; + +#endif diff --git a/arch/arc/include/asm/kgdb.h b/arch/arc/include/asm/kgdb.h new file mode 100644 index 0000000000..f9f71b9096 --- /dev/null +++ b/arch/arc/include/asm/kgdb.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * kgdb support for ARC + * + * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef __ARC_KGDB_H__ +#define __ARC_KGDB_H__ + +#ifdef CONFIG_KGDB + +#include + +/* to ensure compatibility with Linux 2.6.35, we don't implement the get/set + * register API yet */ +#undef DBG_MAX_REG_NUM + +#define GDB_MAX_REGS 87 + +#define BREAK_INSTR_SIZE 2 +#define CACHE_FLUSH_IS_SAFE 1 +#define NUMREGBYTES (GDB_MAX_REGS * 4) +#define BUFMAX 2048 + +static inline void arch_kgdb_breakpoint(void) +{ + __asm__ __volatile__ ("trap_s 0x4\n"); +} + +extern void kgdb_trap(struct pt_regs *regs); + +/* This is the numbering of registers according to the GDB. See GDB's + * arc-tdep.h for details. + * + * Registers are ordered for GDB 7.5. It is incompatible with GDB 6.8. 
*/ +enum arc_linux_regnums { + _R0 = 0, + _R1, _R2, _R3, _R4, _R5, _R6, _R7, _R8, _R9, _R10, _R11, _R12, _R13, + _R14, _R15, _R16, _R17, _R18, _R19, _R20, _R21, _R22, _R23, _R24, + _R25, _R26, + _FP = 27, + __SP = 28, + _R30 = 30, + _BLINK = 31, + _LP_COUNT = 60, + _STOP_PC = 64, + _RET = 64, + _LP_START = 65, + _LP_END = 66, + _STATUS32 = 67, + _ECR = 76, + _BTA = 82, +}; + +#else +#define kgdb_trap(regs) +#endif + +#endif /* __ARC_KGDB_H__ */ diff --git a/arch/arc/include/asm/kmap_types.h b/arch/arc/include/asm/kmap_types.h new file mode 100644 index 0000000000..fecf7851ec --- /dev/null +++ b/arch/arc/include/asm/kmap_types.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef _ASM_KMAP_TYPES_H +#define _ASM_KMAP_TYPES_H + +/* + * We primarily need to define KM_TYPE_NR here but that in turn + * is a function of PGDIR_SIZE etc. + * To avoid circular deps issue, put everything in asm/highmem.h + */ +#endif diff --git a/arch/arc/include/asm/kprobes.h b/arch/arc/include/asm/kprobes.h new file mode 100644 index 0000000000..de1566e32c --- /dev/null +++ b/arch/arc/include/asm/kprobes.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef _ARC_KPROBES_H +#define _ARC_KPROBES_H + +#include + +#ifdef CONFIG_KPROBES + +typedef u16 kprobe_opcode_t; + +#define UNIMP_S_INSTRUCTION 0x79e0 +#define TRAP_S_2_INSTRUCTION 0x785e + +#define MAX_INSN_SIZE 8 +#define MAX_STACK_SIZE 64 + +struct arch_specific_insn { + int is_short; + kprobe_opcode_t *t1_addr, *t2_addr; + kprobe_opcode_t t1_opcode, t2_opcode; +}; + +#define flush_insn_slot(p) do { } while (0) + +#define kretprobe_blacklist_size 0 + +struct kprobe; + +void arch_remove_kprobe(struct kprobe *p); + +int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); + +struct prev_kprobe { + struct kprobe *kp; + unsigned long status; +}; + +struct kprobe_ctlblk { + unsigned int kprobe_status; + struct prev_kprobe prev_kprobe; +}; + +int kprobe_fault_handler(struct pt_regs *regs, unsigned long cause); +void __kretprobe_trampoline(void); +void trap_is_kprobe(unsigned long address, struct pt_regs *regs); +#else +#define trap_is_kprobe(address, regs) +#endif /* CONFIG_KPROBES */ + +#endif /* _ARC_KPROBES_H */ diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h new file mode 100644 index 0000000000..c9434ff3aa --- /dev/null +++ b/arch/arc/include/asm/linkage.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + */ + +#ifndef __ASM_LINKAGE_H +#define __ASM_LINKAGE_H + +#include + +#ifdef __ASSEMBLY__ + +.macro ST2 e, o, off +#ifdef CONFIG_ARC_HAS_LL64 + std \e, [sp, \off] +#else + st \e, [sp, \off] + st \o, [sp, \off+4] +#endif +.endm + +.macro LD2 e, o, off +#ifdef CONFIG_ARC_HAS_LL64 + ldd \e, [sp, \off] +#else + ld \e, [sp, \off] + ld \o, [sp, \off+4] +#endif +.endm + +#define ASM_NL ` /* use '`' to mark new line in macro */ +#define __ALIGN .align 4 +#define __ALIGN_STR __stringify(__ALIGN) + +/* annotation for data we want in DCCM - if enabled in .config */ +.macro ARCFP_DATA nm +#ifdef CONFIG_ARC_HAS_DCCM + .section .data.arcfp +#else + .section .data +#endif + .global \nm +.endm + +/* annotation for data we want in DCCM - if enabled in .config */ +.macro ARCFP_CODE +#ifdef CONFIG_ARC_HAS_ICCM + .section .text.arcfp, "ax",@progbits +#else + .section .text, "ax",@progbits +#endif +.endm + +#define ENTRY_CFI(name) \ + .globl name ASM_NL \ + ALIGN ASM_NL \ + name: ASM_NL \ + CFI_STARTPROC ASM_NL + +#define END_CFI(name) \ + CFI_ENDPROC ASM_NL \ + .size name, .-name + +#else /* !__ASSEMBLY__ */ + +#ifdef CONFIG_ARC_HAS_ICCM +#define __arcfp_code __section(".text.arcfp") +#else +#define __arcfp_code __section(".text") +#endif + +#ifdef CONFIG_ARC_HAS_DCCM +#define __arcfp_data __section(".data.arcfp") +#else +#define __arcfp_data __section(".data") +#endif + +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/arch/arc/include/asm/mach_desc.h b/arch/arc/include/asm/mach_desc.h new file mode 100644 index 0000000000..c4e1970593 --- /dev/null +++ b/arch/arc/include/asm/mach_desc.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com) + * + * based on METAG mach/arch.h (which in turn was based on ARM) + */ + +#ifndef _ASM_ARC_MACH_DESC_H_ +#define _ASM_ARC_MACH_DESC_H_ + +/** + * struct machine_desc - Board specific callbacks, called from ARC common code + * Provided by each ARC board using MACHINE_START()/MACHINE_END(), so + * a multi-platform kernel builds with array of such descriptors. + * We extend the early DT scan to also match the DT's "compatible" string + * against the @dt_compat of all such descriptors, and one with highest + * "DT score" is selected as global @machine_desc. + * + * @name: Board/SoC name + * @dt_compat: Array of device tree 'compatible' strings + * (XXX: although only 1st entry is looked at) + * @init_early: Very early callback [called from setup_arch()] + * @init_per_cpu: for each CPU as it is coming up (SMP as well as UP) + * [(M):init_IRQ(), (o):start_kernel_secondary()] + * @init_machine: arch initcall level callback (e.g. populate static + * platform devices or parse Devicetree) + * @init_late: Late initcall level callback + * + */ +struct machine_desc { + const char *name; + const char **dt_compat; + void (*init_early)(void); + void (*init_per_cpu)(unsigned int); + void (*init_machine)(void); + void (*init_late)(void); + +}; + +/* + * Current machine - only accessible during boot. + */ +extern const struct machine_desc *machine_desc; + +/* + * Machine type table - also only accessible during boot + */ +extern const struct machine_desc __arch_info_begin[], __arch_info_end[]; + +/* + * Set of macros to define architecture features. + * This is built into a table by the linker. 
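+ *
+ * A board port would typically use them as below (illustrative only,
+ * all names are made up):
+ *
+ *	static const char *my_board_compat[] = { "vendor,my-board", NULL };
+ *
+ *	MACHINE_START(my_board, "my-board")
+ *		.dt_compat	= my_board_compat,
+ *		.init_machine	= my_board_init_machine,
+ *	MACHINE_END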
+ */ +#define MACHINE_START(_type, _name) \ +static const struct machine_desc __mach_desc_##_type \ +__used __section(".arch.info.init") = { \ + .name = _name, + +#define MACHINE_END \ +}; + +extern const struct machine_desc *setup_machine_fdt(void *dt); + +#endif diff --git a/arch/arc/include/asm/mmu-arcv2.h b/arch/arc/include/asm/mmu-arcv2.h new file mode 100644 index 0000000000..ed9036d4ed --- /dev/null +++ b/arch/arc/include/asm/mmu-arcv2.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012, 2019-20 Synopsys, Inc. (www.synopsys.com) + * + * MMUv3 (arc700) / MMUv4 (archs) are software page walked and software managed. + * This file contains the TLB access registers and commands + */ + +#ifndef _ASM_ARC_MMU_ARCV2_H +#define _ASM_ARC_MMU_ARCV2_H + +/* + * TLB Management regs + */ +#define ARC_REG_MMU_BCR 0x06f + +#ifdef CONFIG_ARC_MMU_V3 +#define ARC_REG_TLBPD0 0x405 +#define ARC_REG_TLBPD1 0x406 +#define ARC_REG_TLBPD1HI 0 /* Dummy: allows common code */ +#define ARC_REG_TLBINDEX 0x407 +#define ARC_REG_TLBCOMMAND 0x408 +#define ARC_REG_PID 0x409 +#define ARC_REG_SCRATCH_DATA0 0x418 +#else +#define ARC_REG_TLBPD0 0x460 +#define ARC_REG_TLBPD1 0x461 +#define ARC_REG_TLBPD1HI 0x463 +#define ARC_REG_TLBINDEX 0x464 +#define ARC_REG_TLBCOMMAND 0x465 +#define ARC_REG_PID 0x468 +#define ARC_REG_SCRATCH_DATA0 0x46c +#endif + +/* Bits in MMU PID reg */ +#define __TLB_ENABLE (1 << 31) +#define __PROG_ENABLE (1 << 30) +#define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE) + +/* Bits in TLB Index reg */ +#define TLB_LKUP_ERR 0x80000000 + +#ifdef CONFIG_ARC_MMU_V3 +#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001) +#else +#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000) +#endif + +/* + * TLB Commands + */ +#define TLBWrite 0x1 +#define TLBRead 0x2 +#define TLBGetIndex 0x3 +#define TLBProbe 0x4 +#define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */ +#define TLBIVUTLB 0x6 /* explicitly inv uTLBs */ + +#ifdef CONFIG_ARC_MMU_V4 +#define TLBInsertEntry 0x7 +#define TLBDeleteEntry 0x8 +#endif + +/* Masks for actual TLB "PD"s */ +#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ) +#define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ) + +#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE) + +#ifndef __ASSEMBLY__ + +struct mm_struct; +extern int pae40_exist_but_not_enab(void); + +static inline int is_pae40_enabled(void) +{ + return IS_ENABLED(CONFIG_ARC_HAS_PAE40); +} + +static inline void mmu_setup_asid(struct mm_struct *mm, unsigned long asid) +{ + write_aux_reg(ARC_REG_PID, asid | MMU_ENABLE); +} + +static inline void mmu_setup_pgd(struct mm_struct *mm, void *pgd) +{ + /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */ +#ifdef CONFIG_ISA_ARCV2 + write_aux_reg(ARC_REG_SCRATCH_DATA0, (unsigned int)pgd); +#endif +} + +#else + +.macro ARC_MMU_REENABLE reg + lr \reg, [ARC_REG_PID] + or \reg, \reg, MMU_ENABLE + sr \reg, [ARC_REG_PID] +.endm + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h new file mode 100644 index 0000000000..ca427c30f7 --- /dev/null +++ b/arch/arc/include/asm/mmu.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + */ + +#ifndef _ASM_ARC_MMU_H +#define _ASM_ARC_MMU_H + +#ifndef __ASSEMBLY__ + +#include /* NR_CPUS */ + +typedef struct { + unsigned long asid[NR_CPUS]; /* 8 bit MMU PID + Generation cycle */ +} mm_context_t; + +#endif + +#include + +#endif diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h new file mode 100644 index 0000000000..dda471f5f0 --- /dev/null +++ b/arch/arc/include/asm/mmu_context.h @@ -0,0 +1,174 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * vineetg: May 2011 + * -Refactored get_new_mmu_context( ) to only handle live-mm. + * retiring-mm handled in other hooks + * + * Vineetg: March 25th, 2008: Bug #92690 + * -Major rewrite of Core ASID allocation routine get_new_mmu_context + * + * Amit Bhor, Sameer Dhavale: Codito Technologies 2004 + */ + +#ifndef _ASM_ARC_MMU_CONTEXT_H +#define _ASM_ARC_MMU_CONTEXT_H + +#include + +#include +#include + +/* ARC ASID Management + * + * MMU tags TLBs with an 8-bit ASID, avoiding need to flush the TLB on + * context-switch. + * + * ASID is managed per cpu, so task threads across CPUs can have different + * ASID. Global ASID management is needed if hardware supports TLB shootdown + * and/or shared TLB across cores, which ARC doesn't. + * + * Each task is assigned unique ASID, with a simple round-robin allocator + * tracked in @asid_cpu. When 8-bit value rolls over,a new cycle is started + * over from 0, and TLB is flushed + * + * A new allocation cycle, post rollover, could potentially reassign an ASID + * to a different task. Thus the rule is to refresh the ASID in a new cycle. + * The 32 bit @asid_cpu (and mm->asid) have 8 bits MMU PID and rest 24 bits + * serve as cycle/generation indicator and natural 32 bit unsigned math + * automagically increments the generation when lower 8 bits rollover. + */ + +#define MM_CTXT_ASID_MASK 0x000000ff /* MMU PID reg :8 bit PID */ +#define MM_CTXT_CYCLE_MASK (~MM_CTXT_ASID_MASK) + +#define MM_CTXT_FIRST_CYCLE (MM_CTXT_ASID_MASK + 1) +#define MM_CTXT_NO_ASID 0UL + +#define asid_mm(mm, cpu) mm->context.asid[cpu] +#define hw_pid(mm, cpu) (asid_mm(mm, cpu) & MM_CTXT_ASID_MASK) + +DECLARE_PER_CPU(unsigned int, asid_cache); +#define asid_cpu(cpu) per_cpu(asid_cache, cpu) + +/* + * Get a new ASID if task doesn't have a valid one (unalloc or from prev cycle) + * Also set the MMU PID register to existing/updated ASID + */ +static inline void get_new_mmu_context(struct mm_struct *mm) +{ + const unsigned int cpu = smp_processor_id(); + unsigned long flags; + + local_irq_save(flags); + + /* + * Move to new ASID if it was not from current alloc-cycle/generation. + * This is done by ensuring that the generation bits in both mm->ASID + * and cpu's ASID counter are exactly same. + * + * Note: Callers needing new ASID unconditionally, independent of + * generation, e.g. local_flush_tlb_mm() for forking parent, + * first need to destroy the context, setting it to invalid + * value. + */ + if (!((asid_mm(mm, cpu) ^ asid_cpu(cpu)) & MM_CTXT_CYCLE_MASK)) + goto set_hw; + + /* move to new ASID and handle rollover */ + if (unlikely(!(++asid_cpu(cpu) & MM_CTXT_ASID_MASK))) { + + local_flush_tlb_all(); + + /* + * Above check for rollover of 8 bit ASID in 32 bit container. 
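+ * e.g. 0x...1FF + 1 = 0x...200: the low 8 PID bits roll over to zero
+ * while the upper "generation" bits increment by one.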
+ * If the container itself wrapped around, set it to a non zero + * "generation" to distinguish from no context + */ + if (!asid_cpu(cpu)) + asid_cpu(cpu) = MM_CTXT_FIRST_CYCLE; + } + + /* Assign new ASID to tsk */ + asid_mm(mm, cpu) = asid_cpu(cpu); + +set_hw: + mmu_setup_asid(mm, hw_pid(mm, cpu)); + + local_irq_restore(flags); +} + +/* + * Initialize the context related info for a new mm_struct + * instance. + */ +#define init_new_context init_new_context +static inline int +init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + int i; + + for_each_possible_cpu(i) + asid_mm(mm, i) = MM_CTXT_NO_ASID; + + return 0; +} + +#define destroy_context destroy_context +static inline void destroy_context(struct mm_struct *mm) +{ + unsigned long flags; + + /* Needed to elide CONFIG_DEBUG_PREEMPT warning */ + local_irq_save(flags); + asid_mm(mm, smp_processor_id()) = MM_CTXT_NO_ASID; + local_irq_restore(flags); +} + +/* Prepare the MMU for task: setup PID reg with allocated ASID + If task doesn't have an ASID (never alloc or stolen, get a new ASID) +*/ +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + const int cpu = smp_processor_id(); + + /* + * Note that the mm_cpumask is "aggregating" only, we don't clear it + * for the switched-out task, unlike some other arches. + * It is used to enlist cpus for sending TLB flush IPIs and not sending + * it to CPUs where a task once ran-on, could cause stale TLB entry + * re-use, specially for a multi-threaded task. + * e.g. T1 runs on C1, migrates to C3. T2 running on C2 munmaps. + * For a non-aggregating mm_cpumask, IPI not sent C1, and if T1 + * were to re-migrate to C1, it could access the unmapped region + * via any existing stale TLB entries. + */ + cpumask_set_cpu(cpu, mm_cpumask(next)); + + mmu_setup_pgd(next, next->pgd); + + get_new_mmu_context(next); +} + +/* + * activate_mm defaults (in asm-generic) to switch_mm and is called at the + * time of execve() to get a new ASID Note the subtlety here: + * get_new_mmu_context() behaves differently here vs. in switch_mm(). Here + * it always returns a new ASID, because mm has an unallocated "initial" + * value, while in latter, it moves to a new ASID, only if it was + * unallocated + */ + +/* it seemed that deactivate_mm( ) is a reasonable place to do book-keeping + * for retiring-mm. However destroy_context( ) still needs to do that because + * between mm_release( ) = >deactive_mm( ) and + * mmput => .. => __mmdrop( ) => destroy_context( ) + * there is a good chance that task gets sched-out/in, making it's ASID valid + * again (this teased me for a whole day). + */ + +#include + +#endif /* __ASM_ARC_MMU_CONTEXT_H */ diff --git a/arch/arc/include/asm/mmzone.h b/arch/arc/include/asm/mmzone.h new file mode 100644 index 0000000000..b86b9d1e54 --- /dev/null +++ b/arch/arc/include/asm/mmzone.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2016 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef _ASM_ARC_MMZONE_H +#define _ASM_ARC_MMZONE_H + +#ifdef CONFIG_DISCONTIGMEM + +extern struct pglist_data node_data[]; +#define NODE_DATA(nid) (&node_data[nid]) + +static inline int pfn_to_nid(unsigned long pfn) +{ + int is_end_low = 1; + + if (IS_ENABLED(CONFIG_ARC_HAS_PAE40)) + is_end_low = pfn <= virt_to_pfn(0xFFFFFFFFUL); + + /* + * node 0: lowmem: 0x8000_0000 to 0xFFFF_FFFF + * node 1: HIGHMEM w/o PAE40: 0x0 to 0x7FFF_FFFF + * HIGHMEM with PAE40: 0x1_0000_0000 to ... 
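+ *         i.e. any pfn outside the node 0 lowmem window is treated as node 1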
+ */ + if (pfn >= ARCH_PFN_OFFSET && is_end_low) + return 0; + + return 1; +} + +static inline int pfn_valid(unsigned long pfn) +{ + int nid = pfn_to_nid(pfn); + + return (pfn <= node_end_pfn(nid)); +} +#endif /* CONFIG_DISCONTIGMEM */ + +#endif diff --git a/arch/arc/include/asm/module.h b/arch/arc/include/asm/module.h new file mode 100644 index 0000000000..f534a1fef0 --- /dev/null +++ b/arch/arc/include/asm/module.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * Amit Bhor, Sameer Dhavale: Codito Technologies 2004 + */ + +#ifndef _ASM_ARC_MODULE_H +#define _ASM_ARC_MODULE_H + +#include + +struct mod_arch_specific { +#ifdef CONFIG_ARC_DW2_UNWIND + void *unw_info; + int unw_sec_idx; +#endif + const char *secstr; +}; + +#endif /* _ASM_ARC_MODULE_H */ diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h new file mode 100644 index 0000000000..9a62e1d879 --- /dev/null +++ b/arch/arc/include/asm/page.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ +#ifndef __ASM_ARC_PAGE_H +#define __ASM_ARC_PAGE_H + +#include + +#ifdef CONFIG_ARC_HAS_PAE40 + +#define MAX_POSSIBLE_PHYSMEM_BITS 40 +#define PAGE_MASK_PHYS (0xff00000000ull | PAGE_MASK) + +#else /* CONFIG_ARC_HAS_PAE40 */ + +#define MAX_POSSIBLE_PHYSMEM_BITS 32 +#define PAGE_MASK_PHYS PAGE_MASK + +#endif /* CONFIG_ARC_HAS_PAE40 */ + +#ifndef __ASSEMBLY__ + +#define clear_page(paddr) memset((paddr), 0, PAGE_SIZE) +#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) +#define copy_page(to, from) memcpy((to), (from), PAGE_SIZE) + +struct vm_area_struct; +struct page; + +#define __HAVE_ARCH_COPY_USER_HIGHPAGE + +void copy_user_highpage(struct page *to, struct page *from, + unsigned long u_vaddr, struct vm_area_struct *vma); +void clear_user_page(void *to, unsigned long u_vaddr, struct page *page); + +typedef struct { + unsigned long pgd; +} pgd_t; + +#define pgd_val(x) ((x).pgd) +#define __pgd(x) ((pgd_t) { (x) }) + +#if CONFIG_PGTABLE_LEVELS > 3 + +typedef struct { + unsigned long pud; +} pud_t; + +#define pud_val(x) ((x).pud) +#define __pud(x) ((pud_t) { (x) }) + +#endif + +#if CONFIG_PGTABLE_LEVELS > 2 + +typedef struct { + unsigned long pmd; +} pmd_t; + +#define pmd_val(x) ((x).pmd) +#define __pmd(x) ((pmd_t) { (x) }) + +#endif + +typedef struct { +#ifdef CONFIG_ARC_HAS_PAE40 + unsigned long long pte; +#else + unsigned long pte; +#endif +} pte_t; + +#define pte_val(x) ((x).pte) +#define __pte(x) ((pte_t) { (x) }) + +typedef struct { + unsigned long pgprot; +} pgprot_t; + +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) }) +#define pte_pgprot(x) __pgprot(pte_val(x)) + +typedef struct page *pgtable_t; + +/* + * Use virt_to_pfn with caution: + * If used in pte or paddr related macros, it could cause truncation + * in PAE40 builds + * As a rule of thumb, only use it in helpers starting with virt_ + * You have been warned ! 
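+ * (the pfn/paddr arithmetic here is plain 32-bit "unsigned long", which
+ * cannot carry a full 40-bit physical address)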
+ */ +#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) + +/* + * When HIGHMEM is enabled we have holes in the memory map so we need + * pfn_valid() that takes into account the actual extents of the physical + * memory + */ +#ifdef CONFIG_HIGHMEM + +extern unsigned long arch_pfn_offset; +#define ARCH_PFN_OFFSET arch_pfn_offset + +extern int pfn_valid(unsigned long pfn); +#define pfn_valid pfn_valid + +#else /* CONFIG_HIGHMEM */ + +#define ARCH_PFN_OFFSET virt_to_pfn(CONFIG_LINUX_RAM_BASE) +#define pfn_valid(pfn) (((pfn) - ARCH_PFN_OFFSET) < max_mapnr) + +#endif /* CONFIG_HIGHMEM */ + +/* + * __pa, __va, virt_to_page (ALERT: deprecated, don't use them) + * + * These macros have historically been misnamed + * virt here means link-address/program-address as embedded in object code. + * And for ARC, link-addr = physical address + */ +#define __pa(vaddr) ((unsigned long)(vaddr)) +#define __va(paddr) ((void *)((unsigned long)(paddr))) + +#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) +#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) + +/* Default Permissions for stack/heaps pages (Non Executable) */ +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC + +#define WANT_PAGE_VIRTUAL 1 + +#include /* page_to_pfn, pfn_to_page */ +#include + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/arch/arc/include/asm/pci.h b/arch/arc/include/asm/pci.h new file mode 100644 index 0000000000..a6858e1117 --- /dev/null +++ b/arch/arc/include/asm/pci.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2015-2016 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef _ASM_ARC_PCI_H +#define _ASM_ARC_PCI_H + +#ifdef __KERNEL__ +#include + +#define PCIBIOS_MIN_IO 0x100 +#define PCIBIOS_MIN_MEM 0x100000 + +#define pcibios_assign_all_busses() 1 + +#endif /* __KERNEL__ */ + +#endif /* _ASM_ARC_PCI_H */ diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h new file mode 100644 index 0000000000..4c919c0f4b --- /dev/null +++ b/arch/arc/include/asm/perf_event.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Linux performance counter support for ARC + * + * Copyright (C) 2014-2015 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2011-2013 Synopsys, Inc. 
(www.synopsys.com) + */ + +#ifndef __ASM_PERF_EVENT_H +#define __ASM_PERF_EVENT_H + +/* Max number of counters that PCT block may ever have */ +#define ARC_PERF_MAX_COUNTERS 32 + +#define ARC_REG_CC_BUILD 0xF6 +#define ARC_REG_CC_INDEX 0x240 +#define ARC_REG_CC_NAME0 0x241 +#define ARC_REG_CC_NAME1 0x242 + +#define ARC_REG_PCT_BUILD 0xF5 +#define ARC_REG_PCT_COUNTL 0x250 +#define ARC_REG_PCT_COUNTH 0x251 +#define ARC_REG_PCT_SNAPL 0x252 +#define ARC_REG_PCT_SNAPH 0x253 +#define ARC_REG_PCT_CONFIG 0x254 +#define ARC_REG_PCT_CONTROL 0x255 +#define ARC_REG_PCT_INDEX 0x256 +#define ARC_REG_PCT_INT_CNTL 0x25C +#define ARC_REG_PCT_INT_CNTH 0x25D +#define ARC_REG_PCT_INT_CTRL 0x25E +#define ARC_REG_PCT_INT_ACT 0x25F + +#define ARC_REG_PCT_CONFIG_USER (1 << 18) /* count in user mode */ +#define ARC_REG_PCT_CONFIG_KERN (1 << 19) /* count in kernel mode */ + +#define ARC_REG_PCT_CONTROL_CC (1 << 16) /* clear counts */ +#define ARC_REG_PCT_CONTROL_SN (1 << 17) /* snapshot */ + +struct arc_reg_pct_build { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int m:8, c:8, r:5, i:1, s:2, v:8; +#else + unsigned int v:8, s:2, i:1, r:5, c:8, m:8; +#endif +}; + +struct arc_reg_cc_build { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int c:16, r:8, v:8; +#else + unsigned int v:8, r:8, c:16; +#endif +}; + +#define PERF_COUNT_ARC_DCLM (PERF_COUNT_HW_MAX + 0) +#define PERF_COUNT_ARC_DCSM (PERF_COUNT_HW_MAX + 1) +#define PERF_COUNT_ARC_ICM (PERF_COUNT_HW_MAX + 2) +#define PERF_COUNT_ARC_BPOK (PERF_COUNT_HW_MAX + 3) +#define PERF_COUNT_ARC_EDTLB (PERF_COUNT_HW_MAX + 4) +#define PERF_COUNT_ARC_EITLB (PERF_COUNT_HW_MAX + 5) +#define PERF_COUNT_ARC_LDC (PERF_COUNT_HW_MAX + 6) +#define PERF_COUNT_ARC_STC (PERF_COUNT_HW_MAX + 7) + +#define PERF_COUNT_ARC_HW_MAX (PERF_COUNT_HW_MAX + 8) + +#endif /* __ASM_PERF_EVENT_H */ diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h new file mode 100644 index 0000000000..096b8ef58e --- /dev/null +++ b/arch/arc/include/asm/pgalloc.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * vineetg: June 2011 + * -"/proc/meminfo | grep PageTables" kept on increasing + * Recently added pgtable dtor was not getting called. + * + * vineetg: May 2011 + * -Variable pg-sz means that Page Tables could be variable sized themselves + * So calculate it based on addr traversal split [pgd-bits:pte-bits:xxx] + * -Page Table size capped to max 1 to save memory - hence verified. + * -Since these deal with constants, gcc compile-time optimizes them. + * + * vineetg: Nov 2010 + * -Added pgtable ctor/dtor used for pgtable mem accounting + * + * vineetg: April 2010 + * -Switched pgtable_t from being struct page * to unsigned long + * =Needed so that Page Table allocator (pte_alloc_one) is not forced to + * deal with struct page. That way in future we can make it allocate + * multiple PG Tbls in one Page Frame + * =sweet side effect is avoiding calls to ugly page_address( ) from the + * pg-tlb allocator sub-sys (pte_alloc_one, ptr_free, pmd_populate) + * + * Amit Bhor, Sameer Dhavale: Codito Technologies 2004 + */ + +#ifndef _ASM_ARC_PGALLOC_H +#define _ASM_ARC_PGALLOC_H + +#include +#include +#include + +static inline void +pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) +{ + /* + * The cast to long below is OK in 32-bit PAE40 regime with long long pte + * Despite "wider" pte, the pte table needs to be in non-PAE low memory + * as all higher levels can only hold long pointers. 
+ * + * The cast itself is needed given simplistic definition of set_pmd() + */ + set_pmd(pmd, __pmd((unsigned long)pte)); +} + +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_page) +{ + set_pmd(pmd, __pmd((unsigned long)page_address(pte_page))); +} + +static inline pgd_t *pgd_alloc(struct mm_struct *mm) +{ + pgd_t *ret = (pgd_t *) __get_free_page(GFP_KERNEL); + + if (ret) { + int num, num2; + num = USER_PTRS_PER_PGD + USER_KERNEL_GUTTER / PGDIR_SIZE; + memzero(ret, num * sizeof(pgd_t)); + + num2 = VMALLOC_SIZE / PGDIR_SIZE; + memcpy(ret + num, swapper_pg_dir + num, num2 * sizeof(pgd_t)); + + memzero(ret + num + num2, + (PTRS_PER_PGD - num - num2) * sizeof(pgd_t)); + + } + return ret; +} + +#if CONFIG_PGTABLE_LEVELS > 3 + +static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp) +{ + set_p4d(p4dp, __p4d((unsigned long)pudp)); +} + +#define __pud_free_tlb(tlb, pmd, addr) pud_free((tlb)->mm, pmd) + +#endif + +#if CONFIG_PGTABLE_LEVELS > 2 + +static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) +{ + set_pud(pudp, __pud((unsigned long)pmdp)); +} + +#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd) + +#endif + +#define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte) + +#endif /* _ASM_ARC_PGALLOC_H */ diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h new file mode 100644 index 0000000000..183d23bc1e --- /dev/null +++ b/arch/arc/include/asm/pgtable-bits-arcv2.h @@ -0,0 +1,149 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +/* + * page table flags for software walked/managed MMUv3 (ARC700) and MMUv4 (HS) + * There correspond to the corresponding bits in the TLB + */ + +#ifndef _ASM_ARC_PGTABLE_BITS_ARCV2_H +#define _ASM_ARC_PGTABLE_BITS_ARCV2_H + +#ifdef CONFIG_ARC_CACHE_PAGES +#define _PAGE_CACHEABLE (1 << 0) /* Cached (H) */ +#else +#define _PAGE_CACHEABLE 0 +#endif + +#define _PAGE_EXECUTE (1 << 1) /* User Execute (H) */ +#define _PAGE_WRITE (1 << 2) /* User Write (H) */ +#define _PAGE_READ (1 << 3) /* User Read (H) */ +#define _PAGE_ACCESSED (1 << 4) /* Accessed (s) */ +#define _PAGE_DIRTY (1 << 5) /* Modified (s) */ +#define _PAGE_SPECIAL (1 << 6) +#define _PAGE_GLOBAL (1 << 8) /* ASID agnostic (H) */ +#define _PAGE_PRESENT (1 << 9) /* PTE/TLB Valid (H) */ + +#ifdef CONFIG_ARC_MMU_V4 +#define _PAGE_HW_SZ (1 << 10) /* Normal/super (H) */ +#else +#define _PAGE_HW_SZ 0 +#endif + +/* Defaults for every user page */ +#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE) + +/* Set of bits not changed in pte_modify */ +#define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \ + _PAGE_SPECIAL) + +/* More Abbrevaited helpers */ +#define PAGE_U_NONE __pgprot(___DEF) +#define PAGE_U_R __pgprot(___DEF | _PAGE_READ) +#define PAGE_U_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE) +#define PAGE_U_X_R __pgprot(___DEF | _PAGE_READ | _PAGE_EXECUTE) +#define PAGE_U_X_W_R __pgprot(___DEF \ + | _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE) +#define PAGE_KERNEL __pgprot(___DEF | _PAGE_GLOBAL \ + | _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE) + +#define PAGE_SHARED PAGE_U_W_R + +#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE)) + +/* + * Mapping of vm_flags (Generic VM) to PTE flags (arch specific) + * + * Certain cases have 1:1 mapping + * e.g. 
__P101 means VM_READ, VM_EXEC and !VM_SHARED + * which directly corresponds to PAGE_U_X_R + * + * Other rules which cause the divergence from 1:1 mapping + * + * 1. Although ARC700 can do exclusive execute/write protection (meaning R + * can be tracked independet of X/W unlike some other CPUs), still to + * keep things consistent with other archs: + * -Write implies Read: W => R + * -Execute implies Read: X => R + * + * 2. Pvt Writable doesn't have Write Enabled initially: Pvt-W => !W + * This is to enable COW mechanism + */ + /* xwr */ +#define __P000 PAGE_U_NONE +#define __P001 PAGE_U_R +#define __P010 PAGE_U_R /* Pvt-W => !W */ +#define __P011 PAGE_U_R /* Pvt-W => !W */ +#define __P100 PAGE_U_X_R /* X => R */ +#define __P101 PAGE_U_X_R +#define __P110 PAGE_U_X_R /* Pvt-W => !W and X => R */ +#define __P111 PAGE_U_X_R /* Pvt-W => !W */ + +#define __S000 PAGE_U_NONE +#define __S001 PAGE_U_R +#define __S010 PAGE_U_W_R /* W => R */ +#define __S011 PAGE_U_W_R +#define __S100 PAGE_U_X_R /* X => R */ +#define __S101 PAGE_U_X_R +#define __S110 PAGE_U_X_W_R /* X => R */ +#define __S111 PAGE_U_X_W_R + +#ifndef __ASSEMBLY__ + +#define pte_write(pte) (pte_val(pte) & _PAGE_WRITE) +#define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY) +#define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED) +#define pte_special(pte) (pte_val(pte) & _PAGE_SPECIAL) + +#define PTE_BIT_FUNC(fn, op) \ + static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } + +PTE_BIT_FUNC(mknotpresent, &= ~(_PAGE_PRESENT)); +PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE)); +PTE_BIT_FUNC(mkwrite, |= (_PAGE_WRITE)); +PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY)); +PTE_BIT_FUNC(mkdirty, |= (_PAGE_DIRTY)); +PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED)); +PTE_BIT_FUNC(mkyoung, |= (_PAGE_ACCESSED)); +PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL)); +PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ)); + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); +} + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) +{ + set_pte(ptep, pteval); +} + +void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, + pte_t *ptep); + +/* Encode swap {type,off} tuple into PTE + * We reserve 13 bits for 5-bit @type, keeping bits 12-5 zero, ensuring that + * PAGE_PRESENT is zero in a PTE holding swap "identifier" + */ +#define __swp_entry(type, off) ((swp_entry_t) \ + { ((type) & 0x1f) | ((off) << 13) }) + +/* Decode a PTE containing swap "identifier "into constituents */ +#define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f) +#define __swp_offset(pte_lookalike) ((pte_lookalike).val >> 13) + +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) + +#define kern_addr_valid(addr) (1) + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#include +#endif + +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/arch/arc/include/asm/pgtable-levels.h b/arch/arc/include/asm/pgtable-levels.h new file mode 100644 index 0000000000..8084ef2f64 --- /dev/null +++ b/arch/arc/include/asm/pgtable-levels.h @@ -0,0 +1,189 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Synopsys, Inc. 
(www.synopsys.com) + */ + +/* + * Helpers for implemenintg paging levels + */ + +#ifndef _ASM_ARC_PGTABLE_LEVELS_H +#define _ASM_ARC_PGTABLE_LEVELS_H + +#if CONFIG_PGTABLE_LEVELS == 2 + +/* + * 2 level paging setup for software walked MMUv3 (ARC700) and MMUv4 (HS) + * + * [31] 32 bit virtual address [0] + * ------------------------------------------------------- + * | | <---------- PGDIR_SHIFT ----------> | + * | | | <-- PAGE_SHIFT --> | + * ------------------------------------------------------- + * | | | + * | | --> off in page frame + * | ---> index into Page Table + * ----> index into Page Directory + * + * Given software walk, the vaddr split is arbitrary set to 11:8:13 + * However enabling of super page in a 2 level regime pegs PGDIR_SHIFT to + * super page size. + */ + +#if defined(CONFIG_ARC_HUGEPAGE_16M) +#define PGDIR_SHIFT 24 +#elif defined(CONFIG_ARC_HUGEPAGE_2M) +#define PGDIR_SHIFT 21 +#else +/* + * No Super page case + * Default value provides 11:8:13 (8K), 10:10:12 (4K) + * Limits imposed by pgtable_t only PAGE_SIZE long + * (so 4K page can only have 1K entries: or 10 bits) + */ +#ifdef CONFIG_ARC_PAGE_SIZE_4K +#define PGDIR_SHIFT 22 +#else +#define PGDIR_SHIFT 21 +#endif + +#endif + +#else /* CONFIG_PGTABLE_LEVELS != 2 */ + +/* + * A default 3 level paging testing setup in software walked MMU + * MMUv4 (8K page): <4> : <7> : <8> : <13> + * A default 4 level paging testing setup in software walked MMU + * MMUv4 (8K page): <4> : <3> : <4> : <8> : <13> + */ +#define PGDIR_SHIFT 28 +#if CONFIG_PGTABLE_LEVELS > 3 +#define PUD_SHIFT 25 +#endif +#if CONFIG_PGTABLE_LEVELS > 2 +#define PMD_SHIFT 21 +#endif + +#endif /* CONFIG_PGTABLE_LEVELS */ + +#define PGDIR_SIZE BIT(PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE - 1)) +#define PTRS_PER_PGD BIT(32 - PGDIR_SHIFT) + +#if CONFIG_PGTABLE_LEVELS > 3 +#define PUD_SIZE BIT(PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE - 1)) +#define PTRS_PER_PUD BIT(PGDIR_SHIFT - PUD_SHIFT) +#endif + +#if CONFIG_PGTABLE_LEVELS > 2 +#define PMD_SIZE BIT(PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE - 1)) +#define PTRS_PER_PMD BIT(PUD_SHIFT - PMD_SHIFT) +#endif + +#define PTRS_PER_PTE BIT(PMD_SHIFT - PAGE_SHIFT) + +#ifndef __ASSEMBLY__ + +#if CONFIG_PGTABLE_LEVELS > 3 +#include +#elif CONFIG_PGTABLE_LEVELS > 2 +#include +#else +#include +#endif + +/* + * 1st level paging: pgd + */ +#define pgd_index(addr) ((addr) >> PGDIR_SHIFT) +#define pgd_offset(mm, addr) (((mm)->pgd) + pgd_index(addr)) +#define pgd_offset_k(addr) pgd_offset(&init_mm, addr) +#define pgd_ERROR(e) \ + pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + +#if CONFIG_PGTABLE_LEVELS > 3 + +/* In 4 level paging, p4d_* macros work on pgd */ +#define p4d_none(x) (!p4d_val(x)) +#define p4d_bad(x) ((p4d_val(x) & ~PAGE_MASK)) +#define p4d_present(x) (p4d_val(x)) +#define p4d_clear(xp) do { p4d_val(*(xp)) = 0; } while (0) +#define p4d_pgtable(p4d) ((pud_t *)(p4d_val(p4d) & PAGE_MASK)) +#define p4d_page(p4d) virt_to_page(p4d_pgtable(p4d)) +#define set_p4d(p4dp, p4d) (*(p4dp) = p4d) + +/* + * 2nd level paging: pud + */ +#define pud_ERROR(e) \ + pr_crit("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) + +#endif + +#if CONFIG_PGTABLE_LEVELS > 2 + +/* + * In 3 level paging, pud_* macros work on pgd + * In 4 level paging, pud_* macros work on pud + */ +#define pud_none(x) (!pud_val(x)) +#define pud_bad(x) ((pud_val(x) & ~PAGE_MASK)) +#define pud_present(x) (pud_val(x)) +#define pud_clear(xp) do { pud_val(*(xp)) = 0; } while (0) +#define pud_pgtable(pud) ((pmd_t *)(pud_val(pud) & 
PAGE_MASK)) +#define pud_page(pud) virt_to_page(pud_pgtable(pud)) +#define set_pud(pudp, pud) (*(pudp) = pud) + +/* + * 3rd level paging: pmd + */ +#define pmd_ERROR(e) \ + pr_crit("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) + +#define pmd_pfn(pmd) ((pmd_val(pmd) & PMD_MASK) >> PAGE_SHIFT) +#define pfn_pmd(pfn,prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) +#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) + +#endif + +/* + * Due to the strange way generic pgtable level folding works, the pmd_* macros + * - are valid even for 2 levels (which supposedly only has pgd - pte) + * - behave differently for 2 vs. 3 + * In 2 level paging (pgd -> pte), pmd_* macros work on pgd + * In 3+ level paging (pgd -> pmd -> pte), pmd_* macros work on pmd + */ +#define pmd_none(x) (!pmd_val(x)) +#define pmd_bad(x) ((pmd_val(x) & ~PAGE_MASK)) +#define pmd_present(x) (pmd_val(x)) +#define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0) +#define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK) +#define pmd_page(pmd) virt_to_page(pmd_page_vaddr(pmd)) +#define set_pmd(pmdp, pmd) (*(pmdp) = pmd) +#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd)) + +/* + * 4th level paging: pte + */ +#define pte_ERROR(e) \ + pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) + +#define pte_none(x) (!pte_val(x)) +#define pte_present(x) (pte_val(x) & _PAGE_PRESENT) +#define pte_clear(mm,addr,ptep) set_pte_at(mm, addr, ptep, __pte(0)) +#define pte_page(pte) pfn_to_page(pte_pfn(pte)) +#define set_pte(ptep, pte) ((*(ptep)) = (pte)) +#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT) +#define pfn_pte(pfn, prot) __pte(__pfn_to_phys(pfn) | pgprot_val(prot)) +#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) + +#ifdef CONFIG_ISA_ARCV2 +#define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ) +#endif + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h new file mode 100644 index 0000000000..4cf45a99fd --- /dev/null +++ b/arch/arc/include/asm/pgtable.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef _ASM_ARC_PGTABLE_H +#define _ASM_ARC_PGTABLE_H + +#include + +#include +#include +#include +#include + +/* + * Number of entries a user land program use. + * TASK_SIZE is the maximum vaddr that can be used by a userland program. + */ +#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) + +#ifndef __ASSEMBLY__ + +extern char empty_zero_page[PAGE_SIZE]; +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) + +extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE); + +/* to cope with aliasing VIPT cache */ +#define HAVE_ARCH_UNMAPPED_AREA + +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h new file mode 100644 index 0000000000..54db9d7bb5 --- /dev/null +++ b/arch/arc/include/asm/processor.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * vineetg: March 2009 + * -Implemented task_pt_regs( ) + * + * Amit Bhor, Sameer Dhavale, Ashwin Chaugule: Codito Technologies 2004 + */ + +#ifndef __ASM_ARC_PROCESSOR_H +#define __ASM_ARC_PROCESSOR_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include + +/* Arch specific stuff which needs to be saved per task. 
+ * However these items are not so important so as to earn a place in + * struct thread_info + */ +struct thread_struct { + unsigned long ksp; /* kernel mode stack pointer */ + unsigned long callee_reg; /* pointer to callee regs */ + unsigned long fault_address; /* dbls as brkpt holder as well */ +#ifdef CONFIG_ARC_DSP_SAVE_RESTORE_REGS + struct dsp_callee_regs dsp; +#endif +#ifdef CONFIG_ARC_FPU_SAVE_RESTORE + struct arc_fpu fpu; +#endif +}; + +#define INIT_THREAD { \ + .ksp = sizeof(init_stack) + (unsigned long) init_stack, \ +} + +/* Forward declaration, a strange C thing */ +struct task_struct; + +#define task_pt_regs(p) \ + ((struct pt_regs *)(THREAD_SIZE + (void *)task_stack_page(p)) - 1) + +/* Free all resources held by a thread */ +#define release_thread(thread) do { } while (0) + +/* + * A lot of busy-wait loops in SMP are based off of non-volatile data otherwise + * get optimised away by gcc + */ +#define cpu_relax() barrier() + +#define KSTK_EIP(tsk) (task_pt_regs(tsk)->ret) +#define KSTK_ESP(tsk) (task_pt_regs(tsk)->sp) + +/* + * Where about of Task's sp, fp, blink when it was last seen in kernel mode. + * Look in process.c for details of kernel stack layout + */ +#define TSK_K_ESP(tsk) (tsk->thread.ksp) + +#define TSK_K_REG(tsk, off) (*((unsigned long *)(TSK_K_ESP(tsk) + \ + sizeof(struct callee_regs) + off))) + +#define TSK_K_BLINK(tsk) TSK_K_REG(tsk, 4) +#define TSK_K_FP(tsk) TSK_K_REG(tsk, 0) + +extern void start_thread(struct pt_regs * regs, unsigned long pc, + unsigned long usp); + +extern unsigned int __get_wchan(struct task_struct *p); + +#endif /* !__ASSEMBLY__ */ + +/* + * Default System Memory Map on ARC + * + * ---------------------------- (lower 2G, Translated) ------------------------- + * 0x0000_0000 0x5FFF_FFFF (user vaddr: TASK_SIZE) + * 0x6000_0000 0x6FFF_FFFF (reserved gutter between U/K) + * 0x7000_0000 0x7FFF_FFFF (kvaddr: vmalloc/modules/pkmap..) + * + * PAGE_OFFSET ---------------- (Upper 2G, Untranslated) ----------------------- + * 0x8000_0000 0xBFFF_FFFF (kernel direct mapped) + * 0xC000_0000 0xFFFF_FFFF (peripheral uncached space) + * ----------------------------------------------------------------------------- + */ + +#define TASK_SIZE 0x60000000 + +#define VMALLOC_START (PAGE_OFFSET - (CONFIG_ARC_KVADDR_SIZE << 20)) + +/* 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter (see asm/highmem.h) */ +#define VMALLOC_SIZE ((CONFIG_ARC_KVADDR_SIZE << 20) - PMD_SIZE * 4) + +#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) + +#define USER_KERNEL_GUTTER (VMALLOC_START - TASK_SIZE) + +#define STACK_TOP TASK_SIZE +#define STACK_TOP_MAX STACK_TOP + +/* This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define TASK_UNMAPPED_BASE (TASK_SIZE / 3) + +#endif /* __ASM_ARC_PROCESSOR_H */ diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h new file mode 100644 index 0000000000..cca8d6583e --- /dev/null +++ b/arch/arc/include/asm/ptrace.h @@ -0,0 +1,159 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + * + * Amit Bhor, Sameer Dhavale: Codito Technologies 2004 + */ +#ifndef __ASM_ARC_PTRACE_H +#define __ASM_ARC_PTRACE_H + +#include + +#ifndef __ASSEMBLY__ + +/* THE pt_regs: Defines how regs are saved during entry into kernel */ + +#ifdef CONFIG_ISA_ARCOMPACT +struct pt_regs { + + /* Real registers */ + unsigned long bta; /* bta_l1, bta_l2, erbta */ + + unsigned long lp_start, lp_end, lp_count; + + unsigned long status32; /* status32_l1, status32_l2, erstatus */ + unsigned long ret; /* ilink1, ilink2 or eret */ + unsigned long blink; + unsigned long fp; + unsigned long r26; /* gp */ + + unsigned long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0; + + unsigned long sp; /* User/Kernel depending on where we came from */ + unsigned long orig_r0; + + /* + * To distinguish bet excp, syscall, irq + * For traps and exceptions, Exception Cause Register. + * ECR: <00> + * Last word used by Linux for extra state mgmt (syscall-restart) + * For interrupts, use artificial ECR values to note current prio-level + */ + union { + struct { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned long state:8, ecr_vec:8, + ecr_cause:8, ecr_param:8; +#else + unsigned long ecr_param:8, ecr_cause:8, + ecr_vec:8, state:8; +#endif + }; + unsigned long event; + }; + + unsigned long user_r25; +}; +#else + +struct pt_regs { + + unsigned long orig_r0; + + union { + struct { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned long state:8, ecr_vec:8, + ecr_cause:8, ecr_param:8; +#else + unsigned long ecr_param:8, ecr_cause:8, + ecr_vec:8, state:8; +#endif + }; + unsigned long event; + }; + + unsigned long bta; /* bta_l1, bta_l2, erbta */ + + unsigned long user_r25; + + unsigned long r26; /* gp */ + unsigned long fp; + unsigned long sp; /* user/kernel sp depending on where we came from */ + + unsigned long r12, r30; + +#ifdef CONFIG_ARC_HAS_ACCL_REGS + unsigned long r58, r59; /* ACCL/ACCH used by FPU / DSP MPY */ +#endif +#ifdef CONFIG_ARC_DSP_SAVE_RESTORE_REGS + unsigned long DSP_CTRL; +#endif + + /*------- Below list auto saved by h/w -----------*/ + unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11; + + unsigned long blink; + unsigned long lp_end, lp_start, lp_count; + + unsigned long ei, ldi, jli; + + unsigned long ret; + unsigned long status32; +}; + +#endif + +/* Callee saved registers - need to be saved only when you are scheduled out */ + +struct callee_regs { + unsigned long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13; +}; + +#define instruction_pointer(regs) ((regs)->ret) +#define profile_pc(regs) instruction_pointer(regs) + +/* return 1 if user mode or 0 if kernel mode */ +#define user_mode(regs) (regs->status32 & STATUS_U_MASK) + +#define user_stack_pointer(regs)\ +({ unsigned int sp; \ + if (user_mode(regs)) \ + sp = (regs)->sp;\ + else \ + sp = -1; \ + sp; \ +}) + +/* return 1 if PC in delay slot */ +#define delay_mode(regs) ((regs->status32 & STATUS_DE_MASK) == STATUS_DE_MASK) + +#define in_syscall(regs) ((regs->ecr_vec == ECR_V_TRAP) && !regs->ecr_param) +#define in_brkpt_trap(regs) ((regs->ecr_vec == ECR_V_TRAP) && regs->ecr_param) + +#define STATE_SCALL_RESTARTED 0x01 + +#define syscall_wont_restart(reg) (reg->state |= STATE_SCALL_RESTARTED) +#define syscall_restartable(reg) !(reg->state & STATE_SCALL_RESTARTED) + +#define current_pt_regs() \ +({ \ + /* open-coded current_thread_info() */ \ + register unsigned long sp asm ("sp"); \ + unsigned long pg_start = (sp & ~(THREAD_SIZE - 1)); \ + (struct pt_regs *)(pg_start + THREAD_SIZE) - 1; \ +}) + +static inline long 
regs_return_value(struct pt_regs *regs) +{ + return (long)regs->r0; +} + +static inline void instruction_pointer_set(struct pt_regs *regs, + unsigned long val) +{ + instruction_pointer(regs) = val; +} +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_PTRACE_H */ diff --git a/arch/arc/include/asm/sections.h b/arch/arc/include/asm/sections.h new file mode 100644 index 0000000000..860b4fd67a --- /dev/null +++ b/arch/arc/include/asm/sections.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef _ASM_ARC_SECTIONS_H +#define _ASM_ARC_SECTIONS_H + +#include + +extern char __arc_dccm_base[]; + +#endif diff --git a/arch/arc/include/asm/segment.h b/arch/arc/include/asm/segment.h new file mode 100644 index 0000000000..871f8ab11b --- /dev/null +++ b/arch/arc/include/asm/segment.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef __ASMARC_SEGMENT_H +#define __ASMARC_SEGMENT_H + +#ifndef __ASSEMBLY__ + +typedef unsigned long mm_segment_t; + +#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) + +#define KERNEL_DS MAKE_MM_SEG(0) +#define USER_DS MAKE_MM_SEG(TASK_SIZE) +#define uaccess_kernel() (get_fs() == KERNEL_DS) + +#endif /* __ASSEMBLY__ */ +#endif /* __ASMARC_SEGMENT_H */ diff --git a/arch/arc/include/asm/serial.h b/arch/arc/include/asm/serial.h new file mode 100644 index 0000000000..83062c8b97 --- /dev/null +++ b/arch/arc/include/asm/serial.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef _ASM_ARC_SERIAL_H +#define _ASM_ARC_SERIAL_H + +/* + * early 8250 (now earlycon) requires BASE_BAUD to be defined in this header. + * However to still determine it dynamically (for multi-platform images) + * we do this in a helper by parsing the FDT early + */ + +extern unsigned int __init arc_early_base_baud(void); + +#define BASE_BAUD arc_early_base_baud() + +#endif /* _ASM_ARC_SERIAL_H */ diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h new file mode 100644 index 0000000000..028a8cf762 --- /dev/null +++ b/arch/arc/include/asm/setup.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ +#ifndef __ASM_ARC_SETUP_H +#define __ASM_ARC_SETUP_H + + +#include +#include + +#define COMMAND_LINE_SIZE 256 + +/* + * Data structure to map a ID to string + * Used a lot for bootup reporting of hardware diversity + */ +struct id_to_str { + int id; + const char *str; +}; + +extern int root_mountflags, end_mem; + +void setup_processor(void); +void __init setup_arch_memory(void); +long __init arc_get_mem_sz(void); + +/* Helpers used in arc_*_mumbojumbo routines */ +#define IS_AVAIL1(v, s) ((v) ? s : "") +#define IS_DISABLED_RUN(v) ((v) ? "" : "(disabled) ") +#define IS_USED_RUN(v) ((v) ? 
"" : "(not used) ") +#define IS_USED_CFG(cfg) IS_USED_RUN(IS_ENABLED(cfg)) +#define IS_AVAIL2(v, s, cfg) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg)) +#define IS_AVAIL3(v, v2, s) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_DISABLED_RUN(v2)) + +extern void arc_mmu_init(void); +extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len); +extern void read_decode_mmu_bcr(void); + +extern void arc_cache_init(void); +extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len); +extern void read_decode_cache_bcr(void); + +#endif /* __ASMARC_SETUP_H */ diff --git a/arch/arc/include/asm/shmparam.h b/arch/arc/include/asm/shmparam.h new file mode 100644 index 0000000000..8b0251464f --- /dev/null +++ b/arch/arc/include/asm/shmparam.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef __ARC_ASM_SHMPARAM_H +#define __ARC_ASM_SHMPARAM_H + +/* Handle upto 2 cache bins */ +#define SHMLBA (2 * PAGE_SIZE) + +/* Enforce SHMLBA in shmat */ +#define __ARCH_FORCE_SHMLBA + +#endif diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h new file mode 100644 index 0000000000..d856491606 --- /dev/null +++ b/arch/arc/include/asm/smp.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef __ASM_ARC_SMP_H +#define __ASM_ARC_SMP_H + +#ifdef CONFIG_SMP + +#include +#include +#include + +#define raw_smp_processor_id() (current_thread_info()->cpu) + +/* including cpumask.h leads to cyclic deps hence this Forward declaration */ +struct cpumask; + +/* + * APIs provided by arch SMP code to generic code + */ +extern void arch_send_call_function_single_ipi(int cpu); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); + +/* + * APIs provided by arch SMP code to rest of arch code + */ +extern void __init smp_init_cpus(void); +extern void first_lines_of_secondary(void); +extern const char *arc_platform_smp_cpuinfo(void); + +/* + * API expected BY platform smp code (FROM arch smp code) + * + * smp_ipi_irq_setup: + * Takes @cpu and @hwirq to which the arch-common ISR is hooked up + */ +extern int smp_ipi_irq_setup(int cpu, irq_hw_number_t hwirq); + +/* + * struct plat_smp_ops - SMP callbacks provided by platform to ARC SMP + * + * @info: SoC SMP specific info for /proc/cpuinfo etc + * @init_early_smp: A SMP specific h/w block can init itself + * Could be common across platforms so not covered by + * mach_desc->init_early() + * @init_per_cpu: Called for each core so SMP h/w block driver can do + * any needed setup per cpu (e.g. IPI request) + * @cpu_kick: For Master to kickstart a cpu (optionally at a PC) + * @ipi_send: To send IPI to a @cpu + * @ips_clear: To clear IPI received at @irq + */ +struct plat_smp_ops { + const char *info; + void (*init_early_smp)(void); + void (*init_per_cpu)(int cpu); + void (*cpu_kick)(int cpu, unsigned long pc); + void (*ipi_send)(int cpu); + void (*ipi_clear)(int irq); +}; + +/* TBD: stop exporting it for direct population by platform */ +extern struct plat_smp_ops plat_smp_ops; + +#else /* CONFIG_SMP */ + +static inline void smp_init_cpus(void) {} +static inline const char *arc_platform_smp_cpuinfo(void) +{ + return ""; +} + +#endif /* !CONFIG_SMP */ + +/* + * ARC700 doesn't support atomic Read-Modify-Write ops. + * Originally Interrupts had to be disabled around code to gaurantee atomicity. 
+ * The LLOCK/SCOND insns allow writing interrupt-hassle-free atomic ops
+ * based on retry-if-irq-in-atomic (with hardware assist).
+ * However, despite these, we still provide the IRQ-disabling variants because:
+ *
+ * (1) These insns were introduced only in the 4.10 release, so support is
+ *     still needed for older releases.
+ *
+ * (2) In an SMP setup, the LLOCK/SCOND atomicity across CPUs needs to be
+ *     guaranteed by the platform (not something which the core handles).
+ *     Assuming a platform won't, SMP Linux needs to use spinlocks + local IRQ
+ *     disabling for atomicity.
+ *
+ * However the exported spinlock API is not usable due to cyclic header deps
+ * (even after the system.h disintegration upstream):
+ * asm/bitops.h -> linux/spinlock.h -> linux/preempt.h
+ * -> linux/thread_info.h -> linux/bitops.h -> asm/bitops.h
+ *
+ * So the workaround is to use the lowest level arch spinlock API.
+ * The exported spinlock API is smart enough to be a NOP for !CONFIG_SMP,
+ * but the same is not true for the arch backend, hence the need for 2 variants
+ */
+#ifndef CONFIG_ARC_HAS_LLSC
+
+#include
+#ifdef CONFIG_SMP
+
+#include
+
+extern arch_spinlock_t smp_atomic_ops_lock;
+
+#define atomic_ops_lock(flags) do { \
+ local_irq_save(flags); \
+ arch_spin_lock(&smp_atomic_ops_lock); \
+} while (0)
+
+#define atomic_ops_unlock(flags) do { \
+ arch_spin_unlock(&smp_atomic_ops_lock); \
+ local_irq_restore(flags); \
+} while (0)
+
+#else /* !CONFIG_SMP */
+
+#define atomic_ops_lock(flags) local_irq_save(flags)
+#define atomic_ops_unlock(flags) local_irq_restore(flags)
+
+#endif /* !CONFIG_SMP */
+
+#endif /* !CONFIG_ARC_HAS_LLSC */
+
+#endif
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
new file mode 100644
index 0000000000..1928716089
--- /dev/null
+++ b/arch/arc/include/asm/spinlock.h
@@ -0,0 +1,382 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc.
(www.synopsys.com) + */ + +#ifndef __ASM_SPINLOCK_H +#define __ASM_SPINLOCK_H + +#include +#include +#include + +#define arch_spin_is_locked(x) ((x)->slock != __ARCH_SPIN_LOCK_UNLOCKED__) + +#ifdef CONFIG_ARC_HAS_LLSC + +static inline void arch_spin_lock(arch_spinlock_t *lock) +{ + unsigned int val; + + __asm__ __volatile__( + "1: llock %[val], [%[slock]] \n" + " breq %[val], %[LOCKED], 1b \n" /* spin while LOCKED */ + " scond %[LOCKED], [%[slock]] \n" /* acquire */ + " bnz 1b \n" + " \n" + : [val] "=&r" (val) + : [slock] "r" (&(lock->slock)), + [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__) + : "memory", "cc"); + + /* + * ACQUIRE barrier to ensure load/store after taking the lock + * don't "bleed-up" out of the critical section (leak-in is allowed) + * http://www.spinics.net/lists/kernel/msg2010409.html + * + * ARCv2 only has load-load, store-store and all-all barrier + * thus need the full all-all barrier + */ + smp_mb(); +} + +/* 1 - lock taken successfully */ +static inline int arch_spin_trylock(arch_spinlock_t *lock) +{ + unsigned int val, got_it = 0; + + __asm__ __volatile__( + "1: llock %[val], [%[slock]] \n" + " breq %[val], %[LOCKED], 4f \n" /* already LOCKED, just bail */ + " scond %[LOCKED], [%[slock]] \n" /* acquire */ + " bnz 1b \n" + " mov %[got_it], 1 \n" + "4: \n" + " \n" + : [val] "=&r" (val), + [got_it] "+&r" (got_it) + : [slock] "r" (&(lock->slock)), + [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__) + : "memory", "cc"); + + smp_mb(); + + return got_it; +} + +static inline void arch_spin_unlock(arch_spinlock_t *lock) +{ + smp_mb(); + + WRITE_ONCE(lock->slock, __ARCH_SPIN_LOCK_UNLOCKED__); +} + +/* + * Read-write spinlocks, allowing multiple readers but only one writer. + * Unfair locking as Writers could be starved indefinitely by Reader(s) + */ + +static inline void arch_read_lock(arch_rwlock_t *rw) +{ + unsigned int val; + + /* + * zero means writer holds the lock exclusively, deny Reader. + * Otherwise grant lock to first/subseq reader + * + * if (rw->counter > 0) { + * rw->counter--; + * ret = 1; + * } + */ + + __asm__ __volatile__( + "1: llock %[val], [%[rwlock]] \n" + " brls %[val], %[WR_LOCKED], 1b\n" /* <= 0: spin while write locked */ + " sub %[val], %[val], 1 \n" /* reader lock */ + " scond %[val], [%[rwlock]] \n" + " bnz 1b \n" + " \n" + : [val] "=&r" (val) + : [rwlock] "r" (&(rw->counter)), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); +} + +/* 1 - lock taken successfully */ +static inline int arch_read_trylock(arch_rwlock_t *rw) +{ + unsigned int val, got_it = 0; + + __asm__ __volatile__( + "1: llock %[val], [%[rwlock]] \n" + " brls %[val], %[WR_LOCKED], 4f\n" /* <= 0: already write locked, bail */ + " sub %[val], %[val], 1 \n" /* counter-- */ + " scond %[val], [%[rwlock]] \n" + " bnz 1b \n" /* retry if collided with someone */ + " mov %[got_it], 1 \n" + " \n" + "4: ; --- done --- \n" + + : [val] "=&r" (val), + [got_it] "+&r" (got_it) + : [rwlock] "r" (&(rw->counter)), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); + + return got_it; +} + +static inline void arch_write_lock(arch_rwlock_t *rw) +{ + unsigned int val; + + /* + * If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__), + * deny writer. Otherwise if unlocked grant to writer + * Hence the claim that Linux rwlocks are unfair to writers. + * (can be starved for an indefinite time by readers). 
+ * + * if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) { + * rw->counter = 0; + * ret = 1; + * } + */ + + __asm__ __volatile__( + "1: llock %[val], [%[rwlock]] \n" + " brne %[val], %[UNLOCKED], 1b \n" /* while !UNLOCKED spin */ + " mov %[val], %[WR_LOCKED] \n" + " scond %[val], [%[rwlock]] \n" + " bnz 1b \n" + " \n" + : [val] "=&r" (val) + : [rwlock] "r" (&(rw->counter)), + [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); +} + +/* 1 - lock taken successfully */ +static inline int arch_write_trylock(arch_rwlock_t *rw) +{ + unsigned int val, got_it = 0; + + __asm__ __volatile__( + "1: llock %[val], [%[rwlock]] \n" + " brne %[val], %[UNLOCKED], 4f \n" /* !UNLOCKED, bail */ + " mov %[val], %[WR_LOCKED] \n" + " scond %[val], [%[rwlock]] \n" + " bnz 1b \n" /* retry if collided with someone */ + " mov %[got_it], 1 \n" + " \n" + "4: ; --- done --- \n" + + : [val] "=&r" (val), + [got_it] "+&r" (got_it) + : [rwlock] "r" (&(rw->counter)), + [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); + + return got_it; +} + +static inline void arch_read_unlock(arch_rwlock_t *rw) +{ + unsigned int val; + + smp_mb(); + + /* + * rw->counter++; + */ + __asm__ __volatile__( + "1: llock %[val], [%[rwlock]] \n" + " add %[val], %[val], 1 \n" + " scond %[val], [%[rwlock]] \n" + " bnz 1b \n" + " \n" + : [val] "=&r" (val) + : [rwlock] "r" (&(rw->counter)) + : "memory", "cc"); +} + +static inline void arch_write_unlock(arch_rwlock_t *rw) +{ + smp_mb(); + + WRITE_ONCE(rw->counter, __ARCH_RW_LOCK_UNLOCKED__); +} + +#else /* !CONFIG_ARC_HAS_LLSC */ + +static inline void arch_spin_lock(arch_spinlock_t *lock) +{ + unsigned int val = __ARCH_SPIN_LOCK_LOCKED__; + + /* + * Per lkmm, smp_mb() is only required after _lock (and before_unlock) + * for ACQ and REL semantics respectively. However EX based spinlocks + * need the extra smp_mb to workaround a hardware quirk. + */ + smp_mb(); + + __asm__ __volatile__( + "1: ex %0, [%1] \n" + " breq %0, %2, 1b \n" + : "+&r" (val) + : "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__) + : "memory"); + + smp_mb(); +} + +/* 1 - lock taken successfully */ +static inline int arch_spin_trylock(arch_spinlock_t *lock) +{ + unsigned int val = __ARCH_SPIN_LOCK_LOCKED__; + + smp_mb(); + + __asm__ __volatile__( + "1: ex %0, [%1] \n" + : "+r" (val) + : "r"(&(lock->slock)) + : "memory"); + + smp_mb(); + + return (val == __ARCH_SPIN_LOCK_UNLOCKED__); +} + +static inline void arch_spin_unlock(arch_spinlock_t *lock) +{ + unsigned int val = __ARCH_SPIN_LOCK_UNLOCKED__; + + /* + * RELEASE barrier: given the instructions avail on ARCv2, full barrier + * is the only option + */ + smp_mb(); + + /* + * EX is not really required here, a simple STore of 0 suffices. + * However this causes tasklist livelocks in SystemC based SMP virtual + * platforms where the systemc core scheduler uses EX as a cue for + * moving to next core. Do a git log of this file for details + */ + __asm__ __volatile__( + " ex %0, [%1] \n" + : "+r" (val) + : "r"(&(lock->slock)) + : "memory"); + + /* + * see pairing version/comment in arch_spin_lock above + */ + smp_mb(); +} + +/* + * Read-write spinlocks, allowing multiple readers but only one writer. + * Unfair locking as Writers could be starved indefinitely by Reader(s) + * + * The spinlock itself is contained in @counter and access to it is + * serialized with @lock_mutex. 
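+ *
+ * Illustrative sketch only (not part of the original comment): with the
+ * encoding from spinlock_types.h, the lock traffic under this scheme is
+ *
+ *   arch_read_lock():   counter 0x0100_0000 -> 0x00FF_FFFF (one reader in)
+ *   arch_read_unlock(): counter 0x00FF_FFFF -> 0x0100_0000
+ *   arch_write_lock():  counter 0x0100_0000 -> 0x0000_0000 (exclusive)
+ *
+ * with every check-and-update of @counter done under @lock_mutex and with
+ * IRQs disabled, so the update itself is atomic.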
+ */ + +/* 1 - lock taken successfully */ +static inline int arch_read_trylock(arch_rwlock_t *rw) +{ + int ret = 0; + unsigned long flags; + + local_irq_save(flags); + arch_spin_lock(&(rw->lock_mutex)); + + /* + * zero means writer holds the lock exclusively, deny Reader. + * Otherwise grant lock to first/subseq reader + */ + if (rw->counter > 0) { + rw->counter--; + ret = 1; + } + + arch_spin_unlock(&(rw->lock_mutex)); + local_irq_restore(flags); + + return ret; +} + +/* 1 - lock taken successfully */ +static inline int arch_write_trylock(arch_rwlock_t *rw) +{ + int ret = 0; + unsigned long flags; + + local_irq_save(flags); + arch_spin_lock(&(rw->lock_mutex)); + + /* + * If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__), + * deny writer. Otherwise if unlocked grant to writer + * Hence the claim that Linux rwlocks are unfair to writers. + * (can be starved for an indefinite time by readers). + */ + if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) { + rw->counter = 0; + ret = 1; + } + arch_spin_unlock(&(rw->lock_mutex)); + local_irq_restore(flags); + + return ret; +} + +static inline void arch_read_lock(arch_rwlock_t *rw) +{ + while (!arch_read_trylock(rw)) + cpu_relax(); +} + +static inline void arch_write_lock(arch_rwlock_t *rw) +{ + while (!arch_write_trylock(rw)) + cpu_relax(); +} + +static inline void arch_read_unlock(arch_rwlock_t *rw) +{ + unsigned long flags; + + local_irq_save(flags); + arch_spin_lock(&(rw->lock_mutex)); + rw->counter++; + arch_spin_unlock(&(rw->lock_mutex)); + local_irq_restore(flags); +} + +static inline void arch_write_unlock(arch_rwlock_t *rw) +{ + unsigned long flags; + + local_irq_save(flags); + arch_spin_lock(&(rw->lock_mutex)); + rw->counter = __ARCH_RW_LOCK_UNLOCKED__; + arch_spin_unlock(&(rw->lock_mutex)); + local_irq_restore(flags); +} + +#endif + +#endif /* __ASM_SPINLOCK_H */ diff --git a/arch/arc/include/asm/spinlock_types.h b/arch/arc/include/asm/spinlock_types.h new file mode 100644 index 0000000000..7cd0373998 --- /dev/null +++ b/arch/arc/include/asm/spinlock_types.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef __ASM_SPINLOCK_TYPES_H +#define __ASM_SPINLOCK_TYPES_H + +typedef struct { + volatile unsigned int slock; +} arch_spinlock_t; + +#define __ARCH_SPIN_LOCK_UNLOCKED__ 0 +#define __ARCH_SPIN_LOCK_LOCKED__ 1 + +#define __ARCH_SPIN_LOCK_UNLOCKED { __ARCH_SPIN_LOCK_UNLOCKED__ } +#define __ARCH_SPIN_LOCK_LOCKED { __ARCH_SPIN_LOCK_LOCKED__ } + +/* + * Unlocked : 0x0100_0000 + * Read lock(s) : 0x00FF_FFFF to 0x01 (Multiple Readers decrement it) + * Write lock : 0x0, but only if prior value is "unlocked" 0x0100_0000 + */ +typedef struct { + volatile unsigned int counter; +#ifndef CONFIG_ARC_HAS_LLSC + arch_spinlock_t lock_mutex; +#endif +} arch_rwlock_t; + +#define __ARCH_RW_LOCK_UNLOCKED__ 0x01000000 +#define __ARCH_RW_LOCK_UNLOCKED { .counter = __ARCH_RW_LOCK_UNLOCKED__ } + +#endif diff --git a/arch/arc/include/asm/stacktrace.h b/arch/arc/include/asm/stacktrace.h new file mode 100644 index 0000000000..4c50fb003d --- /dev/null +++ b/arch/arc/include/asm/stacktrace.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + */ + +#ifndef __ASM_STACKTRACE_H +#define __ASM_STACKTRACE_H + +#include + +/** + * arc_unwind_core - Unwind the kernel mode stack for an execution context + * @tsk: NULL for current task, specific task otherwise + * @regs: pt_regs used to seed the unwinder {SP, FP, BLINK, PC} + * If NULL, use pt_regs of @tsk (if !NULL) otherwise + * use the current values of {SP, FP, BLINK, PC} + * @consumer_fn: Callback invoked for each frame unwound + * Returns 0 to continue unwinding, -1 to stop + * @arg: Arg to callback + * + * Returns the address of first function in stack + * + * Semantics: + * - synchronous unwinding (e.g. dump_stack): @tsk NULL, @regs NULL + * - Asynchronous unwinding of sleeping task: @tsk !NULL, @regs NULL + * - Asynchronous unwinding of intr/excp etc: @tsk !NULL, @regs !NULL + */ +notrace noinline unsigned int arc_unwind_core( + struct task_struct *tsk, struct pt_regs *regs, + int (*consumer_fn) (unsigned int, void *), + void *arg); + +#endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arc/include/asm/string.h b/arch/arc/include/asm/string.h new file mode 100644 index 0000000000..3182ea9dcd --- /dev/null +++ b/arch/arc/include/asm/string.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * vineetg: May 2011 + * -We had half-optimised memset/memcpy, got better versions of those + * -Added memcmp, strchr, strcpy, strcmp, strlen + * + * Amit Bhor: Codito Technologies 2004 + */ + +#ifndef _ASM_ARC_STRING_H +#define _ASM_ARC_STRING_H + +#include + +#define __HAVE_ARCH_MEMSET +#define __HAVE_ARCH_MEMCPY +#define __HAVE_ARCH_MEMCMP +#define __HAVE_ARCH_STRCHR +#define __HAVE_ARCH_STRCPY +#define __HAVE_ARCH_STRCMP +#define __HAVE_ARCH_STRLEN + +extern void *memset(void *ptr, int, __kernel_size_t); +extern void *memcpy(void *, const void *, __kernel_size_t); +extern void memzero(void *ptr, __kernel_size_t n); +extern int memcmp(const void *, const void *, __kernel_size_t); +extern char *strchr(const char *s, int c); +extern char *strcpy(char *dest, const char *src); +extern int strcmp(const char *cs, const char *ct); +extern __kernel_size_t strlen(const char *); + +#endif /* _ASM_ARC_STRING_H */ diff --git a/arch/arc/include/asm/switch_to.h b/arch/arc/include/asm/switch_to.h new file mode 100644 index 0000000000..1f85de8288 --- /dev/null +++ b/arch/arc/include/asm/switch_to.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef _ASM_ARC_SWITCH_TO_H +#define _ASM_ARC_SWITCH_TO_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include + +struct task_struct *__switch_to(struct task_struct *p, struct task_struct *n); + +#define switch_to(prev, next, last) \ +do { \ + dsp_save_restore(prev, next); \ + fpu_save_restore(prev, next); \ + last = __switch_to(prev, next);\ + mb(); \ +} while (0) + +#endif + +#endif diff --git a/arch/arc/include/asm/syscall.h b/arch/arc/include/asm/syscall.h new file mode 100644 index 0000000000..94529e89df --- /dev/null +++ b/arch/arc/include/asm/syscall.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com)
+ */
+
+#ifndef _ASM_ARC_SYSCALL_H
+#define _ASM_ARC_SYSCALL_H 1
+
+#include
+#include
+#include
+#include
+#include /* in_syscall() */
+
+static inline long
+syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
+{
+ if (user_mode(regs) && in_syscall(regs))
+ return regs->r8;
+ else
+ return -1;
+}
+
+static inline void
+syscall_rollback(struct task_struct *task, struct pt_regs *regs)
+{
+ regs->r0 = regs->orig_r0;
+}
+
+static inline long
+syscall_get_error(struct task_struct *task, struct pt_regs *regs)
+{
+ /* 0 if syscall succeeded, otherwise -Errorcode */
+ return IS_ERR_VALUE(regs->r0) ? regs->r0 : 0;
+}
+
+static inline long
+syscall_get_return_value(struct task_struct *task, struct pt_regs *regs)
+{
+ return regs->r0;
+}
+
+static inline void
+syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
+ int error, long val)
+{
+ regs->r0 = (long) error ?: val;
+}
+
+/*
+ * Fetch all six syscall arguments: they live in r0..r5, which sit at
+ * descending addresses from &regs->r0 in pt_regs.
+ */
+static inline void
+syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *args)
+{
+ unsigned long *inside_ptregs = &(regs->r0);
+ unsigned int n = 6;
+ unsigned int i = 0;
+
+ while (n--) {
+ args[i++] = (*inside_ptregs);
+ inside_ptregs--;
+ }
+}
+
+static inline int
+syscall_get_arch(struct task_struct *task)
+{
+ return IS_ENABLED(CONFIG_ISA_ARCOMPACT)
+ ? (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)
+ ? AUDIT_ARCH_ARCOMPACTBE : AUDIT_ARCH_ARCOMPACT)
+ : (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)
+ ? AUDIT_ARCH_ARCV2BE : AUDIT_ARCH_ARCV2);
+}
+
+#endif
diff --git a/arch/arc/include/asm/syscalls.h b/arch/arc/include/asm/syscalls.h
new file mode 100644
index 0000000000..c3f4714a4f
--- /dev/null
+++ b/arch/arc/include/asm/syscalls.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ */
+
+#ifndef _ASM_ARC_SYSCALLS_H
+#define _ASM_ARC_SYSCALLS_H 1
+
+#include
+#include
+#include
+
+int sys_clone_wrapper(int, int, int, int, int);
+int sys_clone3_wrapper(void *, size_t);
+int sys_cacheflush(uint32_t, uint32_t, uint32_t);
+int sys_arc_settls(void *);
+int sys_arc_gettls(void);
+int sys_arc_usr_cmpxchg(int *, int, int);
+
+#include
+
+#endif
diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h
new file mode 100644
index 0000000000..c0942c24d4
--- /dev/null
+++ b/arch/arc/include/asm/thread_info.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * Vineetg: Oct 2009
+ * No need for an ARC specific thread_info allocator (kmalloc/free). This is
+ * a single page allocation anyway, so the slab allocator can be
+ * short-circuited and the generic version (get_free_page) is a better fit.
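+ *
+ * For illustration only (addresses are hypothetical): with the default 8K
+ * stacks (THREAD_SIZE_ORDER 0), a kernel sp of 0x8f00_3a60 masked with
+ * ~(THREAD_SIZE - 1) yields 0x8f00_2000, the base of the combined
+ * thread_info + stack allocation returned by current_thread_info(), while
+ * current_pt_regs() sits just below 0x8f00_4000, the top of that same
+ * allocation.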
+ *
+ * Sameer Dhavale: Codito Technologies 2004
+ */
+
+#ifndef _ASM_THREAD_INFO_H
+#define _ASM_THREAD_INFO_H
+
+#include
+
+#ifdef CONFIG_16KSTACKS
+#define THREAD_SIZE_ORDER 1
+#else
+#define THREAD_SIZE_ORDER 0
+#endif
+
+#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
+#define THREAD_SHIFT (PAGE_SHIFT << THREAD_SIZE_ORDER)
+
+#ifndef __ASSEMBLY__
+
+#include
+#include
+
+/*
+ * low level task data that entry.S needs immediate access to
+ * - this struct should fit entirely inside of one cache line
+ * - this struct shares the supervisor stack pages
+ * - if the contents of this structure are changed, the assembly constants
+ *   must also be changed
+ */
+struct thread_info {
+ unsigned long flags; /* low level flags */
+ int preempt_count; /* 0 => preemptable, <0 => BUG */
+ struct task_struct *task; /* main task structure */
+ mm_segment_t addr_limit; /* thread address space */
+ __u32 cpu; /* current CPU */
+ unsigned long thr_ptr; /* TLS ptr */
+};
+
+/*
+ * macros/functions for gaining access to the thread information structure
+ *
+ * preempt_count needs to be 1 initially, until the scheduler is functional.
+ */
+#define INIT_THREAD_INFO(tsk) \
+{ \
+ .task = &tsk, \
+ .flags = 0, \
+ .cpu = 0, \
+ .preempt_count = INIT_PREEMPT_COUNT, \
+ .addr_limit = KERNEL_DS, \
+}
+
+static inline __attribute_const__ struct thread_info *current_thread_info(void)
+{
+ register unsigned long sp asm("sp");
+ return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
+}
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * thread information flags
+ * - these are process state flags that various assembly files may need to
+ *   access
+ * - pending work-to-be-done flags are in LSW
+ * - other flags in MSW
+ */
+#define TIF_RESTORE_SIGMASK 0 /* restore sig mask in do_signal() */
+#define TIF_NOTIFY_RESUME 1 /* resumption notification requested */
+#define TIF_SIGPENDING 2 /* signal pending */
+#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
+#define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */
+#define TIF_NOTIFY_SIGNAL 5 /* signal notifications exist */
+#define TIF_SYSCALL_TRACE 15 /* syscall trace active */
+
+/* true if poll_idle() is polling TIF_NEED_RESCHED */
+#define TIF_MEMDIE 16
+
+#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
+
+/* XXX: get_cycles() to be implemented with RTSC insn */
+
+#endif /* _ASM_ARC_TIMEX_H */
diff --git a/arch/arc/include/asm/tlb-mmu1.h b/arch/arc/include/asm/tlb-mmu1.h
new file mode 100644
index 0000000000..a3083b36f5
--- /dev/null
+++ b/arch/arc/include/asm/tlb-mmu1.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc.
(www.synopsys.com) + */ + +#ifndef __ASM_TLB_MMU_V1_H__ +#define __ASM_TLB_MMU_V1_H__ + +#include + +#if defined(__ASSEMBLY__) && (CONFIG_ARC_MMU_VER == 1) + +.macro TLB_WRITE_HEURISTICS + +#define JH_HACK1 +#undef JH_HACK2 +#undef JH_HACK3 + +#ifdef JH_HACK3 +; Calculate set index for 2-way MMU +; -avoiding use of GetIndex from MMU +; and its unpleasant LFSR pseudo-random sequence +; +; r1 = TLBPD0 from TLB_RELOAD above +; +; -- jh_ex_way_set not cleared on startup +; didn't want to change setup.c +; hence extra instruction to clean +; +; -- should be in cache since in same line +; as r0/r1 saves above +; +ld r0,[jh_ex_way_sel] ; victim pointer +and r0,r0,1 ; clean +xor.f r0,r0,1 ; flip +st r0,[jh_ex_way_sel] ; store back +asr r0,r1,12 ; get set # <<1, note bit 12=R=0 +or.nz r0,r0,1 ; set way bit +and r0,r0,0xff ; clean +sr r0,[ARC_REG_TLBINDEX] +#endif + +#ifdef JH_HACK2 +; JH hack #2 +; Faster than hack #1 in non-thrash case, but hard-coded for 2-way MMU +; Slower in thrash case (where it matters) because more code is executed +; Inefficient due to two-register paradigm of this miss handler +; +/* r1 = data TLBPD0 at this point */ +lr r0,[eret] /* instruction address */ +xor r0,r0,r1 /* compare set # */ +and.f r0,r0,0x000fe000 /* 2-way MMU mask */ +bne 88f /* not in same set - no need to probe */ + +lr r0,[eret] /* instruction address */ +and r0,r0,PAGE_MASK /* VPN of instruction address */ +; lr r1,[ARC_REG_TLBPD0] /* Data VPN+ASID - already in r1 from TLB_RELOAD*/ +and r1,r1,0xff /* Data ASID */ +or r0,r0,r1 /* Instruction address + Data ASID */ + +lr r1,[ARC_REG_TLBPD0] /* save TLBPD0 containing data TLB*/ +sr r0,[ARC_REG_TLBPD0] /* write instruction address to TLBPD0 */ +sr TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */ +lr r0,[ARC_REG_TLBINDEX] /* r0 = index where instruction is, if at all */ +sr r1,[ARC_REG_TLBPD0] /* restore TLBPD0 */ + +xor r0,r0,1 /* flip bottom bit of data index */ +b.d 89f +sr r0,[ARC_REG_TLBINDEX] /* and put it back */ +88: +sr TLBGetIndex, [ARC_REG_TLBCOMMAND] +89: +#endif + +#ifdef JH_HACK1 +; +; Always checks whether instruction will be kicked out by dtlb miss +; +mov_s r3, r1 ; save PD0 prepared by TLB_RELOAD in r3 +lr r0,[eret] /* instruction address */ +and r0,r0,PAGE_MASK /* VPN of instruction address */ +bmsk r1,r3,7 /* Data ASID, bits 7-0 */ +or_s r0,r0,r1 /* Instruction address + Data ASID */ + +sr r0,[ARC_REG_TLBPD0] /* write instruction address to TLBPD0 */ +sr TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */ +lr r0,[ARC_REG_TLBINDEX] /* r0 = index where instruction is, if at all */ +sr r3,[ARC_REG_TLBPD0] /* restore TLBPD0 */ + +sr TLBGetIndex, [ARC_REG_TLBCOMMAND] +lr r1,[ARC_REG_TLBINDEX] /* r1 = index where MMU wants to put data */ +cmp r0,r1 /* if no match on indices, go around */ +xor.eq r1,r1,1 /* flip bottom bit of data index */ +sr r1,[ARC_REG_TLBINDEX] /* and put it back */ +#endif + +.endm + +#endif + +#endif diff --git a/arch/arc/include/asm/tlb.h b/arch/arc/include/asm/tlb.h new file mode 100644 index 0000000000..975b35d373 --- /dev/null +++ b/arch/arc/include/asm/tlb.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + */ + +#ifndef _ASM_ARC_TLB_H +#define _ASM_ARC_TLB_H + +#include +#include + +#endif /* _ASM_ARC_TLB_H */ diff --git a/arch/arc/include/asm/tlbflush.h b/arch/arc/include/asm/tlbflush.h new file mode 100644 index 0000000000..992a2837a5 --- /dev/null +++ b/arch/arc/include/asm/tlbflush.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef __ASM_ARC_TLBFLUSH__ +#define __ASM_ARC_TLBFLUSH__ + +#include + +void local_flush_tlb_all(void); +void local_flush_tlb_mm(struct mm_struct *mm); +void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page); +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end); +void local_flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +#endif + +#ifndef CONFIG_SMP +#define flush_tlb_range(vma, s, e) local_flush_tlb_range(vma, s, e) +#define flush_tlb_page(vma, page) local_flush_tlb_page(vma, page) +#define flush_tlb_kernel_range(s, e) local_flush_tlb_kernel_range(s, e) +#define flush_tlb_all() local_flush_tlb_all() +#define flush_tlb_mm(mm) local_flush_tlb_mm(mm) +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define flush_pmd_tlb_range(vma, s, e) local_flush_pmd_tlb_range(vma, s, e) +#endif +#else +extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page); +extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); +extern void flush_tlb_all(void); +extern void flush_tlb_mm(struct mm_struct *mm); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +#endif +#endif /* CONFIG_SMP */ +#endif diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h new file mode 100644 index 0000000000..783bfdb3bf --- /dev/null +++ b/arch/arc/include/asm/uaccess.h @@ -0,0 +1,673 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * vineetg: June 2010 + * -__clear_user( ) called multiple times during elf load was byte loop + * converted to do as much word clear as possible. + * + * vineetg: Dec 2009 + * -Hand crafted constant propagation for "constant" copy sizes + * -stock kernel shrunk by 33K at -O3 + * + * vineetg: Sept 2009 + * -Added option to (UN)inline copy_(to|from)_user to reduce code sz + * -kernel shrunk by 200K even at -O3 (gcc 4.2.1) + * -Enabled when doing -Os + * + * Amit Bhor, Sameer Dhavale: Codito Technologies 2004 + */ + +#ifndef _ASM_ARC_UACCESS_H +#define _ASM_ARC_UACCESS_H + +#include /* for generic string functions */ + + +#define __kernel_ok (uaccess_kernel()) + +/* + * Algorithmically, for __user_ok() we want do: + * (start < TASK_SIZE) && (start+len < TASK_SIZE) + * where TASK_SIZE could either be retrieved from thread_info->addr_limit or + * emitted directly in code. + * + * This can however be rewritten as follows: + * (len <= TASK_SIZE) && (start+len < TASK_SIZE) + * + * Because it essentially checks if buffer end is within limit and @len is + * non-ngeative, which implies that buffer start will be within limit too. 
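+ *
+ * Worked example (illustrative only, with TASK_SIZE = 0x6000_0000):
+ *   addr = 0x5FFF_FFF0, sz = 0x20
+ *   (sz <= TASK_SIZE)        -> true
+ *   (addr <= TASK_SIZE - sz) -> 0x5FFF_FFF0 <= 0x5FFF_FFE0 -> false
+ * so a buffer straddling the limit is rejected without ever computing
+ * addr + sz (which could wrap around).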
+ * + * The reason for rewriting being, for majority of cases, @len is generally + * compile time constant, causing first sub-expression to be compile time + * subsumed. + * + * The second part would generate weird large LIMMs e.g. (0x6000_0000 - 0x10), + * so we check for TASK_SIZE using get_fs() since the addr_limit load from mem + * would already have been done at this call site for __kernel_ok() + * + */ +#define __user_ok(addr, sz) (((sz) <= TASK_SIZE) && \ + ((addr) <= (get_fs() - (sz)))) +#define __access_ok(addr, sz) (unlikely(__kernel_ok) || \ + likely(__user_ok((addr), (sz)))) + +/*********** Single byte/hword/word copies ******************/ + +#define __get_user_fn(sz, u, k) \ +({ \ + long __ret = 0; /* success by default */ \ + switch (sz) { \ + case 1: __arc_get_user_one(*(k), u, "ldb", __ret); break; \ + case 2: __arc_get_user_one(*(k), u, "ldw", __ret); break; \ + case 4: __arc_get_user_one(*(k), u, "ld", __ret); break; \ + case 8: __arc_get_user_one_64(*(k), u, __ret); break; \ + } \ + __ret; \ +}) + +/* + * Returns 0 on success, -EFAULT if not. + * @ret already contains 0 - given that errors will be less likely + * (hence +r asm constraint below). + * In case of error, fixup code will make it -EFAULT + */ +#define __arc_get_user_one(dst, src, op, ret) \ + __asm__ __volatile__( \ + "1: "op" %1,[%2]\n" \ + "2: ;nop\n" \ + " .section .fixup, \"ax\"\n" \ + " .align 4\n" \ + "3: # return -EFAULT\n" \ + " mov %0, %3\n" \ + " # zero out dst ptr\n" \ + " mov %1, 0\n" \ + " j 2b\n" \ + " .previous\n" \ + " .section __ex_table, \"a\"\n" \ + " .align 4\n" \ + " .word 1b,3b\n" \ + " .previous\n" \ + \ + : "+r" (ret), "=r" (dst) \ + : "r" (src), "ir" (-EFAULT)) + +#define __arc_get_user_one_64(dst, src, ret) \ + __asm__ __volatile__( \ + "1: ld %1,[%2]\n" \ + "4: ld %R1,[%2, 4]\n" \ + "2: ;nop\n" \ + " .section .fixup, \"ax\"\n" \ + " .align 4\n" \ + "3: # return -EFAULT\n" \ + " mov %0, %3\n" \ + " # zero out dst ptr\n" \ + " mov %1, 0\n" \ + " mov %R1, 0\n" \ + " j 2b\n" \ + " .previous\n" \ + " .section __ex_table, \"a\"\n" \ + " .align 4\n" \ + " .word 1b,3b\n" \ + " .word 4b,3b\n" \ + " .previous\n" \ + \ + : "+r" (ret), "=r" (dst) \ + : "r" (src), "ir" (-EFAULT)) + +#define __put_user_fn(sz, u, k) \ +({ \ + long __ret = 0; /* success by default */ \ + switch (sz) { \ + case 1: __arc_put_user_one(*(k), u, "stb", __ret); break; \ + case 2: __arc_put_user_one(*(k), u, "stw", __ret); break; \ + case 4: __arc_put_user_one(*(k), u, "st", __ret); break; \ + case 8: __arc_put_user_one_64(*(k), u, __ret); break; \ + } \ + __ret; \ +}) + +#define __arc_put_user_one(src, dst, op, ret) \ + __asm__ __volatile__( \ + "1: "op" %1,[%2]\n" \ + "2: ;nop\n" \ + " .section .fixup, \"ax\"\n" \ + " .align 4\n" \ + "3: mov %0, %3\n" \ + " j 2b\n" \ + " .previous\n" \ + " .section __ex_table, \"a\"\n" \ + " .align 4\n" \ + " .word 1b,3b\n" \ + " .previous\n" \ + \ + : "+r" (ret) \ + : "r" (src), "r" (dst), "ir" (-EFAULT)) + +#define __arc_put_user_one_64(src, dst, ret) \ + __asm__ __volatile__( \ + "1: st %1,[%2]\n" \ + "4: st %R1,[%2, 4]\n" \ + "2: ;nop\n" \ + " .section .fixup, \"ax\"\n" \ + " .align 4\n" \ + "3: mov %0, %3\n" \ + " j 2b\n" \ + " .previous\n" \ + " .section __ex_table, \"a\"\n" \ + " .align 4\n" \ + " .word 1b,3b\n" \ + " .word 4b,3b\n" \ + " .previous\n" \ + \ + : "+r" (ret) \ + : "r" (src), "r" (dst), "ir" (-EFAULT)) + + +static inline unsigned long +raw_copy_from_user(void *to, const void __user *from, unsigned long n) +{ + long res = 0; + char val; + unsigned long tmp1, tmp2, 
tmp3, tmp4; + unsigned long orig_n = n; + + if (n == 0) + return 0; + + /* unaligned */ + if (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3)) { + + unsigned char tmp; + + __asm__ __volatile__ ( + " mov.f lp_count, %0 \n" + " lpnz 2f \n" + "1: ldb.ab %1, [%3, 1] \n" + " stb.ab %1, [%2, 1] \n" + " sub %0,%0,1 \n" + "2: ;nop \n" + " .section .fixup, \"ax\" \n" + " .align 4 \n" + "3: j 2b \n" + " .previous \n" + " .section __ex_table, \"a\" \n" + " .align 4 \n" + " .word 1b, 3b \n" + " .previous \n" + + : "+r" (n), + /* + * Note as an '&' earlyclobber operand to make sure the + * temporary register inside the loop is not the same as + * FROM or TO. + */ + "=&r" (tmp), "+r" (to), "+r" (from) + : + : "lp_count", "memory"); + + return n; + } + + /* + * Hand-crafted constant propagation to reduce code sz of the + * laddered copy 16x,8,4,2,1 + */ + if (__builtin_constant_p(orig_n)) { + res = orig_n; + + if (orig_n / 16) { + orig_n = orig_n % 16; + + __asm__ __volatile__( + " lsr lp_count, %7,4 \n" + " lp 3f \n" + "1: ld.ab %3, [%2, 4] \n" + "11: ld.ab %4, [%2, 4] \n" + "12: ld.ab %5, [%2, 4] \n" + "13: ld.ab %6, [%2, 4] \n" + " st.ab %3, [%1, 4] \n" + " st.ab %4, [%1, 4] \n" + " st.ab %5, [%1, 4] \n" + " st.ab %6, [%1, 4] \n" + " sub %0,%0,16 \n" + "3: ;nop \n" + " .section .fixup, \"ax\" \n" + " .align 4 \n" + "4: j 3b \n" + " .previous \n" + " .section __ex_table, \"a\" \n" + " .align 4 \n" + " .word 1b, 4b \n" + " .word 11b,4b \n" + " .word 12b,4b \n" + " .word 13b,4b \n" + " .previous \n" + : "+r" (res), "+r"(to), "+r"(from), + "=r"(tmp1), "=r"(tmp2), "=r"(tmp3), "=r"(tmp4) + : "ir"(n) + : "lp_count", "memory"); + } + if (orig_n / 8) { + orig_n = orig_n % 8; + + __asm__ __volatile__( + "14: ld.ab %3, [%2,4] \n" + "15: ld.ab %4, [%2,4] \n" + " st.ab %3, [%1,4] \n" + " st.ab %4, [%1,4] \n" + " sub %0,%0,8 \n" + "31: ;nop \n" + " .section .fixup, \"ax\" \n" + " .align 4 \n" + "4: j 31b \n" + " .previous \n" + " .section __ex_table, \"a\" \n" + " .align 4 \n" + " .word 14b,4b \n" + " .word 15b,4b \n" + " .previous \n" + : "+r" (res), "+r"(to), "+r"(from), + "=r"(tmp1), "=r"(tmp2) + : + : "memory"); + } + if (orig_n / 4) { + orig_n = orig_n % 4; + + __asm__ __volatile__( + "16: ld.ab %3, [%2,4] \n" + " st.ab %3, [%1,4] \n" + " sub %0,%0,4 \n" + "32: ;nop \n" + " .section .fixup, \"ax\" \n" + " .align 4 \n" + "4: j 32b \n" + " .previous \n" + " .section __ex_table, \"a\" \n" + " .align 4 \n" + " .word 16b,4b \n" + " .previous \n" + : "+r" (res), "+r"(to), "+r"(from), "=r"(tmp1) + : + : "memory"); + } + if (orig_n / 2) { + orig_n = orig_n % 2; + + __asm__ __volatile__( + "17: ldw.ab %3, [%2,2] \n" + " stw.ab %3, [%1,2] \n" + " sub %0,%0,2 \n" + "33: ;nop \n" + " .section .fixup, \"ax\" \n" + " .align 4 \n" + "4: j 33b \n" + " .previous \n" + " .section __ex_table, \"a\" \n" + " .align 4 \n" + " .word 17b,4b \n" + " .previous \n" + : "+r" (res), "+r"(to), "+r"(from), "=r"(tmp1) + : + : "memory"); + } + if (orig_n & 1) { + __asm__ __volatile__( + "18: ldb.ab %3, [%2,2] \n" + " stb.ab %3, [%1,2] \n" + " sub %0,%0,1 \n" + "34: ; nop \n" + " .section .fixup, \"ax\" \n" + " .align 4 \n" + "4: j 34b \n" + " .previous \n" + " .section __ex_table, \"a\" \n" + " .align 4 \n" + " .word 18b,4b \n" + " .previous \n" + : "+r" (res), "+r"(to), "+r"(from), "=r"(tmp1) + : + : "memory"); + } + } else { /* n is NOT constant, so laddered copy of 16x,8,4,2,1 */ + + __asm__ __volatile__( + " mov %0,%3 \n" + " lsr.f lp_count, %3,4 \n" /* 16x bytes */ + " lpnz 3f \n" + "1: ld.ab %5, [%2, 4] \n" + "11: ld.ab %6, 
[%2, 4] \n" + "12: ld.ab %7, [%2, 4] \n" + "13: ld.ab %8, [%2, 4] \n" + " st.ab %5, [%1, 4] \n" + " st.ab %6, [%1, 4] \n" + " st.ab %7, [%1, 4] \n" + " st.ab %8, [%1, 4] \n" + " sub %0,%0,16 \n" + "3: and.f %3,%3,0xf \n" /* stragglers */ + " bz 34f \n" + " bbit0 %3,3,31f \n" /* 8 bytes left */ + "14: ld.ab %5, [%2,4] \n" + "15: ld.ab %6, [%2,4] \n" + " st.ab %5, [%1,4] \n" + " st.ab %6, [%1,4] \n" + " sub.f %0,%0,8 \n" + "31: bbit0 %3,2,32f \n" /* 4 bytes left */ + "16: ld.ab %5, [%2,4] \n" + " st.ab %5, [%1,4] \n" + " sub.f %0,%0,4 \n" + "32: bbit0 %3,1,33f \n" /* 2 bytes left */ + "17: ldw.ab %5, [%2,2] \n" + " stw.ab %5, [%1,2] \n" + " sub.f %0,%0,2 \n" + "33: bbit0 %3,0,34f \n" + "18: ldb.ab %5, [%2,1] \n" /* 1 byte left */ + " stb.ab %5, [%1,1] \n" + " sub.f %0,%0,1 \n" + "34: ;nop \n" + " .section .fixup, \"ax\" \n" + " .align 4 \n" + "4: j 34b \n" + " .previous \n" + " .section __ex_table, \"a\" \n" + " .align 4 \n" + " .word 1b, 4b \n" + " .word 11b,4b \n" + " .word 12b,4b \n" + " .word 13b,4b \n" + " .word 14b,4b \n" + " .word 15b,4b \n" + " .word 16b,4b \n" + " .word 17b,4b \n" + " .word 18b,4b \n" + " .previous \n" + : "=r" (res), "+r"(to), "+r"(from), "+r"(n), "=r"(val), + "=r"(tmp1), "=r"(tmp2), "=r"(tmp3), "=r"(tmp4) + : + : "lp_count", "memory"); + } + + return res; +} + +static inline unsigned long +raw_copy_to_user(void __user *to, const void *from, unsigned long n) +{ + long res = 0; + char val; + unsigned long tmp1, tmp2, tmp3, tmp4; + unsigned long orig_n = n; + + if (n == 0) + return 0; + + /* unaligned */ + if (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3)) { + + unsigned char tmp; + + __asm__ __volatile__( + " mov.f lp_count, %0 \n" + " lpnz 3f \n" + " ldb.ab %1, [%3, 1] \n" + "1: stb.ab %1, [%2, 1] \n" + " sub %0, %0, 1 \n" + "3: ;nop \n" + " .section .fixup, \"ax\" \n" + " .align 4 \n" + "4: j 3b \n" + " .previous \n" + " .section __ex_table, \"a\" \n" + " .align 4 \n" + " .word 1b, 4b \n" + " .previous \n" + + : "+r" (n), + /* Note as an '&' earlyclobber operand to make sure the + * temporary register inside the loop is not the same as + * FROM or TO. 
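+ * ("=&r" is the earlyclobber constraint: the output may be written
+ * before all inputs are consumed, so the compiler must not assign it
+ * the same register as any input operand.)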
+ */ + "=&r" (tmp), "+r" (to), "+r" (from) + : + : "lp_count", "memory"); + + return n; + } + + if (__builtin_constant_p(orig_n)) { + res = orig_n; + + if (orig_n / 16) { + orig_n = orig_n % 16; + + __asm__ __volatile__( + " lsr lp_count, %7,4 \n" + " lp 3f \n" + " ld.ab %3, [%2, 4] \n" + " ld.ab %4, [%2, 4] \n" + " ld.ab %5, [%2, 4] \n" + " ld.ab %6, [%2, 4] \n" + "1: st.ab %3, [%1, 4] \n" + "11: st.ab %4, [%1, 4] \n" + "12: st.ab %5, [%1, 4] \n" + "13: st.ab %6, [%1, 4] \n" + " sub %0, %0, 16 \n" + "3:;nop \n" + " .section .fixup, \"ax\" \n" + " .align 4 \n" + "4: j 3b \n" + " .previous \n" + " .section __ex_table, \"a\" \n" + " .align 4 \n" + " .word 1b, 4b \n" + " .word 11b,4b \n" + " .word 12b,4b \n" + " .word 13b,4b \n" + " .previous \n" + : "+r" (res), "+r"(to), "+r"(from), + "=r"(tmp1), "=r"(tmp2), "=r"(tmp3), "=r"(tmp4) + : "ir"(n) + : "lp_count", "memory"); + } + if (orig_n / 8) { + orig_n = orig_n % 8; + + __asm__ __volatile__( + " ld.ab %3, [%2,4] \n" + " ld.ab %4, [%2,4] \n" + "14: st.ab %3, [%1,4] \n" + "15: st.ab %4, [%1,4] \n" + " sub %0, %0, 8 \n" + "31:;nop \n" + " .section .fixup, \"ax\" \n" + " .align 4 \n" + "4: j 31b \n" + " .previous \n" + " .section __ex_table, \"a\" \n" + " .align 4 \n" + " .word 14b,4b \n" + " .word 15b,4b \n" + " .previous \n" + : "+r" (res), "+r"(to), "+r"(from), + "=r"(tmp1), "=r"(tmp2) + : + : "memory"); + } + if (orig_n / 4) { + orig_n = orig_n % 4; + + __asm__ __volatile__( + " ld.ab %3, [%2,4] \n" + "16: st.ab %3, [%1,4] \n" + " sub %0, %0, 4 \n" + "32:;nop \n" + " .section .fixup, \"ax\" \n" + " .align 4 \n" + "4: j 32b \n" + " .previous \n" + " .section __ex_table, \"a\" \n" + " .align 4 \n" + " .word 16b,4b \n" + " .previous \n" + : "+r" (res), "+r"(to), "+r"(from), "=r"(tmp1) + : + : "memory"); + } + if (orig_n / 2) { + orig_n = orig_n % 2; + + __asm__ __volatile__( + " ldw.ab %3, [%2,2] \n" + "17: stw.ab %3, [%1,2] \n" + " sub %0, %0, 2 \n" + "33:;nop \n" + " .section .fixup, \"ax\" \n" + " .align 4 \n" + "4: j 33b \n" + " .previous \n" + " .section __ex_table, \"a\" \n" + " .align 4 \n" + " .word 17b,4b \n" + " .previous \n" + : "+r" (res), "+r"(to), "+r"(from), "=r"(tmp1) + : + : "memory"); + } + if (orig_n & 1) { + __asm__ __volatile__( + " ldb.ab %3, [%2,1] \n" + "18: stb.ab %3, [%1,1] \n" + " sub %0, %0, 1 \n" + "34: ;nop \n" + " .section .fixup, \"ax\" \n" + " .align 4 \n" + "4: j 34b \n" + " .previous \n" + " .section __ex_table, \"a\" \n" + " .align 4 \n" + " .word 18b,4b \n" + " .previous \n" + : "+r" (res), "+r"(to), "+r"(from), "=r"(tmp1) + : + : "memory"); + } + } else { /* n is NOT constant, so laddered copy of 16x,8,4,2,1 */ + + __asm__ __volatile__( + " mov %0,%3 \n" + " lsr.f lp_count, %3,4 \n" /* 16x bytes */ + " lpnz 3f \n" + " ld.ab %5, [%2, 4] \n" + " ld.ab %6, [%2, 4] \n" + " ld.ab %7, [%2, 4] \n" + " ld.ab %8, [%2, 4] \n" + "1: st.ab %5, [%1, 4] \n" + "11: st.ab %6, [%1, 4] \n" + "12: st.ab %7, [%1, 4] \n" + "13: st.ab %8, [%1, 4] \n" + " sub %0, %0, 16 \n" + "3: and.f %3,%3,0xf \n" /* stragglers */ + " bz 34f \n" + " bbit0 %3,3,31f \n" /* 8 bytes left */ + " ld.ab %5, [%2,4] \n" + " ld.ab %6, [%2,4] \n" + "14: st.ab %5, [%1,4] \n" + "15: st.ab %6, [%1,4] \n" + " sub.f %0, %0, 8 \n" + "31: bbit0 %3,2,32f \n" /* 4 bytes left */ + " ld.ab %5, [%2,4] \n" + "16: st.ab %5, [%1,4] \n" + " sub.f %0, %0, 4 \n" + "32: bbit0 %3,1,33f \n" /* 2 bytes left */ + " ldw.ab %5, [%2,2] \n" + "17: stw.ab %5, [%1,2] \n" + " sub.f %0, %0, 2 \n" + "33: bbit0 %3,0,34f \n" + " ldb.ab %5, [%2,1] \n" /* 1 byte left */ + "18: stb.ab %5, 
[%1,1] \n" + " sub.f %0, %0, 1 \n" + "34: ;nop \n" + " .section .fixup, \"ax\" \n" + " .align 4 \n" + "4: j 34b \n" + " .previous \n" + " .section __ex_table, \"a\" \n" + " .align 4 \n" + " .word 1b, 4b \n" + " .word 11b,4b \n" + " .word 12b,4b \n" + " .word 13b,4b \n" + " .word 14b,4b \n" + " .word 15b,4b \n" + " .word 16b,4b \n" + " .word 17b,4b \n" + " .word 18b,4b \n" + " .previous \n" + : "=r" (res), "+r"(to), "+r"(from), "+r"(n), "=r"(val), + "=r"(tmp1), "=r"(tmp2), "=r"(tmp3), "=r"(tmp4) + : + : "lp_count", "memory"); + } + + return res; +} + +static inline unsigned long __arc_clear_user(void __user *to, unsigned long n) +{ + long res = n; + unsigned char *d_char = to; + + __asm__ __volatile__( + " bbit0 %0, 0, 1f \n" + "75: stb.ab %2, [%0,1] \n" + " sub %1, %1, 1 \n" + "1: bbit0 %0, 1, 2f \n" + "76: stw.ab %2, [%0,2] \n" + " sub %1, %1, 2 \n" + "2: asr.f lp_count, %1, 2 \n" + " lpnz 3f \n" + "77: st.ab %2, [%0,4] \n" + " sub %1, %1, 4 \n" + "3: bbit0 %1, 1, 4f \n" + "78: stw.ab %2, [%0,2] \n" + " sub %1, %1, 2 \n" + "4: bbit0 %1, 0, 5f \n" + "79: stb.ab %2, [%0,1] \n" + " sub %1, %1, 1 \n" + "5: \n" + " .section .fixup, \"ax\" \n" + " .align 4 \n" + "3: j 5b \n" + " .previous \n" + " .section __ex_table, \"a\" \n" + " .align 4 \n" + " .word 75b, 3b \n" + " .word 76b, 3b \n" + " .word 77b, 3b \n" + " .word 78b, 3b \n" + " .word 79b, 3b \n" + " .previous \n" + : "+r"(d_char), "+r"(res) + : "i"(0) + : "lp_count", "memory"); + + return res; +} + +#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE + +#define INLINE_COPY_TO_USER +#define INLINE_COPY_FROM_USER + +#define __clear_user(d, n) __arc_clear_user(d, n) +#else +extern unsigned long arc_clear_user_noinline(void __user *to, + unsigned long n); +#define __clear_user(d, n) arc_clear_user_noinline(d, n) +#endif + +#include +#include + +#endif diff --git a/arch/arc/include/asm/unaligned.h b/arch/arc/include/asm/unaligned.h new file mode 100644 index 0000000000..cf5a02382e --- /dev/null +++ b/arch/arc/include/asm/unaligned.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#ifndef _ASM_ARC_UNALIGNED_H +#define _ASM_ARC_UNALIGNED_H + +/* ARC700 can't handle unaligned Data accesses. */ + +#include +#include + +#ifdef CONFIG_ARC_EMUL_UNALIGNED +int misaligned_fixup(unsigned long address, struct pt_regs *regs, + struct callee_regs *cregs); +#else +static inline int +misaligned_fixup(unsigned long address, struct pt_regs *regs, + struct callee_regs *cregs) +{ + /* Not fixed */ + return 1; +} +#endif + +#endif /* _ASM_ARC_UNALIGNED_H */ diff --git a/arch/arc/include/asm/unwind.h b/arch/arc/include/asm/unwind.h new file mode 100644 index 0000000000..e95a20453a --- /dev/null +++ b/arch/arc/include/asm/unwind.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + */ + +#ifndef _ASM_ARC_UNWIND_H +#define _ASM_ARC_UNWIND_H + +#ifdef CONFIG_ARC_DW2_UNWIND + +#include + +struct arc700_regs { + unsigned long r0; + unsigned long r1; + unsigned long r2; + unsigned long r3; + unsigned long r4; + unsigned long r5; + unsigned long r6; + unsigned long r7; + unsigned long r8; + unsigned long r9; + unsigned long r10; + unsigned long r11; + unsigned long r12; + unsigned long r13; + unsigned long r14; + unsigned long r15; + unsigned long r16; + unsigned long r17; + unsigned long r18; + unsigned long r19; + unsigned long r20; + unsigned long r21; + unsigned long r22; + unsigned long r23; + unsigned long r24; + unsigned long r25; + unsigned long r26; + unsigned long r27; /* fp */ + unsigned long r28; /* sp */ + unsigned long r29; + unsigned long r30; + unsigned long r31; /* blink */ + unsigned long r63; /* pc */ +}; + +struct unwind_frame_info { + struct arc700_regs regs; + struct task_struct *task; + unsigned call_frame:1; +}; + +#define UNW_PC(frame) ((frame)->regs.r63) +#define UNW_SP(frame) ((frame)->regs.r28) +#define UNW_BLINK(frame) ((frame)->regs.r31) + +/* Rajesh FIXME */ +#ifdef CONFIG_FRAME_POINTER +#define UNW_FP(frame) ((frame)->regs.r27) +#define FRAME_RETADDR_OFFSET 4 +#define FRAME_LINK_OFFSET 0 +#define STACK_BOTTOM_UNW(tsk) STACK_LIMIT((tsk)->thread.ksp) +#define STACK_TOP_UNW(tsk) ((tsk)->thread.ksp) +#else +#define UNW_FP(frame) ((void)(frame), 0) +#endif + +#define STACK_LIMIT(ptr) (((ptr) - 1) & ~(THREAD_SIZE - 1)) + +#define UNW_REGISTER_INFO \ + PTREGS_INFO(r0), \ + PTREGS_INFO(r1), \ + PTREGS_INFO(r2), \ + PTREGS_INFO(r3), \ + PTREGS_INFO(r4), \ + PTREGS_INFO(r5), \ + PTREGS_INFO(r6), \ + PTREGS_INFO(r7), \ + PTREGS_INFO(r8), \ + PTREGS_INFO(r9), \ + PTREGS_INFO(r10), \ + PTREGS_INFO(r11), \ + PTREGS_INFO(r12), \ + PTREGS_INFO(r13), \ + PTREGS_INFO(r14), \ + PTREGS_INFO(r15), \ + PTREGS_INFO(r16), \ + PTREGS_INFO(r17), \ + PTREGS_INFO(r18), \ + PTREGS_INFO(r19), \ + PTREGS_INFO(r20), \ + PTREGS_INFO(r21), \ + PTREGS_INFO(r22), \ + PTREGS_INFO(r23), \ + PTREGS_INFO(r24), \ + PTREGS_INFO(r25), \ + PTREGS_INFO(r26), \ + PTREGS_INFO(r27), \ + PTREGS_INFO(r28), \ + PTREGS_INFO(r29), \ + PTREGS_INFO(r30), \ + PTREGS_INFO(r31), \ + PTREGS_INFO(r63) + +#define UNW_DEFAULT_RA(raItem, dataAlign) \ + ((raItem).where == Memory && !((raItem).value * (dataAlign) + 4)) + +extern int arc_unwind(struct unwind_frame_info *frame); +extern void arc_unwind_init(void); +extern void *unwind_add_table(struct module *module, const void *table_start, + unsigned long table_size); +extern void unwind_remove_table(void *handle, int init_only); + +static inline int +arch_unwind_init_running(struct unwind_frame_info *info, + int (*callback) (struct unwind_frame_info *info, + void *arg), + void *arg) +{ + return 0; +} + +static inline int arch_unw_user_mode(const struct unwind_frame_info *info) +{ + return 0; +} + +static inline void arch_unw_init_blocked(struct unwind_frame_info *info) +{ + return; +} + +static inline void arch_unw_init_frame_info(struct unwind_frame_info *info, + struct pt_regs *regs) +{ + return; +} + +#else + +#define UNW_PC(frame) ((void)(frame), 0) +#define UNW_SP(frame) ((void)(frame), 0) +#define UNW_FP(frame) ((void)(frame), 0) + +static inline void arc_unwind_init(void) +{ +} + +#define unwind_add_table(a, b, c) +#define unwind_remove_table(a, b) + +#endif /* CONFIG_ARC_DW2_UNWIND */ + +#endif /* _ASM_ARC_UNWIND_H */ diff --git a/arch/arc/include/asm/vermagic.h b/arch/arc/include/asm/vermagic.h new file mode 100644 index 
0000000000..a10257d2c6 --- /dev/null +++ b/arch/arc/include/asm/vermagic.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_VERMAGIC_H +#define _ASM_VERMAGIC_H + +#define MODULE_ARCH_VERMAGIC "ARC700" + +#endif /* _ASM_VERMAGIC_H */ diff --git a/arch/arc/include/asm/vmalloc.h b/arch/arc/include/asm/vmalloc.h new file mode 100644 index 0000000000..973095aad6 --- /dev/null +++ b/arch/arc/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_ARC_VMALLOC_H +#define _ASM_ARC_VMALLOC_H + +#endif /* _ASM_ARC_VMALLOC_H */ diff --git a/arch/arc/include/uapi/asm/Kbuild b/arch/arc/include/uapi/asm/Kbuild new file mode 100644 index 0000000000..e784701419 --- /dev/null +++ b/arch/arc/include/uapi/asm/Kbuild @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +generic-y += ucontext.h diff --git a/arch/arc/include/uapi/asm/byteorder.h b/arch/arc/include/uapi/asm/byteorder.h new file mode 100644 index 0000000000..5540111531 --- /dev/null +++ b/arch/arc/include/uapi/asm/byteorder.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_ARC_BYTEORDER_H +#define __ASM_ARC_BYTEORDER_H + +#ifdef __BIG_ENDIAN__ +#include +#else +#include +#endif + +#endif /* ASM_ARC_BYTEORDER_H */ diff --git a/arch/arc/include/uapi/asm/cachectl.h b/arch/arc/include/uapi/asm/cachectl.h new file mode 100644 index 0000000000..0e4f2affc7 --- /dev/null +++ b/arch/arc/include/uapi/asm/cachectl.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ARC_ASM_CACHECTL_H +#define __ARC_ASM_CACHECTL_H + +/* + * ARC ABI flags defined for Android's finegrained cacheflush requirements + */ +#define CF_I_INV 0x0002 +#define CF_D_FLUSH 0x0010 +#define CF_D_FLUSH_INV 0x0020 + +#define CF_DEFAULT (CF_I_INV | CF_D_FLUSH) + +/* + * Standard flags expected by cacheflush system call users + */ +#define ICACHE CF_I_INV +#define DCACHE CF_D_FLUSH +#define BCACHE (CF_I_INV | CF_D_FLUSH) + +#endif diff --git a/arch/arc/include/uapi/asm/elf.h b/arch/arc/include/uapi/asm/elf.h new file mode 100644 index 0000000000..3c1dae4e5a --- /dev/null +++ b/arch/arc/include/uapi/asm/elf.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef _UAPI__ASM_ARC_ELF_H +#define _UAPI__ASM_ARC_ELF_H + +#include /* for user_regs_struct */ + +/* Machine specific ELF Hdr flags */ +#define EF_ARC_OSABI_MSK 0x00000f00 + +#define EF_ARC_OSABI_V3 0x00000300 /* v3 (no legacy syscalls) */ +#define EF_ARC_OSABI_V4 0x00000400 /* v4 (64bit data any reg align) */ + +#if __GNUC__ < 6 +#define EF_ARC_OSABI_CURRENT EF_ARC_OSABI_V3 +#else +#define EF_ARC_OSABI_CURRENT EF_ARC_OSABI_V4 +#endif + +typedef unsigned long elf_greg_t; +typedef unsigned long elf_fpregset_t; + +#define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t)) +#define ELF_ARCV2REG (sizeof(struct user_regs_arcv2) / sizeof(elf_greg_t)) + +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +#endif diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h new file mode 100644 index 0000000000..2a4ad619ab --- /dev/null +++ b/arch/arc/include/uapi/asm/page.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _UAPI__ASM_ARC_PAGE_H +#define _UAPI__ASM_ARC_PAGE_H + +#include + +/* PAGE_SHIFT determines the page size */ +#if defined(CONFIG_ARC_PAGE_SIZE_16K) +#define PAGE_SHIFT 14 +#elif defined(CONFIG_ARC_PAGE_SIZE_4K) +#define PAGE_SHIFT 12 +#else +/* + * Default 8k + * done this way (instead of under CONFIG_ARC_PAGE_SIZE_8K) because adhoc + * user code (busybox appletlib.h) expects PAGE_SHIFT to be defined w/o + * using the correct uClibc header and in their build our autoconf.h is + * not available + */ +#define PAGE_SHIFT 13 +#endif + +#define PAGE_SIZE _BITUL(PAGE_SHIFT) /* Default 8K */ +#define PAGE_OFFSET _AC(0x80000000, UL) /* Kernel starts at 2G onwrds */ + +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#endif /* _UAPI__ASM_ARC_PAGE_H */ diff --git a/arch/arc/include/uapi/asm/ptrace.h b/arch/arc/include/uapi/asm/ptrace.h new file mode 100644 index 0000000000..2a6eff57f6 --- /dev/null +++ b/arch/arc/include/uapi/asm/ptrace.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Amit Bhor, Sameer Dhavale: Codito Technologies 2004 + */ + +#ifndef _UAPI__ASM_ARC_PTRACE_H +#define _UAPI__ASM_ARC_PTRACE_H + +#define PTRACE_GET_THREAD_AREA 25 + +#ifndef __ASSEMBLY__ +/* + * Userspace ABI: Register state needed by + * -ptrace (gdbserver) + * -sigcontext (SA_SIGNINFO signal frame) + * + * This is to decouple pt_regs from user-space ABI, to be able to change it + * w/o affecting the ABI. + * + * The intermediate pad,pad2 are relics of initial layout based on pt_regs + * for optimizations when copying pt_regs to/from user_regs_struct. + * We no longer need them, but can't be changed as they are part of ABI now. + * + * Also, sigcontext only care about the scratch regs as that is what we really + * save/restore for signal handling. However gdb also uses the same struct + * hence callee regs need to be in there too. 
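+ *
+ * Illustrative sketch only (not part of the ABI description): a debugger
+ * built on this header would typically fetch the whole block through the
+ * regset interface, e.g.
+ *
+ *   struct user_regs_struct uregs;
+ *   struct iovec iov = { .iov_base = &uregs, .iov_len = sizeof(uregs) };
+ *   ptrace(PTRACE_GETREGSET, pid, (void *)NT_PRSTATUS, &iov);
+ *
+ * and then read e.g. uregs.stop_pc for the PC or uregs.callee.r25 for a
+ * callee-saved register.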
+*/
+struct user_regs_struct {
+
+ unsigned long pad;
+ struct {
+ unsigned long bta, lp_start, lp_end, lp_count;
+ unsigned long status32, ret, blink, fp, gp;
+ unsigned long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0;
+ unsigned long sp;
+ } scratch;
+ unsigned long pad2;
+ struct {
+ unsigned long r25, r24, r23, r22, r21, r20;
+ unsigned long r19, r18, r17, r16, r15, r14, r13;
+ } callee;
+ unsigned long efa; /* break pt addr, for break points in delay slots */
+ unsigned long stop_pc; /* give dbg stop_pc after ensuring brkpt trap */
+};
+
+struct user_regs_arcv2 {
+ unsigned long r30, r58, r59;
+};
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _UAPI__ASM_ARC_PTRACE_H */
diff --git a/arch/arc/include/uapi/asm/setup.h b/arch/arc/include/uapi/asm/setup.h
new file mode 100644
index 0000000000..a6d4e44938
--- /dev/null
+++ b/arch/arc/include/uapi/asm/setup.h
@@ -0,0 +1,6 @@
+/*
+ * setup.h is part of the userspace header ABI so UAPI scripts have to generate
+ * it even if there's nothing to export - causing an empty generated header.
+ * However to prevent "patch" from discarding it we add this placeholder
+ * comment.
+ */
diff --git a/arch/arc/include/uapi/asm/sigcontext.h b/arch/arc/include/uapi/asm/sigcontext.h
new file mode 100644
index 0000000000..7a5449dfcb
--- /dev/null
+++ b/arch/arc/include/uapi/asm/sigcontext.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ASM_ARC_SIGCONTEXT_H
+#define _ASM_ARC_SIGCONTEXT_H
+
+#include
+
+/*
+ * Signal context structure - contains all info to do with the state
+ * before the signal handler was invoked.
+ */
+struct sigcontext {
+ struct user_regs_struct regs;
+ struct user_regs_arcv2 v2abi;
+};
+
+#endif /* _ASM_ARC_SIGCONTEXT_H */
diff --git a/arch/arc/include/uapi/asm/signal.h b/arch/arc/include/uapi/asm/signal.h
new file mode 100644
index 0000000000..ba3143a1b3
--- /dev/null
+++ b/arch/arc/include/uapi/asm/signal.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Amit Bhor, Sameer Dhavale: Codito Technologies 2004
+ */
+
+#ifndef _ASM_ARC_SIGNAL_H
+#define _ASM_ARC_SIGNAL_H
+
+/*
+ * This is much needed for the ARC sigreturn optimization.
+ * It allows uClibc to piggyback the address of a sigreturn stub in sigaction,
+ * which allows sigreturn-based re-entry into the kernel after handling a
+ * signal.
+ * W/o this, the kernel needs to "synthesize" the sigreturn trampoline on the
+ * user mode stack, which in turn forces the following:
+ * -TLB Flush (after making the stack page executable)
+ * -Cache line Flush (to make I/D Cache lines coherent)
+ */
+#define SA_RESTORER 0x04000000
+
+#include
+
+#endif /* _ASM_ARC_SIGNAL_H */
diff --git a/arch/arc/include/uapi/asm/swab.h b/arch/arc/include/uapi/asm/swab.h
new file mode 100644
index 0000000000..02109cd48e
--- /dev/null
+++ b/arch/arc/include/uapi/asm/swab.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc.
(www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * vineetg: May 2011 + * -Support single cycle endian-swap insn in ARC700 4.10 + * + * vineetg: June 2009 + * -Better htonl implementation (5 instead of 9 ALU instructions) + * -Hardware assisted single cycle bswap (Use Case of ARC custom instrn) + */ + +#ifndef __ASM_ARC_SWAB_H +#define __ASM_ARC_SWAB_H + +#include + +/* Native single cycle endian swap insn */ +#ifdef CONFIG_ARC_HAS_SWAPE + +#define __arch_swab32(x) \ +({ \ + unsigned int tmp = x; \ + __asm__( \ + " swape %0, %1 \n" \ + : "=r" (tmp) \ + : "r" (tmp)); \ + tmp; \ +}) + +#else + +/* Several ways of Endian-Swap Emulation for ARC + * 0: kernel generic + * 1: ARC optimised "C" + * 2: ARC Custom instruction + */ +#define ARC_BSWAP_TYPE 1 + +#if (ARC_BSWAP_TYPE == 1) /******* Software only ********/ + +/* The kernel default implementation of htonl is + * return x<<24 | x>>24 | + * (x & (__u32)0x0000ff00UL)<<8 | (x & (__u32)0x00ff0000UL)>>8; + * + * This generates 9 instructions on ARC (excluding the ld/st) + * + * 8051fd8c: ld r3,[r7,20] ; Mem op : Get the value to be swapped + * 8051fd98: asl r5,r3,24 ; get 3rd Byte + * 8051fd9c: lsr r2,r3,24 ; get 0th Byte + * 8051fda0: and r4,r3,0xff00 + * 8051fda8: asl r4,r4,8 ; get 1st Byte + * 8051fdac: and r3,r3,0x00ff0000 + * 8051fdb4: or r2,r2,r5 ; combine 0th and 3rd Bytes + * 8051fdb8: lsr r3,r3,8 ; 2nd Byte at correct place in Dst Reg + * 8051fdbc: or r2,r2,r4 ; combine 0,3 Bytes with 1st Byte + * 8051fdc0: or r2,r2,r3 ; combine 0,3,1 Bytes with 2nd Byte + * 8051fdc4: st r2,[r1,20] ; Mem op : save result back to mem + * + * Joern suggested a better "C" algorithm which is great since + * (1) It is portable to any architecure + * (2) At the same time it takes advantage of ARC ISA (rotate intrns) + */ + +#define __arch_swab32(x) \ +({ unsigned long __in = (x), __tmp; \ + __tmp = __in << 8 | __in >> 24; /* ror tmp,in,24 */ \ + __in = __in << 24 | __in >> 8; /* ror in,in,8 */ \ + __tmp ^= __in; \ + __tmp &= 0xff00ff; \ + __tmp ^ __in; \ +}) + +#elif (ARC_BSWAP_TYPE == 2) /* Custom single cycle bswap instruction */ + +#define __arch_swab32(x) \ +({ \ + unsigned int tmp = x; \ + __asm__( \ + " .extInstruction bswap, 7, 0x00, SUFFIX_NONE, SYNTAX_2OP \n"\ + " bswap %0, %1 \n"\ + : "=r" (tmp) \ + : "r" (tmp)); \ + tmp; \ +}) + +#endif /* ARC_BSWAP_TYPE=zzz */ + +#endif /* CONFIG_ARC_HAS_SWAPE */ + +#if !defined(__STRICT_ANSI__) || defined(__KERNEL__) +#define __SWAB_64_THRU_32__ +#endif + +#endif diff --git a/arch/arc/include/uapi/asm/unistd.h b/arch/arc/include/uapi/asm/unistd.h new file mode 100644 index 0000000000..fa2713ae6b --- /dev/null +++ b/arch/arc/include/uapi/asm/unistd.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
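+ *
+ * [Illustration for the ARC_BSWAP_TYPE=1 byte-swap shown in swab.h above;
+ * not from the original sources] The two-rotate/XOR trick can be checked in
+ * portable C - for in = 0xAABBCCDD it returns 0xDDCCBBAA:
+ *
+ *	static inline unsigned int arc_swab32_sketch(unsigned int in)
+ *	{
+ *		unsigned int tmp = (in << 8) | (in >> 24);	// ror in, 24
+ *		in  = (in << 24) | (in >> 8);			// ror in, 8
+ *		tmp ^= in;
+ *		tmp &= 0x00ff00ffu;	// xor-difference kept in byte lanes 0 and 2
+ *		return tmp ^ in;	// same result as __builtin_bswap32(in)
+ *	}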
+ */ + +/******** no-legacy-syscalls-ABI *******/ + +/* + * Non-typical guard macro to enable inclusion twice in ARCH sys.c + * That is how the Generic syscall wrapper generator works + */ +#if !defined(_UAPI_ASM_ARC_UNISTD_H) || defined(__SYSCALL) +#define _UAPI_ASM_ARC_UNISTD_H + +#define __ARCH_WANT_RENAMEAT +#define __ARCH_WANT_STAT64 +#define __ARCH_WANT_SET_GET_RLIMIT +#define __ARCH_WANT_SYS_EXECVE +#define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 +#define __ARCH_WANT_SYS_VFORK +#define __ARCH_WANT_SYS_FORK +#define __ARCH_WANT_TIME32_SYSCALLS + +#define sys_mmap2 sys_mmap_pgoff + +#include + +#define NR_syscalls __NR_syscalls + +/* Generic syscall (fs/filesystems.c - lost in asm-generic/unistd.h */ +#define __NR_sysfs (__NR_arch_specific_syscall + 3) + +/* ARC specific syscall */ +#define __NR_cacheflush (__NR_arch_specific_syscall + 0) +#define __NR_arc_settls (__NR_arch_specific_syscall + 1) +#define __NR_arc_gettls (__NR_arch_specific_syscall + 2) +#define __NR_arc_usr_cmpxchg (__NR_arch_specific_syscall + 4) + +__SYSCALL(__NR_cacheflush, sys_cacheflush) +__SYSCALL(__NR_arc_settls, sys_arc_settls) +__SYSCALL(__NR_arc_gettls, sys_arc_gettls) +__SYSCALL(__NR_arc_usr_cmpxchg, sys_arc_usr_cmpxchg) +__SYSCALL(__NR_sysfs, sys_sysfs) + +#undef __SYSCALL + +#endif diff --git a/arch/arc/kernel/.gitignore b/arch/arc/kernel/.gitignore new file mode 100644 index 0000000000..bbb90f92d0 --- /dev/null +++ b/arch/arc/kernel/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +vmlinux.lds diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile new file mode 100644 index 0000000000..8c4fc4b54c --- /dev/null +++ b/arch/arc/kernel/Makefile @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) +# + +obj-y := arcksyms.o setup.o irq.o reset.o ptrace.o process.o devtree.o +obj-y += signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o +obj-$(CONFIG_ISA_ARCOMPACT) += entry-compact.o intc-compact.o +obj-$(CONFIG_ISA_ARCV2) += entry-arcv2.o intc-arcv2.o + +obj-$(CONFIG_MODULES) += arcksyms.o module.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_ARC_MCIP) += mcip.o +obj-$(CONFIG_ARC_DW2_UNWIND) += unwind.o +obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_ARC_EMUL_UNALIGNED) += unaligned.o +obj-$(CONFIG_KGDB) += kgdb.o +obj-$(CONFIG_ARC_METAWARE_HLINK) += arc_hostlink.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o +obj-$(CONFIG_JUMP_LABEL) += jump_label.o + +obj-$(CONFIG_ARC_FPU_SAVE_RESTORE) += fpu.o +ifdef CONFIG_ISA_ARCOMPACT +CFLAGS_fpu.o += -mdpfp +endif + +ifdef CONFIG_ARC_DW2_UNWIND +CFLAGS_ctx_sw.o += -fno-omit-frame-pointer +obj-y += ctx_sw.o +else +obj-y += ctx_sw_asm.o +endif + +extra-y := vmlinux.lds head.o diff --git a/arch/arc/kernel/arc_hostlink.c b/arch/arc/kernel/arc_hostlink.c new file mode 100644 index 0000000000..08c5196efe --- /dev/null +++ b/arch/arc/kernel/arc_hostlink.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * arc_hostlink.c: Pseudo-driver for Metaware provided "hostlink" facility + * + * Allows Linux userland access to host in absence of any peripherals. + * + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + */ + +#include /* file_operations */ +#include +#include /* VM_IO */ +#include +#include + +static unsigned char __HOSTLINK__[4 * PAGE_SIZE] __aligned(PAGE_SIZE); + +static int arc_hl_mmap(struct file *fp, struct vm_area_struct *vma) +{ + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, + vma->vm_page_prot)) { + pr_warn("Hostlink buffer mmap ERROR\n"); + return -EAGAIN; + } + return 0; +} + +static long arc_hl_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + /* we only support, returning the physical addr to mmap in user space */ + put_user((unsigned int)__HOSTLINK__, (int __user *)arg); + return 0; +} + +static const struct file_operations arc_hl_fops = { + .unlocked_ioctl = arc_hl_ioctl, + .mmap = arc_hl_mmap, +}; + +static struct miscdevice arc_hl_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "hostlink", + .fops = &arc_hl_fops +}; + +static int __init arc_hl_init(void) +{ + pr_info("ARC Hostlink driver mmap at 0x%p\n", __HOSTLINK__); + return misc_register(&arc_hl_dev); +} +module_init(arc_hl_init); diff --git a/arch/arc/kernel/arcksyms.c b/arch/arc/kernel/arcksyms.c new file mode 100644 index 0000000000..8851c0a19e --- /dev/null +++ b/arch/arc/kernel/arcksyms.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * arcksyms.c - Exporting symbols not exportable from their own sources + * + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#include + +/* libgcc functions, not part of kernel sources */ +extern void __ashldi3(void); +extern void __ashrdi3(void); +extern void __divsi3(void); +extern void __divsf3(void); +extern void __lshrdi3(void); +extern void __modsi3(void); +extern void __muldi3(void); +extern void __ucmpdi2(void); +extern void __udivsi3(void); +extern void __umodsi3(void); +extern void __cmpdi2(void); +extern void __fixunsdfsi(void); +extern void __muldf3(void); +extern void __divdf3(void); +extern void __floatunsidf(void); +extern void __floatunsisf(void); +extern void __udivdi3(void); + +EXPORT_SYMBOL(__ashldi3); +EXPORT_SYMBOL(__ashrdi3); +EXPORT_SYMBOL(__divsi3); +EXPORT_SYMBOL(__divsf3); +EXPORT_SYMBOL(__lshrdi3); +EXPORT_SYMBOL(__modsi3); +EXPORT_SYMBOL(__muldi3); +EXPORT_SYMBOL(__ucmpdi2); +EXPORT_SYMBOL(__udivsi3); +EXPORT_SYMBOL(__umodsi3); +EXPORT_SYMBOL(__cmpdi2); +EXPORT_SYMBOL(__fixunsdfsi); +EXPORT_SYMBOL(__muldf3); +EXPORT_SYMBOL(__divdf3); +EXPORT_SYMBOL(__floatunsidf); +EXPORT_SYMBOL(__floatunsisf); +EXPORT_SYMBOL(__udivdi3); + +/* ARC optimised assembler routines */ +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memcmp); +EXPORT_SYMBOL(strchr); +EXPORT_SYMBOL(strcpy); +EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strlen); diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c new file mode 100644 index 0000000000..0e884036ab --- /dev/null +++ b/arch/arc/kernel/asm-offsets.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + + +int main(void) +{ + DEFINE(TASK_THREAD, offsetof(struct task_struct, thread)); + DEFINE(TASK_THREAD_INFO, offsetof(struct task_struct, stack)); + + BLANK(); + + DEFINE(THREAD_KSP, offsetof(struct thread_struct, ksp)); + DEFINE(THREAD_CALLEE_REG, offsetof(struct thread_struct, callee_reg)); + DEFINE(THREAD_FAULT_ADDR, + offsetof(struct thread_struct, fault_address)); + + BLANK(); + + DEFINE(THREAD_INFO_FLAGS, offsetof(struct thread_info, flags)); + DEFINE(THREAD_INFO_PREEMPT_COUNT, + offsetof(struct thread_info, preempt_count)); + + BLANK(); + + DEFINE(TASK_ACT_MM, offsetof(struct task_struct, active_mm)); + DEFINE(TASK_TGID, offsetof(struct task_struct, tgid)); + DEFINE(TASK_PID, offsetof(struct task_struct, pid)); + DEFINE(TASK_COMM, offsetof(struct task_struct, comm)); + + DEFINE(MM_CTXT, offsetof(struct mm_struct, context)); + DEFINE(MM_PGD, offsetof(struct mm_struct, pgd)); + + DEFINE(MM_CTXT_ASID, offsetof(mm_context_t, asid)); + + BLANK(); + + DEFINE(PT_status32, offsetof(struct pt_regs, status32)); + DEFINE(PT_event, offsetof(struct pt_regs, event)); + DEFINE(PT_sp, offsetof(struct pt_regs, sp)); + DEFINE(PT_r0, offsetof(struct pt_regs, r0)); + DEFINE(PT_r1, offsetof(struct pt_regs, r1)); + DEFINE(PT_r2, offsetof(struct pt_regs, r2)); + DEFINE(PT_r3, offsetof(struct pt_regs, r3)); + DEFINE(PT_r4, offsetof(struct pt_regs, r4)); + DEFINE(PT_r5, offsetof(struct pt_regs, r5)); + DEFINE(PT_r6, offsetof(struct pt_regs, r6)); + DEFINE(PT_r7, offsetof(struct pt_regs, r7)); + DEFINE(PT_r8, offsetof(struct pt_regs, r8)); + DEFINE(PT_r10, offsetof(struct pt_regs, r10)); + DEFINE(PT_r26, offsetof(struct pt_regs, r26)); + DEFINE(PT_ret, offsetof(struct pt_regs, ret)); + DEFINE(PT_blink, offsetof(struct pt_regs, blink)); + DEFINE(PT_lpe, offsetof(struct pt_regs, lp_end)); + DEFINE(PT_lpc, offsetof(struct pt_regs, lp_count)); + DEFINE(PT_user_r25, offsetof(struct pt_regs, user_r25)); + + DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs)); + DEFINE(SZ_PT_REGS, sizeof(struct pt_regs)); + +#ifdef CONFIG_ISA_ARCV2 + OFFSET(PT_r12, pt_regs, r12); + OFFSET(PT_r30, pt_regs, r30); +#endif +#ifdef CONFIG_ARC_HAS_ACCL_REGS + OFFSET(PT_r58, pt_regs, r58); + OFFSET(PT_r59, pt_regs, r59); +#endif +#ifdef CONFIG_ARC_DSP_SAVE_RESTORE_REGS + OFFSET(PT_DSP_CTRL, pt_regs, DSP_CTRL); +#endif + + return 0; +} diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c new file mode 100644 index 0000000000..1a76f2d6f6 --- /dev/null +++ b/arch/arc/kernel/ctx_sw.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * Vineetg: Aug 2009 + * -"C" version of lowest level context switch asm macro called by schedular + * gcc doesn't generate the dward CFI info for hand written asm, hence can't + * backtrace out of it (e.g. tasks sleeping in kernel). + * So we cheat a bit by writing almost similar code in inline-asm. + * -This is a hacky way of doing things, but there is no other simple way. 
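+ *
+ * [Illustration, not from the original sources] KSP_WORD_OFF below is built
+ * from TASK_THREAD and THREAD_KSP, which asm-offsets.c (above) generates at
+ * build time. The underlying mechanism (hedged sketch of the DEFINE macro
+ * from include/linux/kbuild.h) is roughly:
+ *
+ *	#define DEFINE(sym, val) \
+ *		asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))
+ *
+ * Kbuild compiles asm-offsets.c to assembly, collects the "->" markers and
+ * rewrites them into #defines in generated/asm-offsets.h, so C and .S code
+ * share struct offsets without hardcoding them.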
+ * I don't want/intend to extend unwinding code to understand raw asm + */ + +#include +#include +#include + +#define KSP_WORD_OFF ((TASK_THREAD + THREAD_KSP) / 4) + +struct task_struct *__sched +__switch_to(struct task_struct *prev_task, struct task_struct *next_task) +{ + unsigned int tmp; + unsigned int prev = (unsigned int)prev_task; + unsigned int next = (unsigned int)next_task; + + __asm__ __volatile__( + /* FP/BLINK save generated by gcc (standard function prologue */ + "st.a r13, [sp, -4] \n\t" + "st.a r14, [sp, -4] \n\t" + "st.a r15, [sp, -4] \n\t" + "st.a r16, [sp, -4] \n\t" + "st.a r17, [sp, -4] \n\t" + "st.a r18, [sp, -4] \n\t" + "st.a r19, [sp, -4] \n\t" + "st.a r20, [sp, -4] \n\t" + "st.a r21, [sp, -4] \n\t" + "st.a r22, [sp, -4] \n\t" + "st.a r23, [sp, -4] \n\t" + "st.a r24, [sp, -4] \n\t" +#ifndef CONFIG_ARC_CURR_IN_REG + "st.a r25, [sp, -4] \n\t" +#else + "sub sp, sp, 4 \n\t" /* usual r25 placeholder */ +#endif + + /* set ksp of outgoing task in tsk->thread.ksp */ +#if KSP_WORD_OFF <= 255 + "st.as sp, [%3, %1] \n\t" +#else + /* + * Workaround for NR_CPUS=4k + * %1 is bigger than 255 (S9 offset for st.as) + */ + "add2 r24, %3, %1 \n\t" + "st sp, [r24] \n\t" +#endif + + /* + * setup _current_task with incoming tsk. + * optionally, set r25 to that as well + * For SMP extra work to get to &_current_task[cpu] + * (open coded SET_CURR_TASK_ON_CPU) + */ +#ifndef CONFIG_SMP + "st %2, [@_current_task] \n\t" +#else + "lr r24, [identity] \n\t" + "lsr r24, r24, 8 \n\t" + "bmsk r24, r24, 7 \n\t" + "add2 r24, @_current_task, r24 \n\t" + "st %2, [r24] \n\t" +#endif +#ifdef CONFIG_ARC_CURR_IN_REG + "mov r25, %2 \n\t" +#endif + + /* get ksp of incoming task from tsk->thread.ksp */ + "ld.as sp, [%2, %1] \n\t" + + /* start loading it's CALLEE reg file */ + +#ifndef CONFIG_ARC_CURR_IN_REG + "ld.ab r25, [sp, 4] \n\t" +#else + "add sp, sp, 4 \n\t" +#endif + "ld.ab r24, [sp, 4] \n\t" + "ld.ab r23, [sp, 4] \n\t" + "ld.ab r22, [sp, 4] \n\t" + "ld.ab r21, [sp, 4] \n\t" + "ld.ab r20, [sp, 4] \n\t" + "ld.ab r19, [sp, 4] \n\t" + "ld.ab r18, [sp, 4] \n\t" + "ld.ab r17, [sp, 4] \n\t" + "ld.ab r16, [sp, 4] \n\t" + "ld.ab r15, [sp, 4] \n\t" + "ld.ab r14, [sp, 4] \n\t" + "ld.ab r13, [sp, 4] \n\t" + + /* last (ret value) = prev : although for ARC it mov r0, r0 */ + "mov %0, %3 \n\t" + + /* FP/BLINK restore generated by gcc (standard func epilogue */ + + : "=r"(tmp) + : "n"(KSP_WORD_OFF), "r"(next), "r"(prev) + : "blink" + ); + + return (struct task_struct *)tmp; +} diff --git a/arch/arc/kernel/ctx_sw_asm.S b/arch/arc/kernel/ctx_sw_asm.S new file mode 100644 index 0000000000..02c4614847 --- /dev/null +++ b/arch/arc/kernel/ctx_sw_asm.S @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * Vineetg: Aug 2009 + * -Moved core context switch macro out of entry.S into this file. 
+ * -This is the more "natural" hand written assembler + */ + +#include +#include /* For the SAVE_* macros */ +#include + +#define KSP_WORD_OFF ((TASK_THREAD + THREAD_KSP) / 4) + +;################### Low Level Context Switch ########################## + + .section .sched.text,"ax",@progbits + .align 4 + .global __switch_to + .type __switch_to, @function +__switch_to: + CFI_STARTPROC + + /* Save regs on kernel mode stack of task */ + st.a blink, [sp, -4] + st.a fp, [sp, -4] + SAVE_CALLEE_SAVED_KERNEL + + /* Save the now KSP in task->thread.ksp */ +#if KSP_WORD_OFF <= 255 + st.as sp, [r0, KSP_WORD_OFF] +#else + /* Workaround for NR_CPUS=4k as ST.as can only take s9 offset */ + add2 r24, r0, KSP_WORD_OFF + st sp, [r24] +#endif + /* + * Return last task in r0 (return reg) + * On ARC, Return reg = First Arg reg = r0. + * Since we already have last task in r0, + * don't need to do anything special to return it + */ + + /* + * switch to new task, contained in r1 + * Temp reg r3 is required to get the ptr to store val + */ + SET_CURR_TASK_ON_CPU r1, r3 + + /* reload SP with kernel mode stack pointer in task->thread.ksp */ + ld.as sp, [r1, (TASK_THREAD + THREAD_KSP)/4] + + /* restore the registers */ + RESTORE_CALLEE_SAVED_KERNEL + ld.ab fp, [sp, 4] + ld.ab blink, [sp, 4] + j [blink] + +END_CFI(__switch_to) diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c new file mode 100644 index 0000000000..721d465f15 --- /dev/null +++ b/arch/arc/kernel/devtree.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com) + * + * Based on reduced version of METAG + */ + + +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_SERIAL_EARLYCON + +static unsigned int __initdata arc_base_baud; + +unsigned int __init arc_early_base_baud(void) +{ + return arc_base_baud/16; +} + +static void __init arc_set_early_base_baud(unsigned long dt_root) +{ + if (of_flat_dt_is_compatible(dt_root, "abilis,arc-tb10x")) + arc_base_baud = 166666666; /* Fixed 166.6MHz clk (TB10x) */ + else if (of_flat_dt_is_compatible(dt_root, "snps,arc-sdp") || + of_flat_dt_is_compatible(dt_root, "snps,hsdk")) + arc_base_baud = 33333333; /* Fixed 33MHz clk (AXS10x & HSDK) */ + else + arc_base_baud = 50000000; /* Fixed default 50MHz */ +} +#else +#define arc_set_early_base_baud(dt_root) +#endif + +static const void * __init arch_get_next_mach(const char *const **match) +{ + static const struct machine_desc *mdesc = __arch_info_begin; + const struct machine_desc *m = mdesc; + + if (m >= __arch_info_end) + return NULL; + + mdesc++; + *match = m->dt_compat; + return m; +} + +/** + * setup_machine_fdt - Machine setup when an dtb was passed to the kernel + * @dt: virtual address pointer to dt blob + * + * If a dtb was passed to the kernel, then use it to choose the correct + * machine_desc and to setup the system. 
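+ *
+ * Note (added for context, hedged): on ARC this is normally called early
+ * from setup_arch(), with either a bootloader-provided blob or the kernel's
+ * built-in __dtb_start (see arch/arc/kernel/setup.c).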
+ */ +const struct machine_desc * __init setup_machine_fdt(void *dt) +{ + const struct machine_desc *mdesc; + unsigned long dt_root; + + if (!early_init_dt_scan(dt)) + return NULL; + + mdesc = of_flat_dt_match_machine(NULL, arch_get_next_mach); + if (!mdesc) + machine_halt(); + + dt_root = of_get_flat_dt_root(); + arc_set_early_base_baud(dt_root); + + return mdesc; +} diff --git a/arch/arc/kernel/disasm.c b/arch/arc/kernel/disasm.c new file mode 100644 index 0000000000..03f8b1be0c --- /dev/null +++ b/arch/arc/kernel/disasm.c @@ -0,0 +1,535 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * several functions that help interpret ARC instructions + * used for unaligned accesses, kprobes and kgdb + * + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#include +#include +#include +#include +#include + +#if defined(CONFIG_KGDB) || defined(CONFIG_ARC_EMUL_UNALIGNED) || \ + defined(CONFIG_KPROBES) + +/* disasm_instr: Analyses instruction at addr, stores + * findings in *state + */ +void __kprobes disasm_instr(unsigned long addr, struct disasm_state *state, + int userspace, struct pt_regs *regs, struct callee_regs *cregs) +{ + int fieldA = 0; + int fieldC = 0, fieldCisReg = 0; + uint16_t word1 = 0, word0 = 0; + int subopcode, is_linked, op_format; + uint16_t *ins_ptr; + uint16_t ins_buf[4]; + int bytes_not_copied = 0; + + memset(state, 0, sizeof(struct disasm_state)); + + /* This fetches the upper part of the 32 bit instruction + * in both the cases of Little Endian or Big Endian configurations. */ + if (userspace) { + bytes_not_copied = copy_from_user(ins_buf, + (const void __user *) addr, 8); + if (bytes_not_copied > 6) + goto fault; + ins_ptr = ins_buf; + } else { + ins_ptr = (uint16_t *) addr; + } + + word1 = *((uint16_t *)addr); + + state->major_opcode = (word1 >> 11) & 0x1F; + + /* Check if the instruction is 32 bit or 16 bit instruction */ + if (state->major_opcode < 0x0B) { + if (bytes_not_copied > 4) + goto fault; + state->instr_len = 4; + word0 = *((uint16_t *)(addr+2)); + state->words[0] = (word1 << 16) | word0; + } else { + state->instr_len = 2; + state->words[0] = word1; + } + + /* Read the second word in case of limm */ + word1 = *((uint16_t *)(addr + state->instr_len)); + word0 = *((uint16_t *)(addr + state->instr_len + 2)); + state->words[1] = (word1 << 16) | word0; + + switch (state->major_opcode) { + case op_Bcc: + state->is_branch = 1; + + /* unconditional branch s25, conditional branch s21 */ + fieldA = (IS_BIT(state->words[0], 16)) ? + FIELD_s25(state->words[0]) : + FIELD_s21(state->words[0]); + + state->delay_slot = IS_BIT(state->words[0], 5); + state->target = fieldA + (addr & ~0x3); + state->flow = direct_jump; + break; + + case op_BLcc: + if (IS_BIT(state->words[0], 16)) { + /* Branch and Link*/ + /* unconditional branch s25, conditional branch s21 */ + fieldA = (IS_BIT(state->words[0], 17)) ? 
+ (FIELD_s25(state->words[0]) & ~0x3) : + FIELD_s21(state->words[0]); + + state->flow = direct_call; + } else { + /*Branch On Compare */ + fieldA = FIELD_s9(state->words[0]) & ~0x3; + state->flow = direct_jump; + } + + state->delay_slot = IS_BIT(state->words[0], 5); + state->target = fieldA + (addr & ~0x3); + state->is_branch = 1; + break; + + case op_LD: /* LD a,[b,s9] */ + state->write = 0; + state->di = BITS(state->words[0], 11, 11); + if (state->di) + break; + state->x = BITS(state->words[0], 6, 6); + state->zz = BITS(state->words[0], 7, 8); + state->aa = BITS(state->words[0], 9, 10); + state->wb_reg = FIELD_B(state->words[0]); + if (state->wb_reg == REG_LIMM) { + state->instr_len += 4; + state->aa = 0; + state->src1 = state->words[1]; + } else { + state->src1 = get_reg(state->wb_reg, regs, cregs); + } + state->src2 = FIELD_s9(state->words[0]); + state->dest = FIELD_A(state->words[0]); + state->pref = (state->dest == REG_LIMM); + break; + + case op_ST: + state->write = 1; + state->di = BITS(state->words[0], 5, 5); + if (state->di) + break; + state->aa = BITS(state->words[0], 3, 4); + state->zz = BITS(state->words[0], 1, 2); + state->src1 = FIELD_C(state->words[0]); + if (state->src1 == REG_LIMM) { + state->instr_len += 4; + state->src1 = state->words[1]; + } else { + state->src1 = get_reg(state->src1, regs, cregs); + } + state->wb_reg = FIELD_B(state->words[0]); + if (state->wb_reg == REG_LIMM) { + state->aa = 0; + state->instr_len += 4; + state->src2 = state->words[1]; + } else { + state->src2 = get_reg(state->wb_reg, regs, cregs); + } + state->src3 = FIELD_s9(state->words[0]); + break; + + case op_MAJOR_4: + subopcode = MINOR_OPCODE(state->words[0]); + switch (subopcode) { + case 32: /* Jcc */ + case 33: /* Jcc.D */ + case 34: /* JLcc */ + case 35: /* JLcc.D */ + is_linked = 0; + + if (subopcode == 33 || subopcode == 35) + state->delay_slot = 1; + + if (subopcode == 34 || subopcode == 35) + is_linked = 1; + + fieldCisReg = 0; + op_format = BITS(state->words[0], 22, 23); + if (op_format == 0 || ((op_format == 3) && + (!IS_BIT(state->words[0], 5)))) { + fieldC = FIELD_C(state->words[0]); + + if (fieldC == REG_LIMM) { + fieldC = state->words[1]; + state->instr_len += 4; + } else { + fieldCisReg = 1; + } + } else if (op_format == 1 || ((op_format == 3) + && (IS_BIT(state->words[0], 5)))) { + fieldC = FIELD_C(state->words[0]); + } else { + /* op_format == 2 */ + fieldC = FIELD_s12(state->words[0]); + } + + if (!fieldCisReg) { + state->target = fieldC; + state->flow = is_linked ? + direct_call : direct_jump; + } else { + state->target = get_reg(fieldC, regs, cregs); + state->flow = is_linked ? + indirect_call : indirect_jump; + } + state->is_branch = 1; + break; + + case 40: /* LPcc */ + if (BITS(state->words[0], 22, 23) == 3) { + /* Conditional LPcc u7 */ + fieldC = FIELD_C(state->words[0]); + + fieldC = fieldC << 1; + fieldC += (addr & ~0x03); + state->is_branch = 1; + state->flow = direct_jump; + state->target = fieldC; + } + /* For Unconditional lp, next pc is the fall through + * which is updated */ + break; + + case 48 ... 
55: /* LD a,[b,c] */ + state->di = BITS(state->words[0], 15, 15); + if (state->di) + break; + state->x = BITS(state->words[0], 16, 16); + state->zz = BITS(state->words[0], 17, 18); + state->aa = BITS(state->words[0], 22, 23); + state->wb_reg = FIELD_B(state->words[0]); + if (state->wb_reg == REG_LIMM) { + state->instr_len += 4; + state->src1 = state->words[1]; + } else { + state->src1 = get_reg(state->wb_reg, regs, + cregs); + } + state->src2 = FIELD_C(state->words[0]); + if (state->src2 == REG_LIMM) { + state->instr_len += 4; + state->src2 = state->words[1]; + } else { + state->src2 = get_reg(state->src2, regs, + cregs); + } + state->dest = FIELD_A(state->words[0]); + if (state->dest == REG_LIMM) + state->pref = 1; + break; + + case 10: /* MOV */ + /* still need to check for limm to extract instr len */ + /* MOV is special case because it only takes 2 args */ + switch (BITS(state->words[0], 22, 23)) { + case 0: /* OP a,b,c */ + if (FIELD_C(state->words[0]) == REG_LIMM) + state->instr_len += 4; + break; + case 1: /* OP a,b,u6 */ + break; + case 2: /* OP b,b,s12 */ + break; + case 3: /* OP.cc b,b,c/u6 */ + if ((!IS_BIT(state->words[0], 5)) && + (FIELD_C(state->words[0]) == REG_LIMM)) + state->instr_len += 4; + break; + } + break; + + + default: + /* Not a Load, Jump or Loop instruction */ + /* still need to check for limm to extract instr len */ + switch (BITS(state->words[0], 22, 23)) { + case 0: /* OP a,b,c */ + if ((FIELD_B(state->words[0]) == REG_LIMM) || + (FIELD_C(state->words[0]) == REG_LIMM)) + state->instr_len += 4; + break; + case 1: /* OP a,b,u6 */ + break; + case 2: /* OP b,b,s12 */ + break; + case 3: /* OP.cc b,b,c/u6 */ + if ((!IS_BIT(state->words[0], 5)) && + ((FIELD_B(state->words[0]) == REG_LIMM) || + (FIELD_C(state->words[0]) == REG_LIMM))) + state->instr_len += 4; + break; + } + break; + } + break; + + /* 16 Bit Instructions */ + case op_LD_ADD: /* LD_S|LDB_S|LDW_S a,[b,c] */ + state->zz = BITS(state->words[0], 3, 4); + state->src1 = get_reg(FIELD_S_B(state->words[0]), regs, cregs); + state->src2 = get_reg(FIELD_S_C(state->words[0]), regs, cregs); + state->dest = FIELD_S_A(state->words[0]); + break; + + case op_ADD_MOV_CMP: + /* check for limm, ignore mov_s h,b (== mov_s 0,b) */ + if ((BITS(state->words[0], 3, 4) < 3) && + (FIELD_S_H(state->words[0]) == REG_LIMM)) + state->instr_len += 4; + break; + + case op_S: + subopcode = BITS(state->words[0], 5, 7); + switch (subopcode) { + case 0: /* j_s */ + case 1: /* j_s.d */ + case 2: /* jl_s */ + case 3: /* jl_s.d */ + state->target = get_reg(FIELD_S_B(state->words[0]), + regs, cregs); + state->delay_slot = subopcode & 1; + state->flow = (subopcode >= 2) ? 
+ direct_call : indirect_jump; + break; + case 7: + switch (BITS(state->words[0], 8, 10)) { + case 4: /* jeq_s [blink] */ + case 5: /* jne_s [blink] */ + case 6: /* j_s [blink] */ + case 7: /* j_s.d [blink] */ + state->delay_slot = (subopcode == 7); + state->flow = indirect_jump; + state->target = get_reg(31, regs, cregs); + default: + break; + } + default: + break; + } + break; + + case op_LD_S: /* LD_S c, [b, u7] */ + state->src1 = get_reg(FIELD_S_B(state->words[0]), regs, cregs); + state->src2 = FIELD_S_u7(state->words[0]); + state->dest = FIELD_S_C(state->words[0]); + break; + + case op_LDB_S: + case op_STB_S: + /* no further handling required as byte accesses should not + * cause an unaligned access exception */ + state->zz = 1; + break; + + case op_LDWX_S: /* LDWX_S c, [b, u6] */ + state->x = 1; + fallthrough; + + case op_LDW_S: /* LDW_S c, [b, u6] */ + state->zz = 2; + state->src1 = get_reg(FIELD_S_B(state->words[0]), regs, cregs); + state->src2 = FIELD_S_u6(state->words[0]); + state->dest = FIELD_S_C(state->words[0]); + break; + + case op_ST_S: /* ST_S c, [b, u7] */ + state->write = 1; + state->src1 = get_reg(FIELD_S_C(state->words[0]), regs, cregs); + state->src2 = get_reg(FIELD_S_B(state->words[0]), regs, cregs); + state->src3 = FIELD_S_u7(state->words[0]); + break; + + case op_STW_S: /* STW_S c,[b,u6] */ + state->write = 1; + state->zz = 2; + state->src1 = get_reg(FIELD_S_C(state->words[0]), regs, cregs); + state->src2 = get_reg(FIELD_S_B(state->words[0]), regs, cregs); + state->src3 = FIELD_S_u6(state->words[0]); + break; + + case op_SP: /* LD_S|LDB_S b,[sp,u7], ST_S|STB_S b,[sp,u7] */ + /* note: we are ignoring possibility of: + * ADD_S, SUB_S, PUSH_S, POP_S as these should not + * cause unaliged exception anyway */ + state->write = BITS(state->words[0], 6, 6); + state->zz = BITS(state->words[0], 5, 5); + if (state->zz) + break; /* byte accesses should not come here */ + if (!state->write) { + state->src1 = get_reg(28, regs, cregs); + state->src2 = FIELD_S_u7(state->words[0]); + state->dest = FIELD_S_B(state->words[0]); + } else { + state->src1 = get_reg(FIELD_S_B(state->words[0]), regs, + cregs); + state->src2 = get_reg(28, regs, cregs); + state->src3 = FIELD_S_u7(state->words[0]); + } + break; + + case op_GP: /* LD_S|LDB_S|LDW_S r0,[gp,s11/s9/s10] */ + /* note: ADD_S r0, gp, s11 is ignored */ + state->zz = BITS(state->words[0], 9, 10); + state->src1 = get_reg(26, regs, cregs); + state->src2 = state->zz ? FIELD_S_s10(state->words[0]) : + FIELD_S_s11(state->words[0]); + state->dest = 0; + break; + + case op_Pcl: /* LD_S b,[pcl,u10] */ + state->src1 = regs->ret & ~3; + state->src2 = FIELD_S_u10(state->words[0]); + state->dest = FIELD_S_B(state->words[0]); + break; + + case op_BR_S: + state->target = FIELD_S_s8(state->words[0]) + (addr & ~0x03); + state->flow = direct_jump; + state->is_branch = 1; + break; + + case op_B_S: + fieldA = (BITS(state->words[0], 9, 10) == 3) ? 
+ FIELD_S_s7(state->words[0]) : + FIELD_S_s10(state->words[0]); + state->target = fieldA + (addr & ~0x03); + state->flow = direct_jump; + state->is_branch = 1; + break; + + case op_BL_S: + state->target = FIELD_S_s13(state->words[0]) + (addr & ~0x03); + state->flow = direct_call; + state->is_branch = 1; + break; + + default: + break; + } + + if (bytes_not_copied <= (8 - state->instr_len)) + return; + +fault: state->fault = 1; +} + +long __kprobes get_reg(int reg, struct pt_regs *regs, + struct callee_regs *cregs) +{ + long *p; + + if (reg <= 12) { + p = ®s->r0; + return p[-reg]; + } + + if (cregs && (reg <= 25)) { + p = &cregs->r13; + return p[13-reg]; + } + + if (reg == 26) + return regs->r26; + if (reg == 27) + return regs->fp; + if (reg == 28) + return regs->sp; + if (reg == 31) + return regs->blink; + + return 0; +} + +void __kprobes set_reg(int reg, long val, struct pt_regs *regs, + struct callee_regs *cregs) +{ + long *p; + + switch (reg) { + case 0 ... 12: + p = ®s->r0; + p[-reg] = val; + break; + case 13 ... 25: + if (cregs) { + p = &cregs->r13; + p[13-reg] = val; + } + break; + case 26: + regs->r26 = val; + break; + case 27: + regs->fp = val; + break; + case 28: + regs->sp = val; + break; + case 31: + regs->blink = val; + break; + default: + break; + } +} + +/* + * Disassembles the insn at @pc and sets @next_pc to next PC (which could be + * @pc +2/4/6 (ARCompact ISA allows free intermixing of 16/32 bit insns). + * + * If @pc is a branch + * -@tgt_if_br is set to branch target. + * -If branch has delay slot, @next_pc updated with actual next PC. + */ +int __kprobes disasm_next_pc(unsigned long pc, struct pt_regs *regs, + struct callee_regs *cregs, + unsigned long *next_pc, unsigned long *tgt_if_br) +{ + struct disasm_state instr; + + memset(&instr, 0, sizeof(struct disasm_state)); + disasm_instr(pc, &instr, 0, regs, cregs); + + *next_pc = pc + instr.instr_len; + + /* Instruction with possible two targets branch, jump and loop */ + if (instr.is_branch) + *tgt_if_br = instr.target; + + /* For the instructions with delay slots, the fall through is the + * instruction following the instruction in delay slot. + */ + if (instr.delay_slot) { + struct disasm_state instr_d; + + disasm_instr(*next_pc, &instr_d, 0, regs, cregs); + + *next_pc += instr_d.instr_len; + } + + /* Zero Overhead Loop - end of the loop */ + if (!(regs->status32 & STATUS32_L) && (*next_pc == regs->lp_end) + && (regs->lp_count > 1)) { + *next_pc = regs->lp_start; + } + + return instr.is_branch; +} + +#endif /* CONFIG_KGDB || CONFIG_ARC_EMUL_UNALIGNED || CONFIG_KPROBES */ diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S new file mode 100644 index 0000000000..a7e6a21741 --- /dev/null +++ b/arch/arc/kernel/entry-arcv2.S @@ -0,0 +1,264 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * ARCv2 ISA based core Low Level Intr/Traps/Exceptions(non-TLB) Handling + * + * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + */ + +#include /* ARC_{EXTRY,EXIT} */ +#include /* SAVE_ALL_{INT1,INT2,TRAP...} */ +#include +#include +#include +#include + +; A maximum number of supported interrupts in the core interrupt controller. +; This number is not equal to the maximum interrupt number (256) because +; first 16 lines are reserved for exceptions and are not configurable. 
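+; (added note: 256 architected vectors - 16 exception/reserved slots = 240
+;  interrupt entries: 8 listed explicitly below plus the .rept block)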
+#define NR_CPU_IRQS 240 + + .cpu HS + +#define VECTOR .word + +;############################ Vector Table ################################# + + .section .vector,"a",@progbits + .align 4 + +# Initial 16 slots are Exception Vectors +VECTOR res_service ; Reset Vector +VECTOR mem_service ; Mem exception +VECTOR instr_service ; Instrn Error +VECTOR EV_MachineCheck ; Fatal Machine check +VECTOR EV_TLBMissI ; Intruction TLB miss +VECTOR EV_TLBMissD ; Data TLB miss +VECTOR EV_TLBProtV ; Protection Violation +VECTOR EV_PrivilegeV ; Privilege Violation +VECTOR EV_SWI ; Software Breakpoint +VECTOR EV_Trap ; Trap exception +VECTOR EV_Extension ; Extn Instruction Exception +VECTOR EV_DivZero ; Divide by Zero +VECTOR EV_DCError ; Data Cache Error +VECTOR EV_Misaligned ; Misaligned Data Access +VECTOR reserved ; Reserved slots +VECTOR reserved ; Reserved slots + +# Begin Interrupt Vectors +VECTOR handle_interrupt ; (16) Timer0 +VECTOR handle_interrupt ; unused (Timer1) +VECTOR handle_interrupt ; unused (WDT) +VECTOR handle_interrupt ; (19) Inter core Interrupt (IPI) +VECTOR handle_interrupt ; (20) perf Interrupt +VECTOR handle_interrupt ; (21) Software Triggered Intr (Self IPI) +VECTOR handle_interrupt ; unused +VECTOR handle_interrupt ; (23) unused +# End of fixed IRQs + +.rept NR_CPU_IRQS - 8 + VECTOR handle_interrupt +.endr + + .section .text, "ax",@progbits + +reserved: + flag 1 ; Unexpected event, halt + +;##################### Interrupt Handling ############################## + +ENTRY(handle_interrupt) + + INTERRUPT_PROLOGUE + + # irq control APIs local_irq_save/restore/disable/enable fiddle with + # global interrupt enable bits in STATUS32 (.IE for 1 prio, .E[] for 2 prio) + # However a taken interrupt doesn't clear these bits. Thus irqs_disabled() + # query in hard ISR path would return false (since .IE is set) which would + # trips genirq interrupt handling asserts. + # + # So do a "soft" disable of interrutps here. + # + # Note this disable is only for consistent book-keeping as further interrupts + # will be disabled anyways even w/o this. 
Hardware tracks active interrupts + # seperately in AUX_IRQ_ACT.active and will not take new interrupts + # unless this one returns (or higher prio becomes pending in 2-prio scheme) + + IRQ_DISABLE + + ; icause is banked: one per priority level + ; so a higher prio interrupt taken here won't clobber prev prio icause + lr r0, [ICAUSE] + mov blink, ret_from_exception + + b.d arch_do_IRQ + mov r1, sp + +END(handle_interrupt) + +;################### Non TLB Exception Handling ############################# + +ENTRY(EV_SWI) + ; TODO: implement this + EXCEPTION_PROLOGUE + b ret_from_exception +END(EV_SWI) + +ENTRY(EV_DivZero) + ; TODO: implement this + EXCEPTION_PROLOGUE + b ret_from_exception +END(EV_DivZero) + +ENTRY(EV_DCError) + ; TODO: implement this + EXCEPTION_PROLOGUE + b ret_from_exception +END(EV_DCError) + +; --------------------------------------------- +; Memory Error Exception Handler +; - Unlike ARCompact, handles Bus errors for both User/Kernel mode, +; Instruction fetch or Data access, under a single Exception Vector +; --------------------------------------------- + +ENTRY(mem_service) + + EXCEPTION_PROLOGUE + + lr r0, [efa] + mov r1, sp + + FAKE_RET_FROM_EXCPN + + bl do_memory_error + b ret_from_exception +END(mem_service) + +ENTRY(EV_Misaligned) + + EXCEPTION_PROLOGUE + + lr r0, [efa] ; Faulting Data address + mov r1, sp + + FAKE_RET_FROM_EXCPN + + SAVE_CALLEE_SAVED_USER + mov r2, sp ; callee_regs + + bl do_misaligned_access + + ; TBD: optimize - do this only if a callee reg was involved + ; either a dst of emulated LD/ST or src with address-writeback + RESTORE_CALLEE_SAVED_USER + + b ret_from_exception +END(EV_Misaligned) + +; --------------------------------------------- +; Protection Violation Exception Handler +; --------------------------------------------- + +ENTRY(EV_TLBProtV) + + EXCEPTION_PROLOGUE + + lr r0, [efa] ; Faulting Data address + mov r1, sp ; pt_regs + + FAKE_RET_FROM_EXCPN + + mov blink, ret_from_exception + b do_page_fault + +END(EV_TLBProtV) + +; From Linux standpoint Slow Path I/D TLB Miss is same a ProtV as they +; need to call do_page_fault(). +; ECR in pt_regs provides whether access was R/W/X + +.global call_do_page_fault +.set call_do_page_fault, EV_TLBProtV + +;############# Common Handlers for ARCompact and ARCv2 ############## + +#include "entry.S" + +;############# Return from Intr/Excp/Trap (ARCv2 ISA Specifics) ############## +; +; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap) +; IRQ shd definitely not happen between now and rtie +; All 2 entry points to here already disable interrupts + +.Lrestore_regs: +restore_regs: + + # Interrpts are actually disabled from this point on, but will get + # reenabled after we return from interrupt/exception. + # But irq tracer needs to be told now... 
+ TRACE_ASM_IRQ_ENABLE + + ld r0, [sp, PT_status32] ; U/K mode at time of entry + lr r10, [AUX_IRQ_ACT] + + bmsk r11, r10, 15 ; extract AUX_IRQ_ACT.active + breq r11, 0, .Lexcept_ret ; No intr active, ret from Exception + +;####### Return from Intr ####### + +.Lisr_ret: + +debug_marker_l1: + ; bbit1.nt r0, STATUS_DE_BIT, .Lintr_ret_to_delay_slot + btst r0, STATUS_DE_BIT ; Z flag set if bit clear + bnz .Lintr_ret_to_delay_slot ; branch if STATUS_DE_BIT set + + ; Handle special case #1: (Entry via Exception, Return via IRQ) + ; + ; Exception in U mode, preempted in kernel, Intr taken (K mode), orig + ; task now returning to U mode (riding the Intr) + ; AUX_IRQ_ACTIVE won't have U bit set (since intr in K mode), hence SP + ; won't be switched to correct U mode value (from AUX_SP) + ; So force AUX_IRQ_ACT.U for such a case + + btst r0, STATUS_U_BIT ; Z flag set if K (Z clear for U) + bset.nz r11, r11, AUX_IRQ_ACT_BIT_U ; NZ means U + sr r11, [AUX_IRQ_ACT] + + INTERRUPT_EPILOGUE + rtie + +;####### Return from Exception / pure kernel mode ####### + +.Lexcept_ret: ; Expects r0 has PT_status32 + +debug_marker_syscall: + EXCEPTION_EPILOGUE + rtie + +;####### Return from Intr to insn in delay slot ####### + +; Handle special case #2: (Entry via Exception in Delay Slot, Return via IRQ) +; +; Intr returning to a Delay Slot (DS) insn +; (since IRQ NOT allowed in DS in ARCv2, this can only happen if orig +; entry was via Exception in DS which got preempted in kernel). +; +; IRQ RTIE won't reliably restore DE bit and/or BTA, needs workaround +; +; Solution is to drop out of interrupt context into pure kernel mode +; and return from pure kernel mode which does right things for delay slot + +.Lintr_ret_to_delay_slot: +debug_marker_ds: + + ld r2, [@intr_to_DE_cnt] + add r2, r2, 1 + st r2, [@intr_to_DE_cnt] + + ; drop out of interrupt context (clear AUX_IRQ_ACT.active) + bmskn r11, r10, 15 + sr r11, [AUX_IRQ_ACT] + b .Lexcept_ret + +END(ret_from_exception) diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S new file mode 100644 index 0000000000..5cb0cd7e4e --- /dev/null +++ b/arch/arc/kernel/entry-compact.S @@ -0,0 +1,403 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Low Level Interrupts/Traps/Exceptions(non-TLB) Handling for ARCompact ISA + * + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * vineetg: May 2011 + * -Userspace unaligned access emulation + * + * vineetg: Feb 2011 (ptrace low level code fixes) + * -traced syscall return code (r0) was not saved into pt_regs for restoring + * into user reg-file when traded task rets to user space. + * -syscalls needing arch-wrappers (mainly for passing sp as pt_regs) + * were not invoking post-syscall trace hook (jumping directly into + * ret_from_system_call) + * + * vineetg: Nov 2010: + * -Vector table jumps (@8 bytes) converted into branches (@4 bytes) + * -To maintain the slot size of 8 bytes/vector, added nop, which is + * not executed at runtime. + * + * vineetg: Nov 2009 (Everything needed for TIF_RESTORE_SIGMASK) + * -do_signal()invoked upon TIF_RESTORE_SIGMASK as well + * -Wrappers for sys_{,rt_}sigsuspend() no longer needed as they don't + * need ptregs anymore + * + * Vineetg: Oct 2009 + * -In a rare scenario, Process gets a Priv-V exception and gets scheduled + * out. Since we don't do FAKE RTIE for Priv-V, CPU exception state remains + * active (AE bit enabled). 
This causes a double fault for a subseq valid + * exception. Thus FAKE RTIE needed in low level Priv-Violation handler. + * Instr Error could also cause similar scenario, so same there as well. + * + * Vineetg: March 2009 (Supporting 2 levels of Interrupts) + * + * Vineetg: Aug 28th 2008: Bug #94984 + * -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap + * Normally CPU does this automatically, however when doing FAKE rtie, + * we need to explicitly do this. The problem in macros + * FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit + * was being "CLEARED" rather then "SET". Since it is Loop INHIBIT Bit, + * setting it and not clearing it clears ZOL context + * + * Vineetg: May 16th, 2008 + * - r25 now contains the Current Task when in kernel + * + * Vineetg: Dec 22, 2007 + * Minor Surgery of Low Level ISR to make it SMP safe + * - MMU_SCRATCH0 Reg used for freeing up r9 in Level 1 ISR + * - _current_task is made an array of NR_CPUS + * - Access of _current_task wrapped inside a macro so that if hardware + * team agrees for a dedicated reg, no other code is touched + * + * Amit Bhor, Rahul Trivedi, Kanika Nema, Sameer Dhavale : Codito Tech 2004 + */ + +#include +#include /* {ENTRY,EXIT} */ +#include +#include + + .cpu A7 + +;############################ Vector Table ################################# + +.macro VECTOR lbl +#if 1 /* Just in case, build breaks */ + j \lbl +#else + b \lbl + nop +#endif +.endm + + .section .vector, "ax",@progbits + .align 4 + +/* Each entry in the vector table must occupy 2 words. Since it is a jump + * across sections (.vector to .text) we are guaranteed that 'j somewhere' + * will use the 'j limm' form of the instruction as long as somewhere is in + * a section other than .vector. 
+ */ + +; ********* Critical System Events ********************** +VECTOR res_service ; 0x0, Reset Vector (0x0) +VECTOR mem_service ; 0x8, Mem exception (0x1) +VECTOR instr_service ; 0x10, Instrn Error (0x2) + +; ******************** Device ISRs ********************** +#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS +VECTOR handle_interrupt_level2 +#else +VECTOR handle_interrupt_level1 +#endif + +.rept 28 +VECTOR handle_interrupt_level1 ; Other devices +.endr + +/* FOR ARC600: timer = 0x3, uart = 0x8, emac = 0x10 */ + +; ******************** Exceptions ********************** +VECTOR EV_MachineCheck ; 0x100, Fatal Machine check (0x20) +VECTOR EV_TLBMissI ; 0x108, Instruction TLB miss (0x21) +VECTOR EV_TLBMissD ; 0x110, Data TLB miss (0x22) +VECTOR EV_TLBProtV ; 0x118, Protection Violation (0x23) + ; or Misaligned Access +VECTOR EV_PrivilegeV ; 0x120, Privilege Violation (0x24) +VECTOR EV_Trap ; 0x128, Trap exception (0x25) +VECTOR EV_Extension ; 0x130, Extn Instruction Excp (0x26) + +.rept 24 +VECTOR reserved ; Reserved Exceptions +.endr + + +;##################### Scratch Mem for IRQ stack switching ############# + +ARCFP_DATA int1_saved_reg + .align 32 + .type int1_saved_reg, @object + .size int1_saved_reg, 4 +int1_saved_reg: + .zero 4 + +/* Each Interrupt level needs its own scratch */ +ARCFP_DATA int2_saved_reg + .type int2_saved_reg, @object + .size int2_saved_reg, 4 +int2_saved_reg: + .zero 4 + +; --------------------------------------------- + .section .text, "ax",@progbits + + +reserved: + flag 1 ; Unexpected event, halt + +;##################### Interrupt Handling ############################## + +#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS +; --------------------------------------------- +; Level 2 ISR: Can interrupt a Level 1 ISR +; --------------------------------------------- +ENTRY(handle_interrupt_level2) + + INTERRUPT_PROLOGUE 2 + + ;------------------------------------------------------ + ; if L2 IRQ interrupted a L1 ISR, disable preemption + ; + ; This is to avoid a potential L1-L2-L1 scenario + ; -L1 IRQ taken + ; -L2 interrupts L1 (before L1 ISR could run) + ; -preemption off IRQ, user task in syscall picked to run + ; -RTIE to userspace + ; Returns from L2 context fine + ; But both L1 and L2 re-enabled, so another L1 can be taken + ; while prev L1 is still unserviced + ; + ;------------------------------------------------------ + + ; L2 interrupting L1 implies both L2 and L1 active + ; However both A2 and A1 are NOT set in STATUS32, thus + ; need to check STATUS32_L2 to determine if L1 was active + + ld r9, [sp, PT_status32] ; get statu32_l2 (saved in pt_regs) + bbit0 r9, STATUS_A1_BIT, 1f ; L1 not active when L2 IRQ, so normal + + ; bump thread_info->preempt_count (Disable preemption) + GET_CURR_THR_INFO_FROM_SP r10 + ld r9, [r10, THREAD_INFO_PREEMPT_COUNT] + add r9, r9, 1 + st r9, [r10, THREAD_INFO_PREEMPT_COUNT] + +1: + ;------------------------------------------------------ + ; setup params for Linux common ISR and invoke it + ;------------------------------------------------------ + lr r0, [icause2] + and r0, r0, 0x1f + + bl.d @arch_do_IRQ + mov r1, sp + + mov r8,0x2 + sr r8, [AUX_IRQ_LV12] ; clear bit in Sticky Status Reg + + b ret_from_exception + +END(handle_interrupt_level2) + +#endif + +; --------------------------------------------- +; User Mode Memory Bus Error Interrupt Handler +; (Kernel mode memory errors handled via separate exception vectors) +; --------------------------------------------- +ENTRY(mem_service) + + INTERRUPT_PROLOGUE 2 + + mov r0, ilink2 + mov r1, sp + + 
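+	; (added note: r0 = address at which the error was taken, from ilink2,
+	;  r1 = pt_regs; these become the arguments of do_memory_error() below)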
; User process needs to be killed with SIGBUS, but first need to get + ; out of the L2 interrupt context (drop to pure kernel mode) and jump + ; off to "C" code where SIGBUS in enqueued + lr r3, [status32] + bclr r3, r3, STATUS_A2_BIT + or r3, r3, (STATUS_E1_MASK|STATUS_E2_MASK) + sr r3, [status32_l2] + mov ilink2, 1f + rtie +1: + bl do_memory_error + b ret_from_exception +END(mem_service) + +; --------------------------------------------- +; Level 1 ISR +; --------------------------------------------- +ENTRY(handle_interrupt_level1) + + INTERRUPT_PROLOGUE 1 + + lr r0, [icause1] + and r0, r0, 0x1f + +#ifdef CONFIG_TRACE_IRQFLAGS + ; icause1 needs to be read early, before calling tracing, which + ; can clobber scratch regs, hence use of stack to stash it + push r0 + TRACE_ASM_IRQ_DISABLE + pop r0 +#endif + + bl.d @arch_do_IRQ + mov r1, sp + + mov r8,0x1 + sr r8, [AUX_IRQ_LV12] ; clear bit in Sticky Status Reg + + b ret_from_exception +END(handle_interrupt_level1) + +;################### Non TLB Exception Handling ############################# + +; --------------------------------------------- +; Protection Violation Exception Handler +; --------------------------------------------- + +ENTRY(EV_TLBProtV) + + EXCEPTION_PROLOGUE + + mov r2, r10 ; ECR set into r10 already + lr r0, [efa] ; Faulting Data address (not part of pt_regs saved above) + + ; Exception auto-disables further Intr/exceptions. + ; Re-enable them by pretending to return from exception + ; (so rest of handler executes in pure K mode) + + FAKE_RET_FROM_EXCPN + + mov r1, sp ; Handle to pt_regs + + ;------ (5) Type of Protection Violation? ---------- + ; + ; ProtV Hardware Exception is triggered for Access Faults of 2 types + ; -Access Violation : 00_23_(00|01|02|03)_00 + ; x r w r+w + ; -Unaligned Access : 00_23_04_00 + ; + bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f + + ;========= (6a) Access Violation Processing ======== + bl do_page_fault + b ret_from_exception + + ;========== (6b) Non aligned access ============ +4: + + SAVE_CALLEE_SAVED_USER + mov r2, sp ; callee_regs + + bl do_misaligned_access + + ; TBD: optimize - do this only if a callee reg was involved + ; either a dst of emulated LD/ST or src with address-writeback + RESTORE_CALLEE_SAVED_USER + + b ret_from_exception + +END(EV_TLBProtV) + +; Wrapper for Linux page fault handler called from EV_TLBMiss* +; Very similar to ProtV handler case (6a) above, but avoids the extra checks +; for Misaligned access +; +ENTRY(call_do_page_fault) + + EXCEPTION_PROLOGUE + lr r0, [efa] ; Faulting Data address + mov r1, sp + FAKE_RET_FROM_EXCPN + + mov blink, ret_from_exception + b do_page_fault + +END(call_do_page_fault) + +;############# Common Handlers for ARCompact and ARCv2 ############## + +#include "entry.S" + +;############# Return from Intr/Excp/Trap (ARC Specifics) ############## +; +; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap) +; IRQ shd definitely not happen between now and rtie +; All 2 entry points to here already disable interrupts + +.Lrestore_regs: + + # Interrupts are actually disabled from this point on, but will get + # reenabled after we return from interrupt/exception. + # But irq tracer needs to be told now... + TRACE_ASM_IRQ_ENABLE + + lr r10, [status32] + + ; Restore REG File. In case multiple Events outstanding, + ; use the same priority as rtie: EXCPN, L2 IRQ, L1 IRQ, None + ; Note that we use realtime STATUS32 (not pt_regs->status32) to + ; decide that. 
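+	; (added note: A1/A2 still set in live STATUS32 means we are returning
+	;  from an L1/L2 interrupt; both clear means exception, trap/syscall or
+	;  pure kernel-mode return)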
+ + and.f 0, r10, (STATUS_A1_MASK|STATUS_A2_MASK) + bz .Lexcep_or_pure_K_ret + + ; Returning from Interrupts (Level 1 or 2) + +#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS + + ; Level 2 interrupt return Path - from hardware standpoint + bbit0 r10, STATUS_A2_BIT, not_level2_interrupt + + ;------------------------------------------------------------------ + ; However the context returning might not have taken L2 intr itself + ; e.g. Task'A' user-code -> L2 intr -> schedule -> 'B' user-code ret + ; Special considerations needed for the context which took L2 intr + + ld r9, [sp, PT_event] ; Ensure this is L2 intr context + brne r9, event_IRQ2, 149f + + ;------------------------------------------------------------------ + ; if L2 IRQ interrupted an L1 ISR, we'd disabled preemption earlier + ; so that sched doesn't move to new task, causing L1 to be delayed + ; undeterministically. Now that we've achieved that, let's reset + ; things to what they were, before returning from L2 context + ;---------------------------------------------------------------- + + ld r9, [sp, PT_status32] ; get statu32_l2 (saved in pt_regs) + bbit0 r9, STATUS_A1_BIT, 149f ; L1 not active when L2 IRQ, so normal + + ; decrement thread_info->preempt_count (re-enable preemption) + GET_CURR_THR_INFO_FROM_SP r10 + ld r9, [r10, THREAD_INFO_PREEMPT_COUNT] + + ; paranoid check, given A1 was active when A2 happened, preempt count + ; must not be 0 because we would have incremented it. + ; If this does happen we simply HALT as it means a BUG !!! + cmp r9, 0 + bnz 2f + flag 1 + +2: + sub r9, r9, 1 + st r9, [r10, THREAD_INFO_PREEMPT_COUNT] + +149: + INTERRUPT_EPILOGUE 2 ; return from level 2 interrupt +debug_marker_l2: + rtie + +not_level2_interrupt: + +#endif + + INTERRUPT_EPILOGUE 1 ; return from level 1 interrupt +debug_marker_l1: + rtie + +.Lexcep_or_pure_K_ret: + + ;this case is for syscalls or Exceptions or pure kernel mode + + EXCEPTION_EPILOGUE +debug_marker_syscall: + rtie + +END(ret_from_exception) diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S new file mode 100644 index 0000000000..dd77a0c8f7 --- /dev/null +++ b/arch/arc/kernel/entry.S @@ -0,0 +1,368 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Common Low Level Interrupts/Traps/Exceptions(non-TLB) Handling for ARC + * (included from entry-.S + * + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + */ + +/*------------------------------------------------------------------ + * Function ABI + *------------------------------------------------------------------ + * + * Arguments r0 - r7 + * Caller Saved Registers r0 - r12 + * Callee Saved Registers r13- r25 + * Global Pointer (gp) r26 + * Frame Pointer (fp) r27 + * Stack Pointer (sp) r28 + * Branch link register (blink) r31 + *------------------------------------------------------------------ + */ + +;################### Special Sys Call Wrappers ########################## + +ENTRY(sys_clone_wrapper) + SAVE_CALLEE_SAVED_USER + bl @sys_clone + DISCARD_CALLEE_SAVED_USER + + GET_CURR_THR_INFO_FLAGS r10 + btst r10, TIF_SYSCALL_TRACE + bnz tracesys_exit + + b .Lret_from_system_call +END(sys_clone_wrapper) + +ENTRY(sys_clone3_wrapper) + SAVE_CALLEE_SAVED_USER + bl @sys_clone3 + DISCARD_CALLEE_SAVED_USER + + GET_CURR_THR_INFO_FLAGS r10 + btst r10, TIF_SYSCALL_TRACE + bnz tracesys_exit + + b .Lret_from_system_call +END(sys_clone3_wrapper) + +ENTRY(ret_from_fork) + ; when the forked child comes here from the __switch_to function + ; r0 has the last task pointer. + ; put last task in scheduler queue + jl @schedule_tail + + ld r9, [sp, PT_status32] + brne r9, 0, 1f + + jl.d [r14] ; kernel thread entry point + mov r0, r13 ; (see PF_KTHREAD block in copy_thread) + +1: + ; Return to user space + ; 1. Any forked task (Reach here via BRne above) + ; 2. First ever init task (Reach here via return from JL above) + ; This is the historic "kernel_execve" use-case, to return to init + ; user mode, in a round about way since that is always done from + ; a kernel thread which is executed via JL above but always returns + ; out whenever kernel_execve (now inline do_fork()) is involved + b ret_from_exception +END(ret_from_fork) + +;################### Non TLB Exception Handling ############################# + +; --------------------------------------------- +; Instruction Error Exception Handler +; --------------------------------------------- + +ENTRY(instr_service) + + EXCEPTION_PROLOGUE + + lr r0, [efa] + mov r1, sp + + FAKE_RET_FROM_EXCPN + + bl do_insterror_or_kprobe + b ret_from_exception +END(instr_service) + +; --------------------------------------------- +; Machine Check Exception Handler +; --------------------------------------------- + +ENTRY(EV_MachineCheck) + + EXCEPTION_PROLOGUE + + lr r2, [ecr] + lr r0, [efa] + mov r1, sp + + ; MC excpetions disable MMU + ARC_MMU_REENABLE r3 + + lsr r3, r2, 8 + bmsk r3, r3, 7 + brne r3, ECR_C_MCHK_DUP_TLB, 1f + + bl do_tlb_overlap_fault + b ret_from_exception + +1: + ; DEAD END: can't do much, display Regs and HALT + SAVE_CALLEE_SAVED_USER + + GET_CURR_TASK_FIELD_PTR TASK_THREAD, r10 + st sp, [r10, THREAD_CALLEE_REG] + + j do_machine_check_fault + +END(EV_MachineCheck) + +; --------------------------------------------- +; Privilege Violation Exception Handler +; --------------------------------------------- +ENTRY(EV_PrivilegeV) + + EXCEPTION_PROLOGUE + + lr r0, [efa] + mov r1, sp + + FAKE_RET_FROM_EXCPN + + bl do_privilege_fault + b ret_from_exception +END(EV_PrivilegeV) + +; --------------------------------------------- +; Extension Instruction Exception Handler +; --------------------------------------------- +ENTRY(EV_Extension) + + EXCEPTION_PROLOGUE + + lr r0, [efa] + mov r1, sp + + FAKE_RET_FROM_EXCPN + + bl do_extension_fault + b ret_from_exception +END(EV_Extension) + +;################ Trap Handling (Syscall, Breakpoint) ################## + +; 
--------------------------------------------- +; syscall Tracing +; --------------------------------------------- +tracesys: + ; save EFA in case tracer wants the PC of traced task + ; using ERET won't work since next-PC has already committed + GET_CURR_TASK_FIELD_PTR TASK_THREAD, r11 + st r12, [r11, THREAD_FAULT_ADDR] ; thread.fault_address + + ; PRE Sys Call Ptrace hook + mov r0, sp ; pt_regs needed + bl @syscall_trace_entry + + ; Tracing code now returns the syscall num (orig or modif) + mov r8, r0 + + ; Do the Sys Call as we normally would. + ; Validate the Sys Call number + cmp r8, NR_syscalls - 1 + mov.hi r0, -ENOSYS + bhi tracesys_exit + + ; Restore the sys-call args. Mere invocation of the hook abv could have + ; clobbered them (since they are in scratch regs). The tracer could also + ; have deliberately changed the syscall args: r0-r7 + ld r0, [sp, PT_r0] + ld r1, [sp, PT_r1] + ld r2, [sp, PT_r2] + ld r3, [sp, PT_r3] + ld r4, [sp, PT_r4] + ld r5, [sp, PT_r5] + ld r6, [sp, PT_r6] + ld r7, [sp, PT_r7] + ld.as r9, [sys_call_table, r8] + jl [r9] ; Entry into Sys Call Handler + +tracesys_exit: + st r0, [sp, PT_r0] ; sys call return value in pt_regs + + ;POST Sys Call Ptrace Hook + bl @syscall_trace_exit + b ret_from_exception ; NOT ret_from_system_call at is saves r0 which + ; we'd done before calling post hook above + +; --------------------------------------------- +; Breakpoint TRAP +; --------------------------------------------- +trap_with_param: + mov r0, r12 ; EFA in case ptracer/gdb wants stop_pc + mov r1, sp + + ; Save callee regs in case gdb wants to have a look + ; SP will grow up by size of CALLEE Reg-File + ; NOTE: clobbers r12 + SAVE_CALLEE_SAVED_USER + + ; save location of saved Callee Regs @ thread_struct->pc + GET_CURR_TASK_FIELD_PTR TASK_THREAD, r10 + st sp, [r10, THREAD_CALLEE_REG] + + ; Call the trap handler + bl do_non_swi_trap + + ; unwind stack to discard Callee saved Regs + DISCARD_CALLEE_SAVED_USER + + b ret_from_exception + +; --------------------------------------------- +; syscall TRAP +; ABI: (r0-r7) upto 8 args, (r8) syscall number +; --------------------------------------------- + +ENTRY(EV_Trap) + + EXCEPTION_PROLOGUE + + lr r12, [efa] + + FAKE_RET_FROM_EXCPN + + ;============ TRAP 1 :breakpoints + ; Check ECR for trap with arg (PROLOGUE ensures r10 has ECR) + bmsk.f 0, r10, 7 + bnz trap_with_param + + ;============ TRAP (no param): syscall top level + + ; If syscall tracing ongoing, invoke pre-post-hooks + GET_CURR_THR_INFO_FLAGS r10 + btst r10, TIF_SYSCALL_TRACE + bnz tracesys ; this never comes back + + ;============ Normal syscall case + + ; syscall num shd not exceed the total system calls avail + cmp r8, NR_syscalls - 1 + mov.hi r0, -ENOSYS + bhi .Lret_from_system_call + + ; Offset into the syscall_table and call handler + ld.as r9,[sys_call_table, r8] + jl [r9] ; Entry into Sys Call Handler + +.Lret_from_system_call: + + st r0, [sp, PT_r0] ; sys call return value in pt_regs + + ; fall through to ret_from_exception +END(EV_Trap) + +;############# Return from Intr/Excp/Trap (Linux Specifics) ############## +; +; If ret to user mode do we need to handle signals, schedule() et al. + +ENTRY(ret_from_exception) + + ; Pre-{IRQ,Trap,Exception} K/U mode from pt_regs->status32 + ld r8, [sp, PT_status32] ; returning to User/Kernel Mode + + bbit0 r8, STATUS_U_BIT, resume_kernel_mode + + ; Before returning to User mode check-for-and-complete any pending work + ; such as rescheduling/signal-delivery etc. 
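+	; (added note: the slow paths below handle, in order, TIF_NEED_RESCHED,
+	;  TIF_SIGPENDING / TIF_NOTIFY_SIGNAL, then TIF_NOTIFY_RESUME, looping
+	;  back to resume_user_mode_begin until no work remains)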
+resume_user_mode_begin: + + ; Disable IRQs to ensures that chk for pending work itself is atomic + ; (and we don't end up missing a NEED_RESCHED/SIGPENDING due to an + ; interim IRQ). + IRQ_DISABLE r10 + + ; Fast Path return to user mode if no pending work + GET_CURR_THR_INFO_FLAGS r9 + and.f 0, r9, _TIF_WORK_MASK + bz .Lrestore_regs + + ; --- (Slow Path #1) task preemption --- + bbit0 r9, TIF_NEED_RESCHED, .Lchk_pend_signals + mov blink, resume_user_mode_begin ; tail-call to U mode ret chks + j @schedule ; BTST+Bnz causes relo error in link + +.Lchk_pend_signals: + IRQ_ENABLE r10 + + ; --- (Slow Path #2) pending signal --- + mov r0, sp ; pt_regs for arg to do_signal()/do_notify_resume() + + GET_CURR_THR_INFO_FLAGS r9 + and.f 0, r9, _TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL + bz .Lchk_notify_resume + + ; Normal Trap/IRQ entry only saves Scratch (caller-saved) regs + ; in pt_reg since the "C" ABI (kernel code) will automatically + ; save/restore callee-saved regs. + ; + ; However, here we need to explicitly save callee regs because + ; (i) If this signal causes coredump - full regfile needed + ; (ii) If signal is SIGTRAP/SIGSTOP, task is being traced thus + ; tracer might call PEEKUSR(CALLEE reg) + ; + ; NOTE: SP will grow up by size of CALLEE Reg-File + SAVE_CALLEE_SAVED_USER ; clobbers r12 + + ; save location of saved Callee Regs @ thread_struct->callee + GET_CURR_TASK_FIELD_PTR TASK_THREAD, r10 + st sp, [r10, THREAD_CALLEE_REG] + + bl @do_signal + + ; Ideally we want to discard the Callee reg above, however if this was + ; a tracing signal, tracer could have done a POKEUSR(CALLEE reg) + RESTORE_CALLEE_SAVED_USER + + b resume_user_mode_begin ; loop back to start of U mode ret + + ; --- (Slow Path #3) notify_resume --- +.Lchk_notify_resume: + btst r9, TIF_NOTIFY_RESUME + blnz @do_notify_resume + b resume_user_mode_begin ; unconditionally back to U mode ret chks + ; for single exit point from this block + +resume_kernel_mode: + + ; Disable Interrupts from this point on + ; CONFIG_PREEMPTION: This is a must for preempt_schedule_irq() + ; !CONFIG_PREEMPTION: To ensure restore_regs is intr safe + IRQ_DISABLE r9 + +#ifdef CONFIG_PREEMPTION + + ; Can't preempt if preemption disabled + GET_CURR_THR_INFO_FROM_SP r10 + ld r8, [r10, THREAD_INFO_PREEMPT_COUNT] + brne r8, 0, .Lrestore_regs + + ; check if this task's NEED_RESCHED flag set + ld r9, [r10, THREAD_INFO_FLAGS] + bbit0 r9, TIF_NEED_RESCHED, .Lrestore_regs + + ; Invoke PREEMPTION + jl preempt_schedule_irq + + ; preempt_schedule_irq() always returns with IRQ disabled +#endif + + b .Lrestore_regs + +##### DONT ADD CODE HERE - .Lrestore_regs actually follows in entry-.S + diff --git a/arch/arc/kernel/fpu.c b/arch/arc/kernel/fpu.c new file mode 100644 index 0000000000..ec640219d9 --- /dev/null +++ b/arch/arc/kernel/fpu.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * fpu.c - save/restore of Floating Point Unit Registers on task switch + * + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#include +#include + +#ifdef CONFIG_ISA_ARCOMPACT + +/* + * To save/restore FPU regs, simplest scheme would use LR/SR insns. + * However since SR serializes the pipeline, an alternate "hack" can be used + * which uses the FPU Exchange insn (DEXCL) to r/w FPU regs. 
+ * + * Store to 64bit dpfp1 reg from a pair of core regs: + * dexcl1 0, r1, r0 ; where r1:r0 is the 64 bit val + * + * Read from dpfp1 into pair of core regs (w/o clobbering dpfp1) + * mov_s r3, 0 + * daddh11 r1, r3, r3 ; get "hi" into r1 (dpfp1 unchanged) + * dexcl1 r0, r1, r3 ; get "low" into r0 (dpfp1 low clobbered) + * dexcl1 0, r1, r0 ; restore dpfp1 to orig value + * + * However we can tweak the read, so that read-out of outgoing task's FPU regs + * and write of incoming task's regs happen in one shot. So all the work is + * done before context switch + */ + +void fpu_save_restore(struct task_struct *prev, struct task_struct *next) +{ + unsigned int *saveto = &prev->thread.fpu.aux_dpfp[0].l; + unsigned int *readfrom = &next->thread.fpu.aux_dpfp[0].l; + + const unsigned int zero = 0; + + __asm__ __volatile__( + "daddh11 %0, %2, %2\n" + "dexcl1 %1, %3, %4\n" + : "=&r" (*(saveto + 1)), /* early clobber must here */ + "=&r" (*(saveto)) + : "r" (zero), "r" (*(readfrom + 1)), "r" (*(readfrom)) + ); + + __asm__ __volatile__( + "daddh22 %0, %2, %2\n" + "dexcl2 %1, %3, %4\n" + : "=&r"(*(saveto + 3)), /* early clobber must here */ + "=&r"(*(saveto + 2)) + : "r" (zero), "r" (*(readfrom + 3)), "r" (*(readfrom + 2)) + ); +} + +#else + +void fpu_init_task(struct pt_regs *regs) +{ + const unsigned int fwe = 0x80000000; + + /* default rounding mode */ + write_aux_reg(ARC_REG_FPU_CTRL, 0x100); + + /* Initialize to zero: setting requires FWE be set */ + write_aux_reg(ARC_REG_FPU_STATUS, fwe); +} + +void fpu_save_restore(struct task_struct *prev, struct task_struct *next) +{ + struct arc_fpu *save = &prev->thread.fpu; + struct arc_fpu *restore = &next->thread.fpu; + const unsigned int fwe = 0x80000000; + + save->ctrl = read_aux_reg(ARC_REG_FPU_CTRL); + save->status = read_aux_reg(ARC_REG_FPU_STATUS); + + write_aux_reg(ARC_REG_FPU_CTRL, restore->ctrl); + write_aux_reg(ARC_REG_FPU_STATUS, (fwe | restore->status)); +} + +#endif diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S new file mode 100644 index 0000000000..9152782444 --- /dev/null +++ b/arch/arc/kernel/head.S @@ -0,0 +1,173 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * ARC CPU startup Code + * + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + * + * Vineetg: Dec 2007 + * -Check if we are running on Simulator or on real hardware + * to skip certain things during boot on simulator + */ + +#include +#include +#include +#include +#include +#include +#include + +.macro CPU_EARLY_SETUP + + ; Setting up Vectror Table (in case exception happens in early boot + sr @_int_vec_base_lds, [AUX_INTR_VEC_BASE] + + ; Disable I-cache/D-cache if kernel so configured + lr r5, [ARC_REG_IC_BCR] + breq r5, 0, 1f ; I$ doesn't exist + lr r5, [ARC_REG_IC_CTRL] +#ifdef CONFIG_ARC_HAS_ICACHE + bclr r5, r5, 0 ; 0 - Enable, 1 is Disable +#else + bset r5, r5, 0 ; I$ exists, but is not used +#endif + sr r5, [ARC_REG_IC_CTRL] + +1: + lr r5, [ARC_REG_DC_BCR] + breq r5, 0, 1f ; D$ doesn't exist + lr r5, [ARC_REG_DC_CTRL] + bclr r5, r5, 6 ; Invalidate (discard w/o wback) +#ifdef CONFIG_ARC_HAS_DCACHE + bclr r5, r5, 0 ; Enable (+Inv) +#else + bset r5, r5, 0 ; Disable (+Inv) +#endif + sr r5, [ARC_REG_DC_CTRL] + +1: + +#ifdef CONFIG_ISA_ARCV2 + ; Unaligned access is disabled at reset, so re-enable early as + ; gcc 7.3.1 (ARC GNU 2018.03) onwards generates unaligned access + ; by default + lr r5, [status32] +#ifdef CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS + bset r5, r5, STATUS_AD_BIT +#else + ; Although disabled at reset, bootloader might have enabled it + bclr r5, r5, STATUS_AD_BIT +#endif + kflag r5 + +#ifdef CONFIG_ARC_LPB_DISABLE + lr r5, [ARC_REG_LPB_BUILD] + breq r5, 0, 1f ; LPB doesn't exist + mov r5, 1 + sr r5, [ARC_REG_LPB_CTRL] +1: +#endif /* CONFIG_ARC_LPB_DISABLE */ + + /* On HSDK, CCMs need to remapped super early */ +#ifdef CONFIG_ARC_SOC_HSDK + mov r6, 0x60000000 + lr r5, [ARC_REG_ICCM_BUILD] + breq r5, 0, 1f + sr r6, [ARC_REG_AUX_ICCM] +1: + lr r5, [ARC_REG_DCCM_BUILD] + breq r5, 0, 2f + sr r6, [ARC_REG_AUX_DCCM] +2: +#endif /* CONFIG_ARC_SOC_HSDK */ + +#endif /* CONFIG_ISA_ARCV2 */ + + ; Config DSP_CTRL properly, so kernel may use integer multiply, + ; multiply-accumulate, and divide operations + DSP_EARLY_INIT +.endm + + .section .init.text, "ax",@progbits + +;---------------------------------------------------------------- +; Default Reset Handler (jumped into from Reset vector) +; - Don't clobber r0,r1,r2 as they might have u-boot provided args +; - Platforms can override this weak version if needed +;---------------------------------------------------------------- +WEAK(res_service) + j stext +END(res_service) + +;---------------------------------------------------------------- +; Kernel Entry point +;---------------------------------------------------------------- +ENTRY(stext) + + CPU_EARLY_SETUP + +#ifdef CONFIG_SMP + GET_CPU_ID r5 + cmp r5, 0 + mov.nz r0, r5 + bz .Lmaster_proceed + + ; Non-Masters wait for Master to boot enough and bring them up + ; when they resume, tail-call to entry point + mov blink, @first_lines_of_secondary + j arc_platform_smp_wait_to_boot + +.Lmaster_proceed: +#endif + + ; Clear BSS before updating any globals + ; XXX: use ZOL here + mov r5, __bss_start + sub r6, __bss_stop, r5 + lsr.f lp_count, r6, 2 + lpnz 1f + st.ab 0, [r5, 4] +1: + + ; Uboot - kernel ABI + ; r0 = [0] No uboot interaction, [1] cmdline in r2, [2] DTB in r2 + ; r1 = magic number (always zero as of now) + ; r2 = pointer to uboot provided cmdline or external DTB in mem + ; These are handled later in handle_uboot_args() + st r0, [@uboot_tag] + st r1, [@uboot_magic] + st r2, [@uboot_arg] + + ; setup "current" tsk and optionally cache it in dedicated r25 + mov r9, @init_task + SET_CURR_TASK_ON_CPU r9, r0 ; r9 = tsk, r0 = scratch + + ; setup 
stack (fp, sp) + mov fp, 0 + + ; tsk->thread_info is really a PAGE, whose bottom hoists stack + GET_TSK_STACK_BASE r9, sp ; r9 = tsk, sp = stack base(output) + + j start_kernel ; "C" entry point +END(stext) + +#ifdef CONFIG_SMP +;---------------------------------------------------------------- +; First lines of code run by secondary before jumping to 'C' +;---------------------------------------------------------------- + .section .text, "ax",@progbits +ENTRY(first_lines_of_secondary) + + ; setup per-cpu idle task as "current" on this CPU + ld r0, [@secondary_idle_tsk] + SET_CURR_TASK_ON_CPU r0, r1 + + ; setup stack (fp, sp) + mov fp, 0 + + ; set it's stack base to tsk->thread_info bottom + GET_TSK_STACK_BASE r0, sp + + j start_kernel_secondary +END(first_lines_of_secondary) +#endif diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c new file mode 100644 index 0000000000..5cda19d0aa --- /dev/null +++ b/arch/arc/kernel/intc-arcv2.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2014 Synopsys, Inc. (www.synopsys.com) + */ + +#include +#include +#include +#include +#include +#include + +#define NR_EXCEPTIONS 16 + +struct bcr_irq_arcv2 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:3, firq:1, prio:4, exts:8, irqs:8, ver:8; +#else + unsigned int ver:8, irqs:8, exts:8, prio:4, firq:1, pad:3; +#endif +}; + +/* + * Early Hardware specific Interrupt setup + * -Called very early (start_kernel -> setup_arch -> setup_processor) + * -Platform Independent (must for any ARC Core) + * -Needed for each CPU (hence not foldable into init_IRQ) + */ +void arc_init_IRQ(void) +{ + unsigned int tmp, irq_prio, i; + struct bcr_irq_arcv2 irq_bcr; + + struct aux_irq_ctrl { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int res3:18, save_idx_regs:1, res2:1, + save_u_to_u:1, save_lp_regs:1, save_blink:1, + res:4, save_nr_gpr_pairs:5; +#else + unsigned int save_nr_gpr_pairs:5, res:4, + save_blink:1, save_lp_regs:1, save_u_to_u:1, + res2:1, save_idx_regs:1, res3:18; +#endif + } ictrl; + + *(unsigned int *)&ictrl = 0; + +#ifndef CONFIG_ARC_IRQ_NO_AUTOSAVE + ictrl.save_nr_gpr_pairs = 6; /* r0 to r11 (r12 saved manually) */ + ictrl.save_blink = 1; + ictrl.save_lp_regs = 1; /* LP_COUNT, LP_START, LP_END */ + ictrl.save_u_to_u = 0; /* user ctxt saved on kernel stack */ + ictrl.save_idx_regs = 1; /* JLI, LDI, EI */ +#endif + + WRITE_AUX(AUX_IRQ_CTRL, ictrl); + + /* + * ARCv2 core intc provides multiple interrupt priorities (upto 16). + * Typical builds though have only two levels (0-high, 1-low) + * Linux by default uses lower prio 1 for most irqs, reserving 0 for + * NMI style interrupts in future (say perf) + */ + + READ_BCR(ARC_REG_IRQ_BCR, irq_bcr); + + irq_prio = irq_bcr.prio; /* Encoded as N-1 for N levels */ + pr_info("archs-intc\t: %d priority levels (default %d)%s\n", + irq_prio + 1, ARCV2_IRQ_DEF_PRIO, + irq_bcr.firq ? " FIRQ (not used)":""); + + /* + * Set a default priority for all available interrupts to prevent + * switching of register banks if Fast IRQ and multiple register banks + * are supported by CPU. + * Also disable private-per-core IRQ lines so faulty external HW won't + * trigger interrupt that kernel is not ready to handle. + */ + for (i = NR_EXCEPTIONS; i < irq_bcr.irqs + NR_EXCEPTIONS; i++) { + write_aux_reg(AUX_IRQ_SELECT, i); + write_aux_reg(AUX_IRQ_PRIORITY, ARCV2_IRQ_DEF_PRIO); + + /* + * Only mask cpu private IRQs here. 
+ * "common" interrupts are masked at IDU, otherwise it would + * need to be unmasked at each cpu, with IPIs + */ + if (i < FIRST_EXT_IRQ) + write_aux_reg(AUX_IRQ_ENABLE, 0); + } + + /* setup status32, don't enable intr yet as kernel doesn't want */ + tmp = read_aux_reg(ARC_REG_STATUS32); + tmp |= ARCV2_IRQ_DEF_PRIO << 1; + tmp &= ~STATUS_IE_MASK; + asm volatile("kflag %0 \n"::"r"(tmp)); +} + +static void arcv2_irq_mask(struct irq_data *data) +{ + write_aux_reg(AUX_IRQ_SELECT, data->hwirq); + write_aux_reg(AUX_IRQ_ENABLE, 0); +} + +static void arcv2_irq_unmask(struct irq_data *data) +{ + write_aux_reg(AUX_IRQ_SELECT, data->hwirq); + write_aux_reg(AUX_IRQ_ENABLE, 1); +} + +void arcv2_irq_enable(struct irq_data *data) +{ + /* set default priority */ + write_aux_reg(AUX_IRQ_SELECT, data->hwirq); + write_aux_reg(AUX_IRQ_PRIORITY, ARCV2_IRQ_DEF_PRIO); + + /* + * hw auto enables (linux unmask) all by default + * So no need to do IRQ_ENABLE here + * XXX: However OSCI LAN need it + */ + write_aux_reg(AUX_IRQ_ENABLE, 1); +} + +static struct irq_chip arcv2_irq_chip = { + .name = "ARCv2 core Intc", + .irq_mask = arcv2_irq_mask, + .irq_unmask = arcv2_irq_unmask, + .irq_enable = arcv2_irq_enable +}; + +static int arcv2_irq_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hw) +{ + /* + * core intc IRQs [16, 23]: + * Statically assigned always private-per-core (Timers, WDT, IPI, PCT) + */ + if (hw < FIRST_EXT_IRQ) { + /* + * A subsequent request_percpu_irq() fails if percpu_devid is + * not set. That in turns sets NOAUTOEN, meaning each core needs + * to call enable_percpu_irq() + */ + irq_set_percpu_devid(irq); + irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_percpu_irq); + } else { + irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_level_irq); + } + + return 0; +} + +static const struct irq_domain_ops arcv2_irq_ops = { + .xlate = irq_domain_xlate_onecell, + .map = arcv2_irq_map, +}; + + +static int __init +init_onchip_IRQ(struct device_node *intc, struct device_node *parent) +{ + struct irq_domain *root_domain; + struct bcr_irq_arcv2 irq_bcr; + unsigned int nr_cpu_irqs; + + READ_BCR(ARC_REG_IRQ_BCR, irq_bcr); + nr_cpu_irqs = irq_bcr.irqs + NR_EXCEPTIONS; + + if (parent) + panic("DeviceTree incore intc not a root irq controller\n"); + + root_domain = irq_domain_add_linear(intc, nr_cpu_irqs, &arcv2_irq_ops, NULL); + if (!root_domain) + panic("root irq domain not avail\n"); + + /* + * Needed for primary domain lookup to succeed + * This is a primary irqchip, and can never have a parent + */ + irq_set_default_host(root_domain); + +#ifdef CONFIG_SMP + irq_create_mapping(root_domain, IPI_IRQ); +#endif + irq_create_mapping(root_domain, SOFTIRQ_IRQ); + + return 0; +} + +IRQCHIP_DECLARE(arc_intc, "snps,archs-intc", init_onchip_IRQ); diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c new file mode 100644 index 0000000000..6885e42287 --- /dev/null +++ b/arch/arc/kernel/intc-compact.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2011-12 Synopsys, Inc. (www.synopsys.com) + */ + +#include +#include +#include +#include +#include +#include + +#define NR_CPU_IRQS 32 /* number of irq lines coming in */ +#define TIMER0_IRQ 3 /* Fixed by ISA */ + +/* + * Early Hardware specific Interrupt setup + * -Platform independent, needed for each CPU (not foldable into init_IRQ) + * -Called very early (start_kernel -> setup_arch -> setup_processor) + * + * what it does ? 
+ * -Optionally, setup the High priority Interrupts as Level 2 IRQs + */ +void arc_init_IRQ(void) +{ + unsigned int level_mask = 0, i; + + /* Is timer high priority Interrupt (Level2 in ARCompact jargon) */ + level_mask |= IS_ENABLED(CONFIG_ARC_COMPACT_IRQ_LEVELS) << TIMER0_IRQ; + + /* + * Write to register, even if no LV2 IRQs configured to reset it + * in case bootloader had mucked with it + */ + write_aux_reg(AUX_IRQ_LEV, level_mask); + + if (level_mask) + pr_info("Level-2 interrupts bitset %x\n", level_mask); + + /* + * Disable all IRQ lines so faulty external hardware won't + * trigger interrupt that kernel is not ready to handle. + */ + for (i = TIMER0_IRQ; i < NR_CPU_IRQS; i++) { + unsigned int ienb; + + ienb = read_aux_reg(AUX_IENABLE); + ienb &= ~(1 << i); + write_aux_reg(AUX_IENABLE, ienb); + } +} + +/* + * ARC700 core includes a simple on-chip intc supporting + * -per IRQ enable/disable + * -2 levels of interrupts (high/low) + * -all interrupts being level triggered + * + * To reduce platform code, we assume all IRQs directly hooked-up into intc. + * Platforms with external intc, hence cascaded IRQs, are free to over-ride + * below, per IRQ. + */ + +static void arc_irq_mask(struct irq_data *data) +{ + unsigned int ienb; + + ienb = read_aux_reg(AUX_IENABLE); + ienb &= ~(1 << data->hwirq); + write_aux_reg(AUX_IENABLE, ienb); +} + +static void arc_irq_unmask(struct irq_data *data) +{ + unsigned int ienb; + + ienb = read_aux_reg(AUX_IENABLE); + ienb |= (1 << data->hwirq); + write_aux_reg(AUX_IENABLE, ienb); +} + +static struct irq_chip onchip_intc = { + .name = "ARC In-core Intc", + .irq_mask = arc_irq_mask, + .irq_unmask = arc_irq_unmask, +}; + +static int arc_intc_domain_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hw) +{ + switch (hw) { + case TIMER0_IRQ: + irq_set_percpu_devid(irq); + irq_set_chip_and_handler(irq, &onchip_intc, handle_percpu_irq); + break; + default: + irq_set_chip_and_handler(irq, &onchip_intc, handle_level_irq); + } + return 0; +} + +static const struct irq_domain_ops arc_intc_domain_ops = { + .xlate = irq_domain_xlate_onecell, + .map = arc_intc_domain_map, +}; + +static int __init +init_onchip_IRQ(struct device_node *intc, struct device_node *parent) +{ + struct irq_domain *root_domain; + + if (parent) + panic("DeviceTree incore intc not a root irq controller\n"); + + root_domain = irq_domain_add_linear(intc, NR_CPU_IRQS, + &arc_intc_domain_ops, NULL); + if (!root_domain) + panic("root irq domain not avail\n"); + + /* + * Needed for primary domain lookup to succeed + * This is a primary irqchip, and can never have a parent + */ + irq_set_default_host(root_domain); + + return 0; +} + +IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ); + +/* + * arch_local_irq_enable - Enable interrupts. + * + * 1. Explicitly called to re-enable interrupts + * 2. Implicitly called from spin_unlock_irq, write_unlock_irq etc + * which maybe in hard ISR itself + * + * Semantics of this function change depending on where it is called from: + * + * -If called from hard-ISR, it must not invert interrupt priorities + * e.g. suppose TIMER is high priority (Level 2) IRQ + * Time hard-ISR, timer_interrupt( ) calls spin_unlock_irq several times. + * Here local_irq_enable( ) shd not re-enable lower priority interrupts + * -If called from soft-ISR, it must re-enable all interrupts + * soft ISR are low priority jobs which can be very slow, thus all IRQs + * must be enabled while they run. 
+ * Now hardware context wise we may still be in L2 ISR (not done rtie) + * still we must re-enable both L1 and L2 IRQs + * Another twist is prev scenario with flow being + * L1 ISR ==> interrupted by L2 ISR ==> L2 soft ISR + * here we must not re-enable Ll as prev Ll Interrupt's h/w context will get + * over-written (this is deficiency in ARC700 Interrupt mechanism) + */ + +#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS /* Complex version for 2 IRQ levels */ + +void arch_local_irq_enable(void) +{ + unsigned long flags = arch_local_save_flags(); + + if (flags & STATUS_A2_MASK) + flags |= STATUS_E2_MASK; + else if (flags & STATUS_A1_MASK) + flags |= STATUS_E1_MASK; + + arch_local_irq_restore(flags); +} + +EXPORT_SYMBOL(arch_local_irq_enable); +#endif diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c new file mode 100644 index 0000000000..dd09b58ff8 --- /dev/null +++ b/arch/arc/kernel/irq.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2011-12 Synopsys, Inc. (www.synopsys.com) + */ + +#include +#include +#include + +#include +#include + +/* + * Late Interrupt system init called from start_kernel for Boot CPU only + * + * Since slab must already be initialized, platforms can start doing any + * needed request_irq( )s + */ +void __init init_IRQ(void) +{ + /* + * process the entire interrupt tree in one go + * Any external intc will be setup provided DT chains them + * properly + */ + irqchip_init(); + +#ifdef CONFIG_SMP + /* a SMP H/w block could do IPI IRQ request here */ + if (plat_smp_ops.init_per_cpu) + plat_smp_ops.init_per_cpu(smp_processor_id()); +#endif + + if (machine_desc->init_per_cpu) + machine_desc->init_per_cpu(smp_processor_id()); +} + +/* + * "C" Entry point for any ARC ISR, called from low level vector handler + * @irq is the vector number read from ICAUSE reg of on-chip intc + */ +void arch_do_IRQ(unsigned int hwirq, struct pt_regs *regs) +{ + struct pt_regs *old_regs; + + irq_enter(); + old_regs = set_irq_regs(regs); + generic_handle_domain_irq(NULL, hwirq); + set_irq_regs(old_regs); + irq_exit(); +} diff --git a/arch/arc/kernel/jump_label.c b/arch/arc/kernel/jump_label.c new file mode 100644 index 0000000000..b8600dc325 --- /dev/null +++ b/arch/arc/kernel/jump_label.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +#include "asm/cacheflush.h" + +#define JUMPLABEL_ERR "ARC: jump_label: ERROR: " + +/* Halt system on fatal error to make debug easier */ +#define arc_jl_fatal(format...) \ +({ \ + pr_err(JUMPLABEL_ERR format); \ + BUG(); \ +}) + +static inline u32 arc_gen_nop(void) +{ + /* 1x 32bit NOP in middle endian */ + return 0x7000264a; +} + +/* + * Atomic update of patched instruction is only available if this + * instruction doesn't cross L1 cache line boundary. 
You can read about + * the way we achieve this in arc/include/asm/jump_label.h + */ +static inline void instruction_align_assert(void *addr, int len) +{ + unsigned long a = (unsigned long)addr; + + if ((a >> L1_CACHE_SHIFT) != ((a + len - 1) >> L1_CACHE_SHIFT)) + arc_jl_fatal("instruction (addr %px) cross L1 cache line border", + addr); +} + +/* + * ARCv2 'Branch unconditionally' instruction: + * 00000ssssssssss1SSSSSSSSSSNRtttt + * s S[n:0] lower bits signed immediate (number is bitfield size) + * S S[m:n+1] upper bits signed immediate (number is bitfield size) + * t S[24:21] upper bits signed immediate (branch unconditionally far) + * N N <.d> delay slot mode + * R R Reserved + */ +static inline u32 arc_gen_branch(jump_label_t pc, jump_label_t target) +{ + u32 instruction_l, instruction_r; + u32 pcl = pc & GENMASK(31, 2); + u32 u_offset = target - pcl; + u32 s, S, t; + + /* + * Offset in 32-bit branch instruction must to fit into s25. + * Something is terribly broken if we get such huge offset within one + * function. + */ + if ((s32)u_offset < -16777216 || (s32)u_offset > 16777214) + arc_jl_fatal("gen branch with offset (%d) not fit in s25", + (s32)u_offset); + + /* + * All instructions are aligned by 2 bytes so we should never get offset + * here which is not 2 bytes aligned. + */ + if (u_offset & 0x1) + arc_jl_fatal("gen branch with offset (%d) unaligned to 2 bytes", + (s32)u_offset); + + s = (u_offset >> 1) & GENMASK(9, 0); + S = (u_offset >> 11) & GENMASK(9, 0); + t = (u_offset >> 21) & GENMASK(3, 0); + + /* 00000ssssssssss1 */ + instruction_l = (s << 1) | 0x1; + /* SSSSSSSSSSNRtttt */ + instruction_r = (S << 6) | t; + + return (instruction_r << 16) | (instruction_l & GENMASK(15, 0)); +} + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + jump_label_t *instr_addr = (jump_label_t *)entry->code; + u32 instr; + + instruction_align_assert(instr_addr, JUMP_LABEL_NOP_SIZE); + + if (type == JUMP_LABEL_JMP) + instr = arc_gen_branch(entry->code, entry->target); + else + instr = arc_gen_nop(); + + WRITE_ONCE(*instr_addr, instr); + flush_icache_range(entry->code, entry->code + JUMP_LABEL_NOP_SIZE); +} + +void arch_jump_label_transform_static(struct jump_entry *entry, + enum jump_label_type type) +{ + /* + * We use only one NOP type (1x, 4 byte) in arch_static_branch, so + * there's no need to patch an identical NOP over the top of it here. + * The generic code calls 'arch_jump_label_transform' if the NOP needs + * to be replaced by a branch, so 'arch_jump_label_transform_static' is + * never called with type other than JUMP_LABEL_NOP. + */ + BUG_ON(type != JUMP_LABEL_NOP); +} + +#ifdef CONFIG_ARC_DBG_JUMP_LABEL +#define SELFTEST_MSG "ARC: instruction generation self-test: " + +struct arc_gen_branch_testdata { + jump_label_t pc; + jump_label_t target_address; + u32 expected_instr; +}; + +static __init int branch_gen_test(const struct arc_gen_branch_testdata *test) +{ + u32 instr_got; + + instr_got = arc_gen_branch(test->pc, test->target_address); + if (instr_got == test->expected_instr) + return 0; + + pr_err(SELFTEST_MSG "FAIL:\n arc_gen_branch(0x%08x, 0x%08x) != 0x%08x, got 0x%08x\n", + test->pc, test->target_address, + test->expected_instr, instr_got); + + return -EFAULT; +} + +/* + * Offset field in branch instruction is not continuous. Test all + * available offset field and sign combinations. Test data is generated + * from real working code. 
+ */ +static const struct arc_gen_branch_testdata arcgenbr_test_data[] __initconst = { + {0x90007548, 0x90007514, 0xffcf07cd}, /* tiny (-52) offs */ + {0x9000c9c0, 0x9000c782, 0xffcf05c3}, /* tiny (-574) offs */ + {0x9000cc1c, 0x9000c782, 0xffcf0367}, /* tiny (-1178) offs */ + {0x9009dce0, 0x9009d106, 0xff8f0427}, /* small (-3034) offs */ + {0x9000f5de, 0x90007d30, 0xfc0f0755}, /* big (-30892) offs */ + {0x900a2444, 0x90035f64, 0xc9cf0321}, /* huge (-443616) offs */ + {0x90007514, 0x9000752c, 0x00000019}, /* tiny (+24) offs */ + {0x9001a578, 0x9001a77a, 0x00000203}, /* tiny (+514) offs */ + {0x90031ed8, 0x90032634, 0x0000075d}, /* tiny (+1884) offs */ + {0x9008c7f2, 0x9008d3f0, 0x00400401}, /* small (+3072) offs */ + {0x9000bb38, 0x9003b340, 0x17c00009}, /* big (+194568) offs */ + {0x90008f44, 0x90578d80, 0xb7c2063d} /* huge (+5701180) offs */ +}; + +static __init int instr_gen_test(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(arcgenbr_test_data); i++) + if (branch_gen_test(&arcgenbr_test_data[i])) + return -EFAULT; + + pr_info(SELFTEST_MSG "OK\n"); + + return 0; +} +early_initcall(instr_gen_test); + +#endif /* CONFIG_ARC_DBG_JUMP_LABEL */ diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c new file mode 100644 index 0000000000..345a000055 --- /dev/null +++ b/arch/arc/kernel/kgdb.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * kgdb support for ARC + * + * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com) + */ + +#include +#include +#include +#include +#include + +static void to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *kernel_regs, + struct callee_regs *cregs) +{ + int regno; + + for (regno = 0; regno <= 26; regno++) + gdb_regs[_R0 + regno] = get_reg(regno, kernel_regs, cregs); + + for (regno = 27; regno < GDB_MAX_REGS; regno++) + gdb_regs[regno] = 0; + + gdb_regs[_FP] = kernel_regs->fp; + gdb_regs[__SP] = kernel_regs->sp; + gdb_regs[_BLINK] = kernel_regs->blink; + gdb_regs[_RET] = kernel_regs->ret; + gdb_regs[_STATUS32] = kernel_regs->status32; + gdb_regs[_LP_COUNT] = kernel_regs->lp_count; + gdb_regs[_LP_END] = kernel_regs->lp_end; + gdb_regs[_LP_START] = kernel_regs->lp_start; + gdb_regs[_BTA] = kernel_regs->bta; + gdb_regs[_STOP_PC] = kernel_regs->ret; +} + +static void from_gdb_regs(unsigned long *gdb_regs, struct pt_regs *kernel_regs, + struct callee_regs *cregs) +{ + int regno; + + for (regno = 0; regno <= 26; regno++) + set_reg(regno, gdb_regs[regno + _R0], kernel_regs, cregs); + + kernel_regs->fp = gdb_regs[_FP]; + kernel_regs->sp = gdb_regs[__SP]; + kernel_regs->blink = gdb_regs[_BLINK]; + kernel_regs->ret = gdb_regs[_RET]; + kernel_regs->status32 = gdb_regs[_STATUS32]; + kernel_regs->lp_count = gdb_regs[_LP_COUNT]; + kernel_regs->lp_end = gdb_regs[_LP_END]; + kernel_regs->lp_start = gdb_regs[_LP_START]; + kernel_regs->bta = gdb_regs[_BTA]; +} + + +void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *kernel_regs) +{ + to_gdb_regs(gdb_regs, kernel_regs, (struct callee_regs *) + current->thread.callee_reg); +} + +void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *kernel_regs) +{ + from_gdb_regs(gdb_regs, kernel_regs, (struct callee_regs *) + current->thread.callee_reg); +} + +void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, + struct task_struct *task) +{ + if (task) + to_gdb_regs(gdb_regs, task_pt_regs(task), + (struct callee_regs *) task->thread.callee_reg); +} + +struct single_step_data_t { + uint16_t opcode[2]; + unsigned long address[2]; + int is_branch; + int armed; +} single_step_data; + 
+static void undo_single_step(struct pt_regs *regs) +{ + if (single_step_data.armed) { + int i; + + for (i = 0; i < (single_step_data.is_branch ? 2 : 1); i++) { + memcpy((void *) single_step_data.address[i], + &single_step_data.opcode[i], + BREAK_INSTR_SIZE); + + flush_icache_range(single_step_data.address[i], + single_step_data.address[i] + + BREAK_INSTR_SIZE); + } + single_step_data.armed = 0; + } +} + +static void place_trap(unsigned long address, void *save) +{ + memcpy(save, (void *) address, BREAK_INSTR_SIZE); + memcpy((void *) address, &arch_kgdb_ops.gdb_bpt_instr, + BREAK_INSTR_SIZE); + flush_icache_range(address, address + BREAK_INSTR_SIZE); +} + +static void do_single_step(struct pt_regs *regs) +{ + single_step_data.is_branch = disasm_next_pc((unsigned long) + regs->ret, regs, (struct callee_regs *) + current->thread.callee_reg, + &single_step_data.address[0], + &single_step_data.address[1]); + + place_trap(single_step_data.address[0], &single_step_data.opcode[0]); + + if (single_step_data.is_branch) { + place_trap(single_step_data.address[1], + &single_step_data.opcode[1]); + } + + single_step_data.armed++; +} + +int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, + char *remcomInBuffer, char *remcomOutBuffer, + struct pt_regs *regs) +{ + unsigned long addr; + char *ptr; + + undo_single_step(regs); + + switch (remcomInBuffer[0]) { + case 's': + case 'c': + ptr = &remcomInBuffer[1]; + if (kgdb_hex2long(&ptr, &addr)) + regs->ret = addr; + fallthrough; + + case 'D': + case 'k': + atomic_set(&kgdb_cpu_doing_single_step, -1); + + if (remcomInBuffer[0] == 's') { + do_single_step(regs); + atomic_set(&kgdb_cpu_doing_single_step, + smp_processor_id()); + } + + return 0; + } + return -1; +} + +int kgdb_arch_init(void) +{ + single_step_data.armed = 0; + return 0; +} + +void kgdb_trap(struct pt_regs *regs) +{ + /* trap_s 3 is used for breakpoints that overwrite existing + * instructions, while trap_s 4 is used for compiled breakpoints. + * + * with trap_s 3 breakpoints the original instruction needs to be + * restored and continuation needs to start at the location of the + * breakpoint. + * + * with trap_s 4 (compiled) breakpoints, continuation needs to + * start after the breakpoint. + */ + if (regs->ecr_param == 3) + instruction_pointer(regs) -= BREAK_INSTR_SIZE; + + kgdb_handle_exception(1, SIGTRAP, 0, regs); +} + +void kgdb_arch_exit(void) +{ +} + +void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) +{ + instruction_pointer(regs) = ip; +} + +void kgdb_call_nmi_hook(void *ignored) +{ + /* Default implementation passes get_irq_regs() but we don't */ + kgdb_nmicallback(raw_smp_processor_id(), NULL); +} + +const struct kgdb_arch arch_kgdb_ops = { + /* breakpoint instruction: TRAP_S 0x3 */ +#ifdef CONFIG_CPU_BIG_ENDIAN + .gdb_bpt_instr = {0x78, 0x7e}, +#else + .gdb_bpt_instr = {0x7e, 0x78}, +#endif +}; diff --git a/arch/arc/kernel/kprobes.c b/arch/arc/kernel/kprobes.c new file mode 100644 index 0000000000..e71d64119d --- /dev/null +++ b/arch/arc/kernel/kprobes.c @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MIN_STACK_SIZE(addr) min((unsigned long)MAX_STACK_SIZE, \ + (unsigned long)current_thread_info() + THREAD_SIZE - (addr)) + +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + /* Attempt to probe at unaligned address */ + if ((unsigned long)p->addr & 0x01) + return -EINVAL; + + /* Address should not be in exception handling code */ + + p->ainsn.is_short = is_short_instr((unsigned long)p->addr); + p->opcode = *p->addr; + + return 0; +} + +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + *p->addr = UNIMP_S_INSTRUCTION; + + flush_icache_range((unsigned long)p->addr, + (unsigned long)p->addr + sizeof(kprobe_opcode_t)); +} + +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + *p->addr = p->opcode; + + flush_icache_range((unsigned long)p->addr, + (unsigned long)p->addr + sizeof(kprobe_opcode_t)); +} + +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + arch_disarm_kprobe(p); + + /* Can we remove the kprobe in the middle of kprobe handling? */ + if (p->ainsn.t1_addr) { + *(p->ainsn.t1_addr) = p->ainsn.t1_opcode; + + flush_icache_range((unsigned long)p->ainsn.t1_addr, + (unsigned long)p->ainsn.t1_addr + + sizeof(kprobe_opcode_t)); + + p->ainsn.t1_addr = NULL; + } + + if (p->ainsn.t2_addr) { + *(p->ainsn.t2_addr) = p->ainsn.t2_opcode; + + flush_icache_range((unsigned long)p->ainsn.t2_addr, + (unsigned long)p->ainsn.t2_addr + + sizeof(kprobe_opcode_t)); + + p->ainsn.t2_addr = NULL; + } +} + +static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; +} + +static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); + kcb->kprobe_status = kcb->prev_kprobe.status; +} + +static inline void __kprobes set_current_kprobe(struct kprobe *p) +{ + __this_cpu_write(current_kprobe, p); +} + +static void __kprobes resume_execution(struct kprobe *p, unsigned long addr, + struct pt_regs *regs) +{ + /* Remove the trap instructions inserted for single step and + * restore the original instructions + */ + if (p->ainsn.t1_addr) { + *(p->ainsn.t1_addr) = p->ainsn.t1_opcode; + + flush_icache_range((unsigned long)p->ainsn.t1_addr, + (unsigned long)p->ainsn.t1_addr + + sizeof(kprobe_opcode_t)); + + p->ainsn.t1_addr = NULL; + } + + if (p->ainsn.t2_addr) { + *(p->ainsn.t2_addr) = p->ainsn.t2_opcode; + + flush_icache_range((unsigned long)p->ainsn.t2_addr, + (unsigned long)p->ainsn.t2_addr + + sizeof(kprobe_opcode_t)); + + p->ainsn.t2_addr = NULL; + } + + return; +} + +static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs) +{ + unsigned long next_pc; + unsigned long tgt_if_br = 0; + int is_branch; + unsigned long bta; + + /* Copy the opcode back to the kprobe location and execute the + * instruction. Because of this we will not be able to get into the + * same kprobe until this kprobe is done + */ + *(p->addr) = p->opcode; + + flush_icache_range((unsigned long)p->addr, + (unsigned long)p->addr + sizeof(kprobe_opcode_t)); + + /* Now we insert the trap at the next location after this instruction to + * single step. 
If it is a branch we insert the trap at possible branch + * targets + */ + + bta = regs->bta; + + if (regs->status32 & 0x40) { + /* We are in a delay slot with the branch taken */ + + next_pc = bta & ~0x01; + + if (!p->ainsn.is_short) { + if (bta & 0x01) + regs->blink += 2; + else { + /* Branch not taken */ + next_pc += 2; + + /* next pc is taken from bta after executing the + * delay slot instruction + */ + regs->bta += 2; + } + } + + is_branch = 0; + } else + is_branch = + disasm_next_pc((unsigned long)p->addr, regs, + (struct callee_regs *) current->thread.callee_reg, + &next_pc, &tgt_if_br); + + p->ainsn.t1_addr = (kprobe_opcode_t *) next_pc; + p->ainsn.t1_opcode = *(p->ainsn.t1_addr); + *(p->ainsn.t1_addr) = TRAP_S_2_INSTRUCTION; + + flush_icache_range((unsigned long)p->ainsn.t1_addr, + (unsigned long)p->ainsn.t1_addr + + sizeof(kprobe_opcode_t)); + + if (is_branch) { + p->ainsn.t2_addr = (kprobe_opcode_t *) tgt_if_br; + p->ainsn.t2_opcode = *(p->ainsn.t2_addr); + *(p->ainsn.t2_addr) = TRAP_S_2_INSTRUCTION; + + flush_icache_range((unsigned long)p->ainsn.t2_addr, + (unsigned long)p->ainsn.t2_addr + + sizeof(kprobe_opcode_t)); + } +} + +int __kprobes arc_kprobe_handler(unsigned long addr, struct pt_regs *regs) +{ + struct kprobe *p; + struct kprobe_ctlblk *kcb; + + preempt_disable(); + + kcb = get_kprobe_ctlblk(); + p = get_kprobe((unsigned long *)addr); + + if (p) { + /* + * We have reentered the kprobe_handler, since another kprobe + * was hit while within the handler, we save the original + * kprobes and single step on the instruction of the new probe + * without calling any user handlers to avoid recursive + * kprobes. + */ + if (kprobe_running()) { + save_previous_kprobe(kcb); + set_current_kprobe(p); + kprobes_inc_nmissed_count(p); + setup_singlestep(p, regs); + kcb->kprobe_status = KPROBE_REENTER; + return 1; + } + + set_current_kprobe(p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + + /* If we have no pre-handler or it returned 0, we continue with + * normal processing. If we have a pre-handler and it returned + * non-zero - which means user handler setup registers to exit + * to another instruction, we must skip the single stepping. + */ + if (!p->pre_handler || !p->pre_handler(p, regs)) { + setup_singlestep(p, regs); + kcb->kprobe_status = KPROBE_HIT_SS; + } else { + reset_current_kprobe(); + preempt_enable_no_resched(); + } + + return 1; + } + + /* no_kprobe: */ + preempt_enable_no_resched(); + return 0; +} + +static int __kprobes arc_post_kprobe_handler(unsigned long addr, + struct pt_regs *regs) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (!cur) + return 0; + + resume_execution(cur, addr, regs); + + /* Rearm the kprobe */ + arch_arm_kprobe(cur); + + /* + * When we return from trap instruction we go to the next instruction + * We restored the actual instruction in resume_exectuiont and we to + * return to the same address and execute it + */ + regs->ret = addr; + + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + cur->post_handler(cur, regs, 0); + } + + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); + goto out; + } + + reset_current_kprobe(); + +out: + preempt_enable_no_resched(); + return 1; +} + +/* + * Fault can be for the instruction being single stepped or for the + * pre/post handlers in the module. 
+ * This is applicable for applications like user probes, where we have the + * probe in user space and the handlers in the kernel + */ + +int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned long trapnr) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + switch (kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single stepped + * caused the fault. We reset the current kprobe and allow the + * exception handler as if it is regular exception. In our + * case it doesn't matter because the system will be halted + */ + resume_execution(cur, (unsigned long)cur->addr, regs); + + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); + + preempt_enable_no_resched(); + break; + + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We are here because the instructions in the pre/post handler + * caused the fault. + */ + + /* + * In case the user-specified fault handler returned zero, + * try to fix up. + */ + if (fixup_exception(regs)) + return 1; + + /* + * fixup_exception() could not handle it, + * Let do_page_fault() fix it. + */ + break; + + default: + break; + } + return 0; +} + +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = data; + unsigned long addr = args->err; + int ret = NOTIFY_DONE; + + switch (val) { + case DIE_IERR: + if (arc_kprobe_handler(addr, args->regs)) + return NOTIFY_STOP; + break; + + case DIE_TRAP: + if (arc_post_kprobe_handler(addr, args->regs)) + return NOTIFY_STOP; + break; + + default: + break; + } + + return ret; +} + +static void __used kretprobe_trampoline_holder(void) +{ + __asm__ __volatile__(".global __kretprobe_trampoline\n" + "__kretprobe_trampoline:\n" + "nop\n"); +} + +void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + + ri->ret_addr = (kprobe_opcode_t *) regs->blink; + ri->fp = NULL; + + /* Replace the return addr with trampoline addr */ + regs->blink = (unsigned long)&__kretprobe_trampoline; +} + +static int __kprobes trampoline_probe_handler(struct kprobe *p, + struct pt_regs *regs) +{ + regs->ret = __kretprobe_trampoline_handler(regs, NULL); + + /* By returning a non zero value, we are telling the kprobe handler + * that we don't want the post_handler to run + */ + return 1; +} + +static struct kprobe trampoline_p = { + .addr = (kprobe_opcode_t *) &__kretprobe_trampoline, + .pre_handler = trampoline_probe_handler +}; + +int __init arch_init_kprobes(void) +{ + /* Registering the trampoline code for the kret probe */ + return register_kprobe(&trampoline_p); +} + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + if (p->addr == (kprobe_opcode_t *) &__kretprobe_trampoline) + return 1; + + return 0; +} + +void trap_is_kprobe(unsigned long address, struct pt_regs *regs) +{ + notify_die(DIE_TRAP, "kprobe_trap", regs, address, 0, SIGTRAP); +} diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c new file mode 100644 index 0000000000..f9fdb557c2 --- /dev/null +++ b/arch/arc/kernel/mcip.c @@ -0,0 +1,421 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ARC ARConnect (MultiCore IP) support (formerly known as MCIP) + * + * Copyright (C) 2013 Synopsys, Inc. 
(www.synopsys.com) + */ + +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_RAW_SPINLOCK(mcip_lock); + +#ifdef CONFIG_SMP + +static char smp_cpuinfo_buf[128]; + +/* + * Set mask to halt GFRC if any online core in SMP cluster is halted. + * Only works for ARC HS v3.0+, on earlier versions has no effect. + */ +static void mcip_update_gfrc_halt_mask(int cpu) +{ + struct bcr_generic gfrc; + unsigned long flags; + u32 gfrc_halt_mask; + + READ_BCR(ARC_REG_GFRC_BUILD, gfrc); + + /* + * CMD_GFRC_SET_CORE and CMD_GFRC_READ_CORE commands were added in + * GFRC 0x3 version. + */ + if (gfrc.ver < 0x3) + return; + + raw_spin_lock_irqsave(&mcip_lock, flags); + + __mcip_cmd(CMD_GFRC_READ_CORE, 0); + gfrc_halt_mask = read_aux_reg(ARC_REG_MCIP_READBACK); + gfrc_halt_mask |= BIT(cpu); + __mcip_cmd_data(CMD_GFRC_SET_CORE, 0, gfrc_halt_mask); + + raw_spin_unlock_irqrestore(&mcip_lock, flags); +} + +static void mcip_update_debug_halt_mask(int cpu) +{ + u32 mcip_mask = 0; + unsigned long flags; + + raw_spin_lock_irqsave(&mcip_lock, flags); + + /* + * mcip_mask is same for CMD_DEBUG_SET_SELECT and CMD_DEBUG_SET_MASK + * commands. So read it once instead of reading both CMD_DEBUG_READ_MASK + * and CMD_DEBUG_READ_SELECT. + */ + __mcip_cmd(CMD_DEBUG_READ_SELECT, 0); + mcip_mask = read_aux_reg(ARC_REG_MCIP_READBACK); + + mcip_mask |= BIT(cpu); + + __mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, mcip_mask); + /* + * Parameter specified halt cause: + * STATUS32[H]/actionpoint/breakpoint/self-halt + * We choose all of them (0xF). + */ + __mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xF, mcip_mask); + + raw_spin_unlock_irqrestore(&mcip_lock, flags); +} + +static void mcip_setup_per_cpu(int cpu) +{ + struct mcip_bcr mp; + + READ_BCR(ARC_REG_MCIP_BCR, mp); + + smp_ipi_irq_setup(cpu, IPI_IRQ); + smp_ipi_irq_setup(cpu, SOFTIRQ_IRQ); + + /* Update GFRC halt mask as new CPU came online */ + if (mp.gfrc) + mcip_update_gfrc_halt_mask(cpu); + + /* Update MCIP debug mask as new CPU came online */ + if (mp.dbg) + mcip_update_debug_halt_mask(cpu); +} + +static void mcip_ipi_send(int cpu) +{ + unsigned long flags; + int ipi_was_pending; + + /* ARConnect can only send IPI to others */ + if (unlikely(cpu == raw_smp_processor_id())) { + arc_softirq_trigger(SOFTIRQ_IRQ); + return; + } + + raw_spin_lock_irqsave(&mcip_lock, flags); + + /* + * If receiver already has a pending interrupt, elide sending this one. + * Linux cross core calling works well with concurrent IPIs + * coalesced into one + * see arch/arc/kernel/smp.c: ipi_send_msg_one() + */ + __mcip_cmd(CMD_INTRPT_READ_STATUS, cpu); + ipi_was_pending = read_aux_reg(ARC_REG_MCIP_READBACK); + if (!ipi_was_pending) + __mcip_cmd(CMD_INTRPT_GENERATE_IRQ, cpu); + + raw_spin_unlock_irqrestore(&mcip_lock, flags); +} + +static void mcip_ipi_clear(int irq) +{ + unsigned int cpu, c; + unsigned long flags; + + if (unlikely(irq == SOFTIRQ_IRQ)) { + arc_softirq_clear(irq); + return; + } + + raw_spin_lock_irqsave(&mcip_lock, flags); + + /* Who sent the IPI */ + __mcip_cmd(CMD_INTRPT_CHECK_SOURCE, 0); + + cpu = read_aux_reg(ARC_REG_MCIP_READBACK); /* 1,2,4,8... 
*/ + + /* + * In rare case, multiple concurrent IPIs sent to same target can + * possibly be coalesced by MCIP into 1 asserted IRQ, so @cpus can be + * "vectored" (multiple bits sets) as opposed to typical single bit + */ + do { + c = __ffs(cpu); /* 0,1,2,3 */ + __mcip_cmd(CMD_INTRPT_GENERATE_ACK, c); + cpu &= ~(1U << c); + } while (cpu); + + raw_spin_unlock_irqrestore(&mcip_lock, flags); +} + +static void mcip_probe_n_setup(void) +{ + struct mcip_bcr mp; + + READ_BCR(ARC_REG_MCIP_BCR, mp); + + sprintf(smp_cpuinfo_buf, + "Extn [SMP]\t: ARConnect (v%d): %d cores with %s%s%s%s\n", + mp.ver, mp.num_cores, + IS_AVAIL1(mp.ipi, "IPI "), + IS_AVAIL1(mp.idu, "IDU "), + IS_AVAIL1(mp.dbg, "DEBUG "), + IS_AVAIL1(mp.gfrc, "GFRC")); + + cpuinfo_arc700[0].extn.gfrc = mp.gfrc; +} + +struct plat_smp_ops plat_smp_ops = { + .info = smp_cpuinfo_buf, + .init_early_smp = mcip_probe_n_setup, + .init_per_cpu = mcip_setup_per_cpu, + .ipi_send = mcip_ipi_send, + .ipi_clear = mcip_ipi_clear, +}; + +#endif + +/*************************************************************************** + * ARCv2 Interrupt Distribution Unit (IDU) + * + * Connects external "COMMON" IRQs to core intc, providing: + * -dynamic routing (IRQ affinity) + * -load balancing (Round Robin interrupt distribution) + * -1:N distribution + * + * It physically resides in the MCIP hw block + */ + +#include +#include +#include + +/* + * Set the DEST for @cmn_irq to @cpu_mask (1 bit per core) + */ +static void idu_set_dest(unsigned int cmn_irq, unsigned int cpu_mask) +{ + __mcip_cmd_data(CMD_IDU_SET_DEST, cmn_irq, cpu_mask); +} + +static void idu_set_mode(unsigned int cmn_irq, bool set_lvl, unsigned int lvl, + bool set_distr, unsigned int distr) +{ + union { + unsigned int word; + struct { + unsigned int distr:2, pad:2, lvl:1, pad2:27; + }; + } data; + + data.word = __mcip_cmd_read(CMD_IDU_READ_MODE, cmn_irq); + if (set_distr) + data.distr = distr; + if (set_lvl) + data.lvl = lvl; + __mcip_cmd_data(CMD_IDU_SET_MODE, cmn_irq, data.word); +} + +static void idu_irq_mask_raw(irq_hw_number_t hwirq) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&mcip_lock, flags); + __mcip_cmd_data(CMD_IDU_SET_MASK, hwirq, 1); + raw_spin_unlock_irqrestore(&mcip_lock, flags); +} + +static void idu_irq_mask(struct irq_data *data) +{ + idu_irq_mask_raw(data->hwirq); +} + +static void idu_irq_unmask(struct irq_data *data) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&mcip_lock, flags); + __mcip_cmd_data(CMD_IDU_SET_MASK, data->hwirq, 0); + raw_spin_unlock_irqrestore(&mcip_lock, flags); +} + +static void idu_irq_ack(struct irq_data *data) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&mcip_lock, flags); + __mcip_cmd(CMD_IDU_ACK_CIRQ, data->hwirq); + raw_spin_unlock_irqrestore(&mcip_lock, flags); +} + +static void idu_irq_mask_ack(struct irq_data *data) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&mcip_lock, flags); + __mcip_cmd_data(CMD_IDU_SET_MASK, data->hwirq, 1); + __mcip_cmd(CMD_IDU_ACK_CIRQ, data->hwirq); + raw_spin_unlock_irqrestore(&mcip_lock, flags); +} + +static int +idu_irq_set_affinity(struct irq_data *data, const struct cpumask *cpumask, + bool force) +{ + unsigned long flags; + cpumask_t online; + unsigned int destination_bits; + unsigned int distribution_mode; + + /* errout if no online cpu per @cpumask */ + if (!cpumask_and(&online, cpumask, cpu_online_mask)) + return -EINVAL; + + raw_spin_lock_irqsave(&mcip_lock, flags); + + destination_bits = cpumask_bits(&online)[0]; + idu_set_dest(data->hwirq, destination_bits); + + if 
(ffs(destination_bits) == fls(destination_bits)) + distribution_mode = IDU_M_DISTRI_DEST; + else + distribution_mode = IDU_M_DISTRI_RR; + + idu_set_mode(data->hwirq, false, 0, true, distribution_mode); + + raw_spin_unlock_irqrestore(&mcip_lock, flags); + + return IRQ_SET_MASK_OK; +} + +static int idu_irq_set_type(struct irq_data *data, u32 type) +{ + unsigned long flags; + + /* + * ARCv2 IDU HW does not support inverse polarity, so these are the + * only interrupt types supported. + */ + if (type & ~(IRQ_TYPE_EDGE_RISING | IRQ_TYPE_LEVEL_HIGH)) + return -EINVAL; + + raw_spin_lock_irqsave(&mcip_lock, flags); + + idu_set_mode(data->hwirq, true, + type & IRQ_TYPE_EDGE_RISING ? IDU_M_TRIG_EDGE : + IDU_M_TRIG_LEVEL, + false, 0); + + raw_spin_unlock_irqrestore(&mcip_lock, flags); + + return 0; +} + +static void idu_irq_enable(struct irq_data *data) +{ + /* + * By default send all common interrupts to all available online CPUs. + * The affinity of common interrupts in IDU must be set manually since + * in some cases the kernel will not call irq_set_affinity() by itself: + * 1. When the kernel is not configured with support of SMP. + * 2. When the kernel is configured with support of SMP but upper + * interrupt controllers does not support setting of the affinity + * and cannot propagate it to IDU. + */ + idu_irq_set_affinity(data, cpu_online_mask, false); + idu_irq_unmask(data); +} + +static struct irq_chip idu_irq_chip = { + .name = "MCIP IDU Intc", + .irq_mask = idu_irq_mask, + .irq_unmask = idu_irq_unmask, + .irq_ack = idu_irq_ack, + .irq_mask_ack = idu_irq_mask_ack, + .irq_enable = idu_irq_enable, + .irq_set_type = idu_irq_set_type, +#ifdef CONFIG_SMP + .irq_set_affinity = idu_irq_set_affinity, +#endif + +}; + +static void idu_cascade_isr(struct irq_desc *desc) +{ + struct irq_domain *idu_domain = irq_desc_get_handler_data(desc); + struct irq_chip *core_chip = irq_desc_get_chip(desc); + irq_hw_number_t core_hwirq = irqd_to_hwirq(irq_desc_get_irq_data(desc)); + irq_hw_number_t idu_hwirq = core_hwirq - FIRST_EXT_IRQ; + + chained_irq_enter(core_chip, desc); + generic_handle_domain_irq(idu_domain, idu_hwirq); + chained_irq_exit(core_chip, desc); +} + +static int idu_irq_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hwirq) +{ + irq_set_chip_and_handler(virq, &idu_irq_chip, handle_level_irq); + irq_set_status_flags(virq, IRQ_MOVE_PCNTXT); + + return 0; +} + +static const struct irq_domain_ops idu_irq_ops = { + .xlate = irq_domain_xlate_onetwocell, + .map = idu_irq_map, +}; + +/* + * [16, 23]: Statically assigned always private-per-core (Timers, WDT, IPI) + * [24, 23+C]: If C > 0 then "C" common IRQs + * [24+C, N]: Not statically assigned, private-per-core + */ + + +static int __init +idu_of_init(struct device_node *intc, struct device_node *parent) +{ + struct irq_domain *domain; + int nr_irqs; + int i, virq; + struct mcip_bcr mp; + struct mcip_idu_bcr idu_bcr; + + READ_BCR(ARC_REG_MCIP_BCR, mp); + + if (!mp.idu) + panic("IDU not detected, but DeviceTree using it"); + + READ_BCR(ARC_REG_MCIP_IDU_BCR, idu_bcr); + nr_irqs = mcip_idu_bcr_to_nr_irqs(idu_bcr); + + pr_info("MCIP: IDU supports %u common irqs\n", nr_irqs); + + domain = irq_domain_add_linear(intc, nr_irqs, &idu_irq_ops, NULL); + + /* Parent interrupts (core-intc) are already mapped */ + + for (i = 0; i < nr_irqs; i++) { + /* Mask all common interrupts by default */ + idu_irq_mask_raw(i); + + /* + * Return parent uplink IRQs (towards core intc) 24,25,..... 
+ * this step has been done before already + * however we need it to get the parent virq and set IDU handler + * as first level isr + */ + virq = irq_create_mapping(NULL, i + FIRST_EXT_IRQ); + BUG_ON(!virq); + irq_set_chained_handler_and_data(virq, idu_cascade_isr, domain); + } + + __mcip_cmd(CMD_IDU_ENABLE, 0); + + return 0; +} +IRQCHIP_DECLARE(arcv2_idu_intc, "snps,archs-idu-intc", idu_of_init); diff --git a/arch/arc/kernel/module.c b/arch/arc/kernel/module.c new file mode 100644 index 0000000000..c90c279047 --- /dev/null +++ b/arch/arc/kernel/module.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static inline void arc_write_me(unsigned short *addr, unsigned long value) +{ + *addr = (value & 0xffff0000) >> 16; + *(addr + 1) = (value & 0xffff); +} + +/* + * This gets called before relocation loop in generic loader + * Make a note of the section index of unwinding section + */ +int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, + char *secstr, struct module *mod) +{ +#ifdef CONFIG_ARC_DW2_UNWIND + mod->arch.unw_sec_idx = 0; + mod->arch.unw_info = NULL; +#endif + mod->arch.secstr = secstr; + return 0; +} + +void module_arch_cleanup(struct module *mod) +{ +#ifdef CONFIG_ARC_DW2_UNWIND + if (mod->arch.unw_info) + unwind_remove_table(mod->arch.unw_info, 0); +#endif +} + +int apply_relocate_add(Elf32_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, /* sec index for sym tbl */ + unsigned int relsec, /* sec index for relo sec */ + struct module *module) +{ + int i, n, relo_type; + Elf32_Rela *rel_entry = (void *)sechdrs[relsec].sh_addr; + Elf32_Sym *sym_entry, *sym_sec; + Elf32_Addr relocation, location, tgt_addr; + unsigned int tgtsec; + + /* + * @relsec has relocations e.g. .rela.init.text + * @tgtsec is section to patch e.g. .init.text + */ + tgtsec = sechdrs[relsec].sh_info; + tgt_addr = sechdrs[tgtsec].sh_addr; + sym_sec = (Elf32_Sym *) sechdrs[symindex].sh_addr; + n = sechdrs[relsec].sh_size / sizeof(*rel_entry); + + pr_debug("\nSection to fixup %s @%x\n", + module->arch.secstr + sechdrs[tgtsec].sh_name, tgt_addr); + pr_debug("=========================================================\n"); + pr_debug("r_off\tr_add\tst_value ADDRESS VALUE\n"); + pr_debug("=========================================================\n"); + + /* Loop thru entries in relocation section */ + for (i = 0; i < n; i++) { + const char *s; + + /* This is where to make the change */ + location = tgt_addr + rel_entry[i].r_offset; + + /* This is the symbol it is referring to. Note that all + undefined symbols have been resolved. */ + sym_entry = sym_sec + ELF32_R_SYM(rel_entry[i].r_info); + + relocation = sym_entry->st_value + rel_entry[i].r_addend; + + if (sym_entry->st_name == 0 && ELF_ST_TYPE (sym_entry->st_info) == STT_SECTION) { + s = module->arch.secstr + sechdrs[sym_entry->st_shndx].sh_name; + } else { + s = strtab + sym_entry->st_name; + } + + pr_debug(" %x\t%x\t%x %x %x [%s]\n", + rel_entry[i].r_offset, rel_entry[i].r_addend, + sym_entry->st_value, location, relocation, s); + + /* This assumes modules are built with -mlong-calls + * so any branches/jumps are absolute 32 bit jmps + * global data access again is abs 32 bit. 
+ * Both of these are handled by same relocation type + */ + relo_type = ELF32_R_TYPE(rel_entry[i].r_info); + + if (likely(R_ARC_32_ME == relo_type)) /* ME ( S + A ) */ + arc_write_me((unsigned short *)location, relocation); + else if (R_ARC_32 == relo_type) /* ( S + A ) */ + *((Elf32_Addr *) location) = relocation; + else if (R_ARC_32_PCREL == relo_type) /* ( S + A ) - PDATA ) */ + *((Elf32_Addr *) location) = relocation - location; + else + goto relo_err; + + } + +#ifdef CONFIG_ARC_DW2_UNWIND + if (strcmp(module->arch.secstr+sechdrs[tgtsec].sh_name, ".eh_frame") == 0) + module->arch.unw_sec_idx = tgtsec; +#endif + + return 0; + +relo_err: + pr_err("%s: unknown relocation: %u\n", + module->name, ELF32_R_TYPE(rel_entry[i].r_info)); + return -ENOEXEC; + +} + +/* Just before lift off: After sections have been relocated, we add the + * dwarf section to unwinder table pool + * This couldn't be done in module_frob_arch_sections() because + * relocations had not been applied by then + */ +int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *mod) +{ +#ifdef CONFIG_ARC_DW2_UNWIND + void *unw; + int unwsec = mod->arch.unw_sec_idx; + + if (unwsec) { + unw = unwind_add_table(mod, (void *)sechdrs[unwsec].sh_addr, + sechdrs[unwsec].sh_size); + mod->arch.unw_info = unw; + } +#endif + return 0; +} diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c new file mode 100644 index 0000000000..adff957962 --- /dev/null +++ b/arch/arc/kernel/perf_event.c @@ -0,0 +1,850 @@ +// SPDX-License-Identifier: GPL-2.0+ +// +// Linux performance counter support for ARC CPUs. +// This code is inspired by the perf support of various other architectures. +// +// Copyright (C) 2013-2018 Synopsys, Inc. (www.synopsys.com) + +#include +#include +#include +#include +#include +#include +#include +#include + +/* HW holds 8 symbols + one for null terminator */ +#define ARCPMU_EVENT_NAME_LEN 9 + +/* + * Some ARC pct quirks: + * + * PERF_COUNT_HW_STALLED_CYCLES_BACKEND + * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND + * The ARC 700 can either measure stalls per pipeline stage, or all stalls + * combined; for now we assign all stalls to STALLED_CYCLES_BACKEND + * and all pipeline flushes (e.g. caused by mispredicts, etc.) to + * STALLED_CYCLES_FRONTEND. + * + * We could start multiple performance counters and combine everything + * afterwards, but that makes it complicated. + * + * Note that I$ cache misses aren't counted by either of the two! + */ + +/* + * ARC PCT has hardware conditions with fixed "names" but variable "indexes" + * (based on a specific RTL build) + * Below is the static map between perf generic/arc specific event_id and + * h/w condition names. 
+ * At the time of probe, we loop thru each index and find it's name to + * complete the mapping of perf event_id to h/w index as latter is needed + * to program the counter really + */ +static const char * const arc_pmu_ev_hw_map[] = { + /* count cycles */ + [PERF_COUNT_HW_CPU_CYCLES] = "crun", + [PERF_COUNT_HW_REF_CPU_CYCLES] = "crun", + [PERF_COUNT_HW_BUS_CYCLES] = "crun", + + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush", + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall", + + /* counts condition */ + [PERF_COUNT_HW_INSTRUCTIONS] = "iall", + /* All jump instructions that are taken */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak", +#ifdef CONFIG_ISA_ARCV2 + [PERF_COUNT_HW_BRANCH_MISSES] = "bpmp", +#else + [PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */ + [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */ +#endif + [PERF_COUNT_ARC_LDC] = "imemrdc", /* Instr: mem read cached */ + [PERF_COUNT_ARC_STC] = "imemwrc", /* Instr: mem write cached */ + + [PERF_COUNT_ARC_DCLM] = "dclm", /* D-cache Load Miss */ + [PERF_COUNT_ARC_DCSM] = "dcsm", /* D-cache Store Miss */ + [PERF_COUNT_ARC_ICM] = "icm", /* I-cache Miss */ + [PERF_COUNT_ARC_EDTLB] = "edtlb", /* D-TLB Miss */ + [PERF_COUNT_ARC_EITLB] = "eitlb", /* I-TLB Miss */ + + [PERF_COUNT_HW_CACHE_REFERENCES] = "imemrdc", /* Instr: mem read cached */ + [PERF_COUNT_HW_CACHE_MISSES] = "dclm", /* D-cache Load Miss */ +}; + +#define C(_x) PERF_COUNT_HW_CACHE_##_x +#define CACHE_OP_UNSUPPORTED 0xffff + +static const unsigned int arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC, + [C(RESULT_MISS)] = PERF_COUNT_ARC_DCLM, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = PERF_COUNT_ARC_STC, + [C(RESULT_MISS)] = PERF_COUNT_ARC_DCSM, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PERF_COUNT_HW_INSTRUCTIONS, + [C(RESULT_MISS)] = PERF_COUNT_ARC_ICM, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC, + [C(RESULT_MISS)] = PERF_COUNT_ARC_EDTLB, + }, + /* DTLB LD/ST Miss not segregated by h/w*/ + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = PERF_COUNT_ARC_EITLB, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS, + [C(RESULT_MISS)] = 
PERF_COUNT_HW_BRANCH_MISSES, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +enum arc_pmu_attr_groups { + ARCPMU_ATTR_GR_EVENTS, + ARCPMU_ATTR_GR_FORMATS, + ARCPMU_NR_ATTR_GR +}; + +struct arc_pmu_raw_event_entry { + char name[ARCPMU_EVENT_NAME_LEN]; +}; + +struct arc_pmu { + struct pmu pmu; + unsigned int irq; + int n_counters; + int n_events; + u64 max_period; + int ev_hw_idx[PERF_COUNT_ARC_HW_MAX]; + + struct arc_pmu_raw_event_entry *raw_entry; + struct attribute **attrs; + struct perf_pmu_events_attr *attr; + const struct attribute_group *attr_groups[ARCPMU_NR_ATTR_GR + 1]; +}; + +struct arc_pmu_cpu { + /* + * A 1 bit for an index indicates that the counter is being used for + * an event. A 0 means that the counter can be used. + */ + unsigned long used_mask[BITS_TO_LONGS(ARC_PERF_MAX_COUNTERS)]; + + /* + * The events that are active on the PMU for the given index. + */ + struct perf_event *act_counter[ARC_PERF_MAX_COUNTERS]; +}; + +struct arc_callchain_trace { + int depth; + void *perf_stuff; +}; + +static int callchain_trace(unsigned int addr, void *data) +{ + struct arc_callchain_trace *ctrl = data; + struct perf_callchain_entry_ctx *entry = ctrl->perf_stuff; + + perf_callchain_store(entry, addr); + + if (ctrl->depth++ < 3) + return 0; + + return -1; +} + +void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + struct arc_callchain_trace ctrl = { + .depth = 0, + .perf_stuff = entry, + }; + + arc_unwind_core(NULL, regs, callchain_trace, &ctrl); +} + +void perf_callchain_user(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + /* + * User stack can't be unwound trivially with kernel dwarf unwinder + * So for now just record the user PC + */ + perf_callchain_store(entry, instruction_pointer(regs)); +} + +static struct arc_pmu *arc_pmu; +static DEFINE_PER_CPU(struct arc_pmu_cpu, arc_pmu_cpu); + +/* read counter #idx; note that counter# != event# on ARC! */ +static u64 arc_pmu_read_counter(int idx) +{ + u32 tmp; + u64 result; + + /* + * ARC supports making 'snapshots' of the counters, so we don't + * need to care about counters wrapping to 0 underneath our feet + */ + write_aux_reg(ARC_REG_PCT_INDEX, idx); + tmp = read_aux_reg(ARC_REG_PCT_CONTROL); + write_aux_reg(ARC_REG_PCT_CONTROL, tmp | ARC_REG_PCT_CONTROL_SN); + result = (u64) (read_aux_reg(ARC_REG_PCT_SNAPH)) << 32; + result |= read_aux_reg(ARC_REG_PCT_SNAPL); + + return result; +} + +static void arc_perf_event_update(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + u64 prev_raw_count = local64_read(&hwc->prev_count); + u64 new_raw_count = arc_pmu_read_counter(idx); + s64 delta = new_raw_count - prev_raw_count; + + /* + * We aren't afraid of hwc->prev_count changing beneath our feet + * because there's no way for us to re-enter this function anytime. 
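+ * (Worked example, values assumed for illustration: if prev_count was 100 and the new snapshot reads 250, delta = 150 is added to event->count and subtracted from period_left, and prev_count is advanced to 250.)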
+ */ + local64_set(&hwc->prev_count, new_raw_count); + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); +} + +static void arc_pmu_read(struct perf_event *event) +{ + arc_perf_event_update(event, &event->hw, event->hw.idx); +} + +static int arc_pmu_cache_event(u64 config) +{ + unsigned int cache_type, cache_op, cache_result; + int ret; + + cache_type = (config >> 0) & 0xff; + cache_op = (config >> 8) & 0xff; + cache_result = (config >> 16) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return -EINVAL; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return -EINVAL; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + ret = arc_pmu_cache_map[cache_type][cache_op][cache_result]; + + if (ret == CACHE_OP_UNSUPPORTED) + return -ENOENT; + + pr_debug("init cache event: type/op/result %d/%d/%d with h/w %d \'%s\'\n", + cache_type, cache_op, cache_result, ret, + arc_pmu_ev_hw_map[ret]); + + return ret; +} + +/* initializes hw_perf_event structure if event is supported */ +static int arc_pmu_event_init(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int ret; + + if (!is_sampling_event(event)) { + hwc->sample_period = arc_pmu->max_period; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); + } + + hwc->config = 0; + + if (is_isa_arcv2()) { + /* "exclude user" means "count only kernel" */ + if (event->attr.exclude_user) + hwc->config |= ARC_REG_PCT_CONFIG_KERN; + + /* "exclude kernel" means "count only user" */ + if (event->attr.exclude_kernel) + hwc->config |= ARC_REG_PCT_CONFIG_USER; + } + + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + if (event->attr.config >= PERF_COUNT_HW_MAX) + return -ENOENT; + if (arc_pmu->ev_hw_idx[event->attr.config] < 0) + return -ENOENT; + hwc->config |= arc_pmu->ev_hw_idx[event->attr.config]; + pr_debug("init event %d with h/w %08x \'%s\'\n", + (int)event->attr.config, (int)hwc->config, + arc_pmu_ev_hw_map[event->attr.config]); + return 0; + + case PERF_TYPE_HW_CACHE: + ret = arc_pmu_cache_event(event->attr.config); + if (ret < 0) + return ret; + hwc->config |= arc_pmu->ev_hw_idx[ret]; + pr_debug("init cache event with h/w %08x \'%s\'\n", + (int)hwc->config, arc_pmu_ev_hw_map[ret]); + return 0; + + case PERF_TYPE_RAW: + if (event->attr.config >= arc_pmu->n_events) + return -ENOENT; + + hwc->config |= event->attr.config; + pr_debug("init raw event with idx %lld \'%s\'\n", + event->attr.config, + arc_pmu->raw_entry[event->attr.config].name); + + return 0; + + default: + return -ENOENT; + } +} + +/* starts all counters */ +static void arc_pmu_enable(struct pmu *pmu) +{ + u32 tmp; + tmp = read_aux_reg(ARC_REG_PCT_CONTROL); + write_aux_reg(ARC_REG_PCT_CONTROL, (tmp & 0xffff0000) | 0x1); +} + +/* stops all counters */ +static void arc_pmu_disable(struct pmu *pmu) +{ + u32 tmp; + tmp = read_aux_reg(ARC_REG_PCT_CONTROL); + write_aux_reg(ARC_REG_PCT_CONTROL, (tmp & 0xffff0000) | 0x0); +} + +static int arc_pmu_event_set_period(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + s64 left = local64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int idx = hwc->idx; + int overflow = 0; + u64 value; + + if (unlikely(left <= -period)) { + /* left underflowed by more than period. */ + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + overflow = 1; + } else if (unlikely(left <= 0)) { + /* left underflowed by less than period. 
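+ * (Example with assumed numbers: period = 1000 and left = -100 gives left = 900 below, so the counter is preloaded to max_period - 900 and the overflow interrupt fires once the 900 events still owed have been counted.)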
*/ + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + overflow = 1; + } + + if (left > arc_pmu->max_period) + left = arc_pmu->max_period; + + value = arc_pmu->max_period - left; + local64_set(&hwc->prev_count, value); + + /* Select counter */ + write_aux_reg(ARC_REG_PCT_INDEX, idx); + + /* Write value */ + write_aux_reg(ARC_REG_PCT_COUNTL, lower_32_bits(value)); + write_aux_reg(ARC_REG_PCT_COUNTH, upper_32_bits(value)); + + perf_event_update_userpage(event); + + return overflow; +} + +/* + * Assigns hardware counter to hardware condition. + * Note that there is no separate start/stop mechanism; + * stopping is achieved by assigning the 'never' condition + */ +static void arc_pmu_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (WARN_ON_ONCE(idx == -1)) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; + + arc_pmu_event_set_period(event); + + /* Enable interrupt for this counter */ + if (is_sampling_event(event)) + write_aux_reg(ARC_REG_PCT_INT_CTRL, + read_aux_reg(ARC_REG_PCT_INT_CTRL) | BIT(idx)); + + /* enable ARC pmu here */ + write_aux_reg(ARC_REG_PCT_INDEX, idx); /* counter # */ + write_aux_reg(ARC_REG_PCT_CONFIG, hwc->config); /* condition */ +} + +static void arc_pmu_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + /* Disable interrupt for this counter */ + if (is_sampling_event(event)) { + /* + * Reset interrupt flag by writing of 1. This is required + * to make sure pending interrupt was not left. + */ + write_aux_reg(ARC_REG_PCT_INT_ACT, BIT(idx)); + write_aux_reg(ARC_REG_PCT_INT_CTRL, + read_aux_reg(ARC_REG_PCT_INT_CTRL) & ~BIT(idx)); + } + + if (!(event->hw.state & PERF_HES_STOPPED)) { + /* stop hw counter here */ + write_aux_reg(ARC_REG_PCT_INDEX, idx); + + /* condition code #0 is always "never" */ + write_aux_reg(ARC_REG_PCT_CONFIG, 0); + + event->hw.state |= PERF_HES_STOPPED; + } + + if ((flags & PERF_EF_UPDATE) && + !(event->hw.state & PERF_HES_UPTODATE)) { + arc_perf_event_update(event, &event->hw, idx); + event->hw.state |= PERF_HES_UPTODATE; + } +} + +static void arc_pmu_del(struct perf_event *event, int flags) +{ + struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu); + + arc_pmu_stop(event, PERF_EF_UPDATE); + __clear_bit(event->hw.idx, pmu_cpu->used_mask); + + pmu_cpu->act_counter[event->hw.idx] = 0; + + perf_event_update_userpage(event); +} + +/* allocate hardware counter and optionally start counting */ +static int arc_pmu_add(struct perf_event *event, int flags) +{ + struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu); + struct hw_perf_event *hwc = &event->hw; + int idx; + + idx = ffz(pmu_cpu->used_mask[0]); + if (idx == arc_pmu->n_counters) + return -EAGAIN; + + __set_bit(idx, pmu_cpu->used_mask); + hwc->idx = idx; + + write_aux_reg(ARC_REG_PCT_INDEX, idx); + + pmu_cpu->act_counter[idx] = event; + + if (is_sampling_event(event)) { + /* Mimic full counter overflow as other arches do */ + write_aux_reg(ARC_REG_PCT_INT_CNTL, + lower_32_bits(arc_pmu->max_period)); + write_aux_reg(ARC_REG_PCT_INT_CNTH, + upper_32_bits(arc_pmu->max_period)); + } + + write_aux_reg(ARC_REG_PCT_CONFIG, 0); + write_aux_reg(ARC_REG_PCT_COUNTL, 0); + write_aux_reg(ARC_REG_PCT_COUNTH, 0); + local64_set(&hwc->prev_count, 0); + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (flags & PERF_EF_START) + arc_pmu_start(event, PERF_EF_RELOAD); + + 
perf_event_update_userpage(event); + + return 0; +} + +#ifdef CONFIG_ISA_ARCV2 +static irqreturn_t arc_pmu_intr(int irq, void *dev) +{ + struct perf_sample_data data; + struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu); + struct pt_regs *regs; + unsigned int active_ints; + int idx; + + arc_pmu_disable(&arc_pmu->pmu); + + active_ints = read_aux_reg(ARC_REG_PCT_INT_ACT); + if (!active_ints) + goto done; + + regs = get_irq_regs(); + + do { + struct perf_event *event; + struct hw_perf_event *hwc; + + idx = __ffs(active_ints); + + /* Reset interrupt flag by writing of 1 */ + write_aux_reg(ARC_REG_PCT_INT_ACT, BIT(idx)); + + /* + * On reset of "interrupt active" bit corresponding + * "interrupt enable" bit gets automatically reset as well. + * Now we need to re-enable interrupt for the counter. + */ + write_aux_reg(ARC_REG_PCT_INT_CTRL, + read_aux_reg(ARC_REG_PCT_INT_CTRL) | BIT(idx)); + + event = pmu_cpu->act_counter[idx]; + hwc = &event->hw; + + WARN_ON_ONCE(hwc->idx != idx); + + arc_perf_event_update(event, &event->hw, event->hw.idx); + perf_sample_data_init(&data, 0, hwc->last_period); + if (arc_pmu_event_set_period(event)) { + if (perf_event_overflow(event, &data, regs)) + arc_pmu_stop(event, 0); + } + + active_ints &= ~BIT(idx); + } while (active_ints); + +done: + arc_pmu_enable(&arc_pmu->pmu); + + return IRQ_HANDLED; +} +#else + +static irqreturn_t arc_pmu_intr(int irq, void *dev) +{ + return IRQ_NONE; +} + +#endif /* CONFIG_ISA_ARCV2 */ + +static void arc_cpu_pmu_irq_init(void *data) +{ + int irq = *(int *)data; + + enable_percpu_irq(irq, IRQ_TYPE_NONE); + + /* Clear all pending interrupt flags */ + write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff); +} + +/* Event field occupies the bottom 15 bits of our config field */ +PMU_FORMAT_ATTR(event, "config:0-14"); +static struct attribute *arc_pmu_format_attrs[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group arc_pmu_format_attr_gr = { + .name = "format", + .attrs = arc_pmu_format_attrs, +}; + +static ssize_t arc_pmu_events_sysfs_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + return sprintf(page, "event=0x%04llx\n", pmu_attr->id); +} + +/* + * We don't add attrs here as we don't have pre-defined list of perf events. + * We will generate and add attrs dynamically in probe() after we read HW + * configuration. 
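+ * (For example, on a core whose CC_BUILD lists conditions such as "crun" and "dclm", probe creates one attribute per condition via arc_pmu_add_raw_event_attr() and hooks the resulting array into this group.)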
+ */ +static struct attribute_group arc_pmu_events_attr_gr = { + .name = "events", +}; + +static void arc_pmu_add_raw_event_attr(int j, char *str) +{ + memmove(arc_pmu->raw_entry[j].name, str, ARCPMU_EVENT_NAME_LEN - 1); + arc_pmu->attr[j].attr.attr.name = arc_pmu->raw_entry[j].name; + arc_pmu->attr[j].attr.attr.mode = VERIFY_OCTAL_PERMISSIONS(0444); + arc_pmu->attr[j].attr.show = arc_pmu_events_sysfs_show; + arc_pmu->attr[j].id = j; + arc_pmu->attrs[j] = &(arc_pmu->attr[j].attr.attr); +} + +static int arc_pmu_raw_alloc(struct device *dev) +{ + arc_pmu->attr = devm_kmalloc_array(dev, arc_pmu->n_events + 1, + sizeof(*arc_pmu->attr), GFP_KERNEL | __GFP_ZERO); + if (!arc_pmu->attr) + return -ENOMEM; + + arc_pmu->attrs = devm_kmalloc_array(dev, arc_pmu->n_events + 1, + sizeof(*arc_pmu->attrs), GFP_KERNEL | __GFP_ZERO); + if (!arc_pmu->attrs) + return -ENOMEM; + + arc_pmu->raw_entry = devm_kmalloc_array(dev, arc_pmu->n_events, + sizeof(*arc_pmu->raw_entry), GFP_KERNEL | __GFP_ZERO); + if (!arc_pmu->raw_entry) + return -ENOMEM; + + return 0; +} + +static inline bool event_in_hw_event_map(int i, char *name) +{ + if (!arc_pmu_ev_hw_map[i]) + return false; + + if (!strlen(arc_pmu_ev_hw_map[i])) + return false; + + if (strcmp(arc_pmu_ev_hw_map[i], name)) + return false; + + return true; +} + +static void arc_pmu_map_hw_event(int j, char *str) +{ + int i; + + /* See if HW condition has been mapped to a perf event_id */ + for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) { + if (event_in_hw_event_map(i, str)) { + pr_debug("mapping perf event %2d to h/w event \'%8s\' (idx %d)\n", + i, str, j); + arc_pmu->ev_hw_idx[i] = j; + } + } +} + +static int arc_pmu_device_probe(struct platform_device *pdev) +{ + struct arc_reg_pct_build pct_bcr; + struct arc_reg_cc_build cc_bcr; + int i, has_interrupts, irq = -1; + int counter_size; /* in bits */ + + union cc_name { + struct { + u32 word0, word1; + char sentinel; + } indiv; + char str[ARCPMU_EVENT_NAME_LEN]; + } cc_name; + + + READ_BCR(ARC_REG_PCT_BUILD, pct_bcr); + if (!pct_bcr.v) { + pr_err("This core does not have performance counters!\n"); + return -ENODEV; + } + BUILD_BUG_ON(ARC_PERF_MAX_COUNTERS > 32); + if (WARN_ON(pct_bcr.c > ARC_PERF_MAX_COUNTERS)) + return -EINVAL; + + READ_BCR(ARC_REG_CC_BUILD, cc_bcr); + if (WARN(!cc_bcr.v, "Counters exist but No countable conditions?")) + return -EINVAL; + + arc_pmu = devm_kzalloc(&pdev->dev, sizeof(struct arc_pmu), GFP_KERNEL); + if (!arc_pmu) + return -ENOMEM; + + arc_pmu->n_events = cc_bcr.c; + + if (arc_pmu_raw_alloc(&pdev->dev)) + return -ENOMEM; + + has_interrupts = is_isa_arcv2() ? pct_bcr.i : 0; + + arc_pmu->n_counters = pct_bcr.c; + counter_size = 32 + (pct_bcr.s << 4); + + arc_pmu->max_period = (1ULL << counter_size) / 2 - 1ULL; + + pr_info("ARC perf\t: %d counters (%d bits), %d conditions%s\n", + arc_pmu->n_counters, counter_size, cc_bcr.c, + has_interrupts ? 
", [overflow IRQ support]" : ""); + + cc_name.str[ARCPMU_EVENT_NAME_LEN - 1] = 0; + for (i = 0; i < PERF_COUNT_ARC_HW_MAX; i++) + arc_pmu->ev_hw_idx[i] = -1; + + /* loop thru all available h/w condition indexes */ + for (i = 0; i < cc_bcr.c; i++) { + write_aux_reg(ARC_REG_CC_INDEX, i); + cc_name.indiv.word0 = le32_to_cpu(read_aux_reg(ARC_REG_CC_NAME0)); + cc_name.indiv.word1 = le32_to_cpu(read_aux_reg(ARC_REG_CC_NAME1)); + + arc_pmu_map_hw_event(i, cc_name.str); + arc_pmu_add_raw_event_attr(i, cc_name.str); + } + + arc_pmu_events_attr_gr.attrs = arc_pmu->attrs; + arc_pmu->attr_groups[ARCPMU_ATTR_GR_EVENTS] = &arc_pmu_events_attr_gr; + arc_pmu->attr_groups[ARCPMU_ATTR_GR_FORMATS] = &arc_pmu_format_attr_gr; + + arc_pmu->pmu = (struct pmu) { + .pmu_enable = arc_pmu_enable, + .pmu_disable = arc_pmu_disable, + .event_init = arc_pmu_event_init, + .add = arc_pmu_add, + .del = arc_pmu_del, + .start = arc_pmu_start, + .stop = arc_pmu_stop, + .read = arc_pmu_read, + .attr_groups = arc_pmu->attr_groups, + }; + + if (has_interrupts) { + irq = platform_get_irq(pdev, 0); + if (irq >= 0) { + int ret; + + arc_pmu->irq = irq; + + /* intc map function ensures irq_set_percpu_devid() called */ + ret = request_percpu_irq(irq, arc_pmu_intr, "ARC perf counters", + this_cpu_ptr(&arc_pmu_cpu)); + + if (!ret) + on_each_cpu(arc_cpu_pmu_irq_init, &irq, 1); + else + irq = -1; + } + + } + + if (irq == -1) + arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + + /* + * perf parser doesn't really like '-' symbol in events name, so let's + * use '_' in arc pct name as it goes to kernel PMU event prefix. + */ + return perf_pmu_register(&arc_pmu->pmu, "arc_pct", PERF_TYPE_RAW); +} + +static const struct of_device_id arc_pmu_match[] = { + { .compatible = "snps,arc700-pct" }, + { .compatible = "snps,archs-pct" }, + {}, +}; +MODULE_DEVICE_TABLE(of, arc_pmu_match); + +static struct platform_driver arc_pmu_driver = { + .driver = { + .name = "arc-pct", + .of_match_table = of_match_ptr(arc_pmu_match), + }, + .probe = arc_pmu_device_probe, +}; + +module_platform_driver(arc_pmu_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Mischa Jonker "); +MODULE_DESCRIPTION("ARC PMU driver"); diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c new file mode 100644 index 0000000000..8e90052f6f --- /dev/null +++ b/arch/arc/kernel/process.c @@ -0,0 +1,303 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * Amit Bhor, Kanika Nema: Codito Technologies 2004 + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +SYSCALL_DEFINE1(arc_settls, void *, user_tls_data_ptr) +{ + task_thread_info(current)->thr_ptr = (unsigned int)user_tls_data_ptr; + return 0; +} + +/* + * We return the user space TLS data ptr as sys-call return code + * Ideally it should be copy to user. 
+ * However we can cheat by the fact that some sys-calls do return + * absurdly high values + * Since the tls dat aptr is not going to be in range of 0xFFFF_xxxx + * it won't be considered a sys-call error + * and it will be loads better than copy-to-user, which is a definite + * D-TLB Miss + */ +SYSCALL_DEFINE0(arc_gettls) +{ + return task_thread_info(current)->thr_ptr; +} + +SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new) +{ + struct pt_regs *regs = current_pt_regs(); + u32 uval; + int ret; + + /* + * This is only for old cores lacking LLOCK/SCOND, which by definition + * can't possibly be SMP. Thus doesn't need to be SMP safe. + * And this also helps reduce the overhead for serializing in + * the UP case + */ + WARN_ON_ONCE(IS_ENABLED(CONFIG_SMP)); + + /* Z indicates to userspace if operation succeeded */ + regs->status32 &= ~STATUS_Z_MASK; + + ret = access_ok(uaddr, sizeof(*uaddr)); + if (!ret) + goto fail; + +again: + preempt_disable(); + + ret = __get_user(uval, uaddr); + if (ret) + goto fault; + + if (uval != expected) + goto out; + + ret = __put_user(new, uaddr); + if (ret) + goto fault; + + regs->status32 |= STATUS_Z_MASK; + +out: + preempt_enable(); + return uval; + +fault: + preempt_enable(); + + if (unlikely(ret != -EFAULT)) + goto fail; + + mmap_read_lock(current->mm); + ret = fixup_user_fault(current->mm, (unsigned long) uaddr, + FAULT_FLAG_WRITE, NULL); + mmap_read_unlock(current->mm); + + if (likely(!ret)) + goto again; + +fail: + force_sig(SIGSEGV); + return ret; +} + +#ifdef CONFIG_ISA_ARCV2 + +void arch_cpu_idle(void) +{ + /* Re-enable interrupts <= default irq priority before committing SLEEP */ + const unsigned int arg = 0x10 | ARCV2_IRQ_DEF_PRIO; + + __asm__ __volatile__( + "sleep %0 \n" + : + :"I"(arg)); /* can't be "r" has to be embedded const */ +} + +#else /* ARC700 */ + +void arch_cpu_idle(void) +{ + /* sleep, but enable both set E1/E2 (levels of interrupts) before committing */ + __asm__ __volatile__("sleep 0x3 \n"); +} + +#endif + +asmlinkage void ret_from_fork(void); + +/* + * Copy architecture-specific thread state + * + * Layout of Child kernel mode stack as setup at the end of this function is + * + * | ... | + * | ... | + * | unused | + * | | + * ------------------ + * | r25 | <==== top of Stack (thread.ksp) + * ~ ~ + * | --to-- | (CALLEE Regs of kernel mode) + * | r13 | + * ------------------ + * | fp | + * | blink | @ret_from_fork + * ------------------ + * | | + * ~ ~ + * ~ ~ + * | | + * ------------------ + * | r12 | + * ~ ~ + * | --to-- | (scratch Regs of user mode) + * | r0 | + * ------------------ + * | SP | + * | orig_r0 | + * | event/ECR | + * | user_r25 | + * ------------------ <===== END of PAGE + */ +int copy_thread(unsigned long clone_flags, unsigned long usp, + unsigned long kthread_arg, struct task_struct *p, + unsigned long tls) +{ + struct pt_regs *c_regs; /* child's pt_regs */ + unsigned long *childksp; /* to unwind out of __switch_to() */ + struct callee_regs *c_callee; /* child's callee regs */ + struct callee_regs *parent_callee; /* paren't callee */ + struct pt_regs *regs = current_pt_regs(); + + /* Mark the specific anchors to begin with (see pic above) */ + c_regs = task_pt_regs(p); + childksp = (unsigned long *)c_regs - 2; /* 2 words for FP/BLINK */ + c_callee = ((struct callee_regs *)childksp) - 1; + + /* + * __switch_to() uses thread.ksp to start unwinding stack + * For kernel threads we don't need to create callee regs, the + * stack layout nevertheless needs to remain the same. 
+ * Also, since __switch_to anyways unwinds callee regs, we use + * this to populate kernel thread entry-pt/args into callee regs, + * so that ret_from_kernel_thread() becomes simpler. + */ + p->thread.ksp = (unsigned long)c_callee; /* THREAD_KSP */ + + /* __switch_to expects FP(0), BLINK(return addr) at top */ + childksp[0] = 0; /* fp */ + childksp[1] = (unsigned long)ret_from_fork; /* blink */ + + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + memset(c_regs, 0, sizeof(struct pt_regs)); + + c_callee->r13 = kthread_arg; + c_callee->r14 = usp; /* function */ + + return 0; + } + + /*--------- User Task Only --------------*/ + + /* __switch_to expects FP(0), BLINK(return addr) at top of stack */ + childksp[0] = 0; /* for POP fp */ + childksp[1] = (unsigned long)ret_from_fork; /* for POP blink */ + + /* Copy parents pt regs on child's kernel mode stack */ + *c_regs = *regs; + + if (usp) + c_regs->sp = usp; + + c_regs->r0 = 0; /* fork returns 0 in child */ + + parent_callee = ((struct callee_regs *)regs) - 1; + *c_callee = *parent_callee; + + if (unlikely(clone_flags & CLONE_SETTLS)) { + /* + * set task's userland tls data ptr from 4th arg + * clone C-lib call is difft from clone sys-call + */ + task_thread_info(p)->thr_ptr = tls; + } else { + /* Normal fork case: set parent's TLS ptr in child */ + task_thread_info(p)->thr_ptr = + task_thread_info(current)->thr_ptr; + } + + + /* + * setup usermode thread pointer #1: + * when child is picked by scheduler, __switch_to() uses @c_callee to + * populate usermode callee regs: this works (despite being in a kernel + * function) since special return path for child @ret_from_fork() + * ensures those regs are not clobbered all the way to RTIE to usermode + */ + c_callee->r25 = task_thread_info(p)->thr_ptr; + +#ifdef CONFIG_ARC_CURR_IN_REG + /* + * setup usermode thread pointer #2: + * however for this special use of r25 in kernel, __switch_to() sets + * r25 for kernel needs and only in the final return path is usermode + * r25 setup, from pt_regs->user_r25. So set that up as well + */ + c_regs->user_r25 = c_callee->r25; +#endif + + return 0; +} + +/* + * Do necessary setup to start up a new user task + */ +void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp) +{ + regs->sp = usp; + regs->ret = pc; + + /* + * [U]ser Mode bit set + * [L] ZOL loop inhibited to begin with - cleared by a LP insn + * Interrupts enabled + */ + regs->status32 = STATUS_U_MASK | STATUS_L_MASK | ISA_INIT_STATUS_BITS; + + fpu_init_task(regs); + + /* bogus seed values for debugging */ + regs->lp_start = 0x10; + regs->lp_end = 0x80; +} + +/* + * Some archs flush debug and FPU info here + */ +void flush_thread(void) +{ +} + +int elf_check_arch(const struct elf32_hdr *x) +{ + unsigned int eflags; + + if (x->e_machine != EM_ARC_INUSE) { + pr_err("ELF not built for %s ISA\n", + is_isa_arcompact() ? "ARCompact":"ARCv2"); + return 0; + } + + eflags = x->e_flags; + if ((eflags & EF_ARC_OSABI_MSK) != EF_ARC_OSABI_CURRENT) { + pr_err("ABI mismatch - you need newer toolchain\n"); + force_fatal_sig(SIGSEGV); + return 0; + } + + return 1; +} +EXPORT_SYMBOL(elf_check_arch); diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c new file mode 100644 index 0000000000..883391977f --- /dev/null +++ b/arch/arc/kernel/ptrace.c @@ -0,0 +1,270 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + */ + +#include +#include +#include +#include +#include +#include + +static struct callee_regs *task_callee_regs(struct task_struct *tsk) +{ + struct callee_regs *tmp = (struct callee_regs *)tsk->thread.callee_reg; + return tmp; +} + +static int genregs_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + const struct pt_regs *ptregs = task_pt_regs(target); + const struct callee_regs *cregs = task_callee_regs(target); + unsigned int stop_pc_val; + + membuf_zero(&to, 4); // pad + membuf_store(&to, ptregs->bta); + membuf_store(&to, ptregs->lp_start); + membuf_store(&to, ptregs->lp_end); + membuf_store(&to, ptregs->lp_count); + membuf_store(&to, ptregs->status32); + membuf_store(&to, ptregs->ret); + membuf_store(&to, ptregs->blink); + membuf_store(&to, ptregs->fp); + membuf_store(&to, ptregs->r26); // gp + membuf_store(&to, ptregs->r12); + membuf_store(&to, ptregs->r11); + membuf_store(&to, ptregs->r10); + membuf_store(&to, ptregs->r9); + membuf_store(&to, ptregs->r8); + membuf_store(&to, ptregs->r7); + membuf_store(&to, ptregs->r6); + membuf_store(&to, ptregs->r5); + membuf_store(&to, ptregs->r4); + membuf_store(&to, ptregs->r3); + membuf_store(&to, ptregs->r2); + membuf_store(&to, ptregs->r1); + membuf_store(&to, ptregs->r0); + membuf_store(&to, ptregs->sp); + membuf_zero(&to, 4); // pad2 + membuf_store(&to, cregs->r25); + membuf_store(&to, cregs->r24); + membuf_store(&to, cregs->r23); + membuf_store(&to, cregs->r22); + membuf_store(&to, cregs->r21); + membuf_store(&to, cregs->r20); + membuf_store(&to, cregs->r19); + membuf_store(&to, cregs->r18); + membuf_store(&to, cregs->r17); + membuf_store(&to, cregs->r16); + membuf_store(&to, cregs->r15); + membuf_store(&to, cregs->r14); + membuf_store(&to, cregs->r13); + membuf_store(&to, target->thread.fault_address); // efa + + if (in_brkpt_trap(ptregs)) { + stop_pc_val = target->thread.fault_address; + pr_debug("\t\tstop_pc (brk-pt)\n"); + } else { + stop_pc_val = ptregs->ret; + pr_debug("\t\tstop_pc (others)\n"); + } + + return membuf_store(&to, stop_pc_val); // stop_pc +} + +static int genregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + const struct pt_regs *ptregs = task_pt_regs(target); + const struct callee_regs *cregs = task_callee_regs(target); + int ret = 0; + +#define REG_IN_CHUNK(FIRST, NEXT, PTR) \ + if (!ret) \ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, \ + (void *)(PTR), \ + offsetof(struct user_regs_struct, FIRST), \ + offsetof(struct user_regs_struct, NEXT)); + +#define REG_IN_ONE(LOC, PTR) \ + if (!ret) \ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, \ + (void *)(PTR), \ + offsetof(struct user_regs_struct, LOC), \ + offsetof(struct user_regs_struct, LOC) + 4); + +#define REG_IGNORE_ONE(LOC) \ + if (!ret) \ + ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, \ + offsetof(struct user_regs_struct, LOC), \ + offsetof(struct user_regs_struct, LOC) + 4); + + REG_IGNORE_ONE(pad); + + REG_IN_ONE(scratch.bta, &ptregs->bta); + REG_IN_ONE(scratch.lp_start, &ptregs->lp_start); + REG_IN_ONE(scratch.lp_end, &ptregs->lp_end); + REG_IN_ONE(scratch.lp_count, &ptregs->lp_count); + + REG_IGNORE_ONE(scratch.status32); + + REG_IN_ONE(scratch.ret, &ptregs->ret); + REG_IN_ONE(scratch.blink, &ptregs->blink); + REG_IN_ONE(scratch.fp, &ptregs->fp); + REG_IN_ONE(scratch.gp, &ptregs->r26); + REG_IN_ONE(scratch.r12, &ptregs->r12); + REG_IN_ONE(scratch.r11, 
&ptregs->r11); + REG_IN_ONE(scratch.r10, &ptregs->r10); + REG_IN_ONE(scratch.r9, &ptregs->r9); + REG_IN_ONE(scratch.r8, &ptregs->r8); + REG_IN_ONE(scratch.r7, &ptregs->r7); + REG_IN_ONE(scratch.r6, &ptregs->r6); + REG_IN_ONE(scratch.r5, &ptregs->r5); + REG_IN_ONE(scratch.r4, &ptregs->r4); + REG_IN_ONE(scratch.r3, &ptregs->r3); + REG_IN_ONE(scratch.r2, &ptregs->r2); + REG_IN_ONE(scratch.r1, &ptregs->r1); + REG_IN_ONE(scratch.r0, &ptregs->r0); + REG_IN_ONE(scratch.sp, &ptregs->sp); + + REG_IGNORE_ONE(pad2); + + REG_IN_ONE(callee.r25, &cregs->r25); + REG_IN_ONE(callee.r24, &cregs->r24); + REG_IN_ONE(callee.r23, &cregs->r23); + REG_IN_ONE(callee.r22, &cregs->r22); + REG_IN_ONE(callee.r21, &cregs->r21); + REG_IN_ONE(callee.r20, &cregs->r20); + REG_IN_ONE(callee.r19, &cregs->r19); + REG_IN_ONE(callee.r18, &cregs->r18); + REG_IN_ONE(callee.r17, &cregs->r17); + REG_IN_ONE(callee.r16, &cregs->r16); + REG_IN_ONE(callee.r15, &cregs->r15); + REG_IN_ONE(callee.r14, &cregs->r14); + REG_IN_ONE(callee.r13, &cregs->r13); + + REG_IGNORE_ONE(efa); /* efa update invalid */ + REG_IGNORE_ONE(stop_pc); /* PC updated via @ret */ + + return ret; +} + +#ifdef CONFIG_ISA_ARCV2 +static int arcv2regs_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + const struct pt_regs *regs = task_pt_regs(target); + + if (IS_ENABLED(CONFIG_ARC_HAS_ACCL_REGS)) + /* + * itemized copy not needed like above as layout of regs (r30,r58,r59) + * is exactly same in kernel (pt_regs) and userspace (user_regs_arcv2) + */ + return membuf_write(&to, ®s->r30, sizeof(struct user_regs_arcv2)); + + + membuf_write(&to, ®s->r30, 4); /* r30 only */ + return membuf_zero(&to, sizeof(struct user_regs_arcv2) - 4); +} + +static int arcv2regs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + const struct pt_regs *regs = task_pt_regs(target); + int ret, copy_sz; + + if (IS_ENABLED(CONFIG_ARC_HAS_ACCL_REGS)) + copy_sz = sizeof(struct user_regs_arcv2); + else + copy_sz = 4; /* r30 only */ + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, (void *)®s->r30, + 0, copy_sz); + + return ret; +} + +#endif + +enum arc_getset { + REGSET_CMN, + REGSET_ARCV2, +}; + +static const struct user_regset arc_regsets[] = { + [REGSET_CMN] = { + .core_note_type = NT_PRSTATUS, + .n = ELF_NGREG, + .size = sizeof(unsigned long), + .align = sizeof(unsigned long), + .regset_get = genregs_get, + .set = genregs_set, + }, +#ifdef CONFIG_ISA_ARCV2 + [REGSET_ARCV2] = { + .core_note_type = NT_ARC_V2, + .n = ELF_ARCV2REG, + .size = sizeof(unsigned long), + .align = sizeof(unsigned long), + .regset_get = arcv2regs_get, + .set = arcv2regs_set, + }, +#endif +}; + +static const struct user_regset_view user_arc_view = { + .name = "arc", + .e_machine = EM_ARC_INUSE, + .regsets = arc_regsets, + .n = ARRAY_SIZE(arc_regsets) +}; + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ + return &user_arc_view; +} + +void ptrace_disable(struct task_struct *child) +{ +} + +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + int ret = -EIO; + + pr_debug("REQ=%ld: ADDR =0x%lx, DATA=0x%lx)\n", request, addr, data); + + switch (request) { + case PTRACE_GET_THREAD_AREA: + ret = put_user(task_thread_info(child)->thr_ptr, + (unsigned long __user *)data); + break; + default: + ret = ptrace_request(child, request, addr, data); + break; + } + + return ret; +} + +asmlinkage int 
syscall_trace_entry(struct pt_regs *regs) +{ + if (tracehook_report_syscall_entry(regs)) + return ULONG_MAX; + + return regs->r8; +} + +asmlinkage void syscall_trace_exit(struct pt_regs *regs) +{ + tracehook_report_syscall_exit(regs, 0); +} diff --git a/arch/arc/kernel/reset.c b/arch/arc/kernel/reset.c new file mode 100644 index 0000000000..fd6c3eb930 --- /dev/null +++ b/arch/arc/kernel/reset.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#include +#include +#include +#include + +void machine_halt(void) +{ + /* Halt the processor */ + __asm__ __volatile__("flag 1\n"); +} + +void machine_restart(char *__unused) +{ + /* Soft reset : jump to reset vector */ + pr_info("Put your restart handler here\n"); + machine_halt(); +} + +void machine_power_off(void) +{ + /* FIXME :: power off ??? */ + machine_halt(); +} + +void (*pm_power_off) (void) = NULL; +EXPORT_SYMBOL(pm_power_off); diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c new file mode 100644 index 0000000000..41f07b3e59 --- /dev/null +++ b/arch/arc/kernel/setup.c @@ -0,0 +1,701 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define FIX_PTR(x) __asm__ __volatile__(";" : "+r"(x)) + +unsigned int intr_to_DE_cnt; + +/* Part of U-boot ABI: see head.S */ +int __initdata uboot_tag; +int __initdata uboot_magic; +char __initdata *uboot_arg; + +const struct machine_desc *machine_desc; + +struct task_struct *_current_task[NR_CPUS]; /* For stack switching */ + +struct cpuinfo_arc cpuinfo_arc700[NR_CPUS]; + +static const struct id_to_str arc_legacy_rel[] = { + /* ID.ARCVER, Release */ +#ifdef CONFIG_ISA_ARCOMPACT + { 0x34, "R4.10"}, + { 0x35, "R4.11"}, +#else + { 0x51, "R2.0" }, + { 0x52, "R2.1" }, + { 0x53, "R3.0" }, +#endif + { 0x00, NULL } +}; + +static const struct id_to_str arc_hs_ver54_rel[] = { + /* UARCH.MAJOR, Release */ + { 0, "R3.10a"}, + { 1, "R3.50a"}, + { 2, "R3.60a"}, + { 3, "R4.00a"}, + { 0xFF, NULL } +}; + +static void read_decode_ccm_bcr(struct cpuinfo_arc *cpu) +{ + if (is_isa_arcompact()) { + struct bcr_iccm_arcompact iccm; + struct bcr_dccm_arcompact dccm; + + READ_BCR(ARC_REG_ICCM_BUILD, iccm); + if (iccm.ver) { + cpu->iccm.sz = 4096 << iccm.sz; /* 8K to 512K */ + cpu->iccm.base_addr = iccm.base << 16; + } + + READ_BCR(ARC_REG_DCCM_BUILD, dccm); + if (dccm.ver) { + unsigned long base; + cpu->dccm.sz = 2048 << dccm.sz; /* 2K to 256K */ + + base = read_aux_reg(ARC_REG_DCCM_BASE_BUILD); + cpu->dccm.base_addr = base & ~0xF; + } + } else { + struct bcr_iccm_arcv2 iccm; + struct bcr_dccm_arcv2 dccm; + unsigned long region; + + READ_BCR(ARC_REG_ICCM_BUILD, iccm); + if (iccm.ver) { + cpu->iccm.sz = 256 << iccm.sz00; /* 512B to 16M */ + if (iccm.sz00 == 0xF && iccm.sz01 > 0) + cpu->iccm.sz <<= iccm.sz01; + + region = read_aux_reg(ARC_REG_AUX_ICCM); + cpu->iccm.base_addr = region & 0xF0000000; + } + + READ_BCR(ARC_REG_DCCM_BUILD, dccm); + if (dccm.ver) { + cpu->dccm.sz = 256 << dccm.sz0; + if (dccm.sz0 == 0xF && dccm.sz1 > 0) + cpu->dccm.sz <<= dccm.sz1; + + region = read_aux_reg(ARC_REG_AUX_DCCM); + cpu->dccm.base_addr = region & 0xF0000000; + } + } +} + +static void decode_arc_core(struct 
cpuinfo_arc *cpu) +{ + struct bcr_uarch_build_arcv2 uarch; + const struct id_to_str *tbl; + + if (cpu->core.family < 0x54) { /* includes arc700 */ + + for (tbl = &arc_legacy_rel[0]; tbl->id != 0; tbl++) { + if (cpu->core.family == tbl->id) { + cpu->release = tbl->str; + break; + } + } + + if (is_isa_arcompact()) + cpu->name = "ARC700"; + else if (tbl->str) + cpu->name = "HS38"; + else + cpu->name = cpu->release = "Unknown"; + + return; + } + + /* + * Initial HS cores bumped AUX IDENTITY.ARCVER for each release until + * ARCVER 0x54 which introduced AUX MICRO_ARCH_BUILD and subsequent + * releases only update it. + */ + READ_BCR(ARC_REG_MICRO_ARCH_BCR, uarch); + + if (uarch.prod == 4) { + cpu->name = "HS48"; + cpu->extn.dual = 1; + + } else { + cpu->name = "HS38"; + } + + for (tbl = &arc_hs_ver54_rel[0]; tbl->id != 0xFF; tbl++) { + if (uarch.maj == tbl->id) { + cpu->release = tbl->str; + break; + } + } +} + +static void read_arc_build_cfg_regs(void) +{ + struct bcr_timer timer; + struct bcr_generic bcr; + struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; + struct bcr_isa_arcv2 isa; + struct bcr_actionpoint ap; + + FIX_PTR(cpu); + + READ_BCR(AUX_IDENTITY, cpu->core); + decode_arc_core(cpu); + + READ_BCR(ARC_REG_TIMERS_BCR, timer); + cpu->extn.timer0 = timer.t0; + cpu->extn.timer1 = timer.t1; + cpu->extn.rtc = timer.rtc; + + cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE); + + READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy); + + /* Read CCM BCRs for boot reporting even if not enabled in Kconfig */ + read_decode_ccm_bcr(cpu); + + read_decode_mmu_bcr(); + read_decode_cache_bcr(); + + if (is_isa_arcompact()) { + struct bcr_fp_arcompact sp, dp; + struct bcr_bpu_arcompact bpu; + + READ_BCR(ARC_REG_FP_BCR, sp); + READ_BCR(ARC_REG_DPFP_BCR, dp); + cpu->extn.fpu_sp = sp.ver ? 1 : 0; + cpu->extn.fpu_dp = dp.ver ? 1 : 0; + + READ_BCR(ARC_REG_BPU_BCR, bpu); + cpu->bpu.ver = bpu.ver; + cpu->bpu.full = bpu.fam ? 1 : 0; + if (bpu.ent) { + cpu->bpu.num_cache = 256 << (bpu.ent - 1); + cpu->bpu.num_pred = 256 << (bpu.ent - 1); + } + } else { + struct bcr_fp_arcv2 spdp; + struct bcr_bpu_arcv2 bpu; + + READ_BCR(ARC_REG_FP_V2_BCR, spdp); + cpu->extn.fpu_sp = spdp.sp ? 1 : 0; + cpu->extn.fpu_dp = spdp.dp ? 1 : 0; + + READ_BCR(ARC_REG_BPU_BCR, bpu); + cpu->bpu.ver = bpu.ver; + cpu->bpu.full = bpu.ft; + cpu->bpu.num_cache = 256 << bpu.bce; + cpu->bpu.num_pred = 2048 << bpu.pte; + cpu->bpu.ret_stk = 4 << bpu.rse; + + /* if dual issue hardware, is it enabled ? */ + if (cpu->extn.dual) { + unsigned int exec_ctrl; + + READ_BCR(AUX_EXEC_CTRL, exec_ctrl); + cpu->extn.dual_enb = !(exec_ctrl & 1); + } + } + + READ_BCR(ARC_REG_AP_BCR, ap); + if (ap.ver) { + cpu->extn.ap_num = 2 << ap.num; + cpu->extn.ap_full = !ap.min; + } + + READ_BCR(ARC_REG_SMART_BCR, bcr); + cpu->extn.smart = bcr.ver ? 1 : 0; + + READ_BCR(ARC_REG_RTT_BCR, bcr); + cpu->extn.rtt = bcr.ver ? 1 : 0; + + READ_BCR(ARC_REG_ISA_CFG_BCR, isa); + + /* some hacks for lack of feature BCR info in old ARC700 cores */ + if (is_isa_arcompact()) { + if (!isa.ver) /* ISA BCR absent, use Kconfig info */ + cpu->isa.atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC); + else { + /* ARC700_BUILD only has 2 bits of isa info */ + struct bcr_generic bcr = *(struct bcr_generic *)&isa; + cpu->isa.atomic = bcr.info & 1; + } + + cpu->isa.be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); + + /* there's no direct way to distinguish 750 vs. 
770 */ + if (unlikely(cpu->core.family < 0x34 || cpu->mmu.ver < 3)) + cpu->name = "ARC750"; + } else { + cpu->isa = isa; + } +} + +static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) +{ + struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id]; + struct bcr_identity *core = &cpu->core; + char mpy_opt[16]; + int n = 0; + + FIX_PTR(cpu); + + n += scnprintf(buf + n, len - n, + "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n", + core->family, core->cpu_id, core->chip_id); + + n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s%s%s\n", + cpu_id, cpu->name, cpu->release, + is_isa_arcompact() ? "ARCompact" : "ARCv2", + IS_AVAIL1(cpu->isa.be, "[Big-Endian]"), + IS_AVAIL3(cpu->extn.dual, cpu->extn.dual_enb, " Dual-Issue ")); + + n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s%s%s\nISA Extn\t: ", + IS_AVAIL1(cpu->extn.timer0, "Timer0 "), + IS_AVAIL1(cpu->extn.timer1, "Timer1 "), + IS_AVAIL2(cpu->extn.rtc, "RTC [UP 64-bit] ", CONFIG_ARC_TIMERS_64BIT), + IS_AVAIL2(cpu->extn.gfrc, "GFRC [SMP 64-bit] ", CONFIG_ARC_TIMERS_64BIT)); + + if (cpu->extn_mpy.ver) { + if (is_isa_arcompact()) { + scnprintf(mpy_opt, 16, "mpy"); + } else { + + int opt = 2; /* stock MPY/MPYH */ + + if (cpu->extn_mpy.dsp) /* OPT 7-9 */ + opt = cpu->extn_mpy.dsp + 6; + + scnprintf(mpy_opt, 16, "mpy[opt %d] ", opt); + } + } + + n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n", + IS_AVAIL2(cpu->isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC), + IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64), + IS_AVAIL2(cpu->isa.unalign, "unalign ", CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS), + IS_AVAIL1(cpu->extn_mpy.ver, mpy_opt), + IS_AVAIL1(cpu->isa.div_rem, "div_rem ")); + + if (cpu->bpu.ver) { + n += scnprintf(buf + n, len - n, + "BPU\t\t: %s%s match, cache:%d, Predict Table:%d Return stk: %d", + IS_AVAIL1(cpu->bpu.full, "full"), + IS_AVAIL1(!cpu->bpu.full, "partial"), + cpu->bpu.num_cache, cpu->bpu.num_pred, cpu->bpu.ret_stk); + + if (is_isa_arcv2()) { + struct bcr_lpb lpb; + + READ_BCR(ARC_REG_LPB_BUILD, lpb); + if (lpb.ver) { + unsigned int ctl; + ctl = read_aux_reg(ARC_REG_LPB_CTRL); + + n += scnprintf(buf + n, len - n, " Loop Buffer:%d %s", + lpb.entries, + IS_DISABLED_RUN(!ctl)); + } + } + n += scnprintf(buf + n, len - n, "\n"); + } + + return buf; +} + +static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) +{ + int n = 0; + struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id]; + + FIX_PTR(cpu); + + n += scnprintf(buf + n, len - n, "Vector Table\t: %#x\n", cpu->vec_base); + + if (cpu->extn.fpu_sp || cpu->extn.fpu_dp) + n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n", + IS_AVAIL1(cpu->extn.fpu_sp, "SP "), + IS_AVAIL1(cpu->extn.fpu_dp, "DP ")); + + if (cpu->extn.ap_num | cpu->extn.smart | cpu->extn.rtt) { + n += scnprintf(buf + n, len - n, "DEBUG\t\t: %s%s", + IS_AVAIL1(cpu->extn.smart, "smaRT "), + IS_AVAIL1(cpu->extn.rtt, "RTT ")); + if (cpu->extn.ap_num) { + n += scnprintf(buf + n, len - n, "ActionPoint %d/%s", + cpu->extn.ap_num, + cpu->extn.ap_full ? 
"full":"min"); + } + n += scnprintf(buf + n, len - n, "\n"); + } + + if (cpu->dccm.sz || cpu->iccm.sz) + n += scnprintf(buf + n, len - n, "Extn [CCM]\t: DCCM @ %x, %d KB / ICCM: @ %x, %d KB\n", + cpu->dccm.base_addr, TO_KB(cpu->dccm.sz), + cpu->iccm.base_addr, TO_KB(cpu->iccm.sz)); + + if (is_isa_arcv2()) { + + /* Error Protection: ECC/Parity */ + struct bcr_erp erp; + READ_BCR(ARC_REG_ERP_BUILD, erp); + + if (erp.ver) { + struct ctl_erp ctl; + READ_BCR(ARC_REG_ERP_CTRL, ctl); + + /* inverted bits: 0 means enabled */ + n += scnprintf(buf + n, len - n, "Extn [ECC]\t: %s%s%s%s%s%s\n", + IS_AVAIL3(erp.ic, !ctl.dpi, "IC "), + IS_AVAIL3(erp.dc, !ctl.dpd, "DC "), + IS_AVAIL3(erp.mmu, !ctl.mpd, "MMU ")); + } + } + + return buf; +} + +void chk_opt_strict(char *opt_name, bool hw_exists, bool opt_ena) +{ + if (hw_exists && !opt_ena) + pr_warn(" ! Enable %s for working apps\n", opt_name); + else if (!hw_exists && opt_ena) + panic("Disable %s, hardware NOT present\n", opt_name); +} + +void chk_opt_weak(char *opt_name, bool hw_exists, bool opt_ena) +{ + if (!hw_exists && opt_ena) + panic("Disable %s, hardware NOT present\n", opt_name); +} + +static void arc_chk_core_config(void) +{ + struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; + int present = 0; + + if (!cpu->extn.timer0) + panic("Timer0 is not present!\n"); + + if (!cpu->extn.timer1) + panic("Timer1 is not present!\n"); + +#ifdef CONFIG_ARC_HAS_DCCM + /* + * DCCM can be arbit placed in hardware. + * Make sure it's placement/sz matches what Linux is built with + */ + if ((unsigned int)__arc_dccm_base != cpu->dccm.base_addr) + panic("Linux built with incorrect DCCM Base address\n"); + + if (CONFIG_ARC_DCCM_SZ * SZ_1K != cpu->dccm.sz) + panic("Linux built with incorrect DCCM Size\n"); +#endif + +#ifdef CONFIG_ARC_HAS_ICCM + if (CONFIG_ARC_ICCM_SZ * SZ_1K != cpu->iccm.sz) + panic("Linux built with incorrect ICCM Size\n"); +#endif + + /* + * FP hardware/software config sanity + * -If hardware present, kernel needs to save/restore FPU state + * -If not, it will crash trying to save/restore the non-existant regs + */ + + if (is_isa_arcompact()) { + /* only DPDP checked since SP has no arch visible regs */ + present = cpu->extn.fpu_dp; + CHK_OPT_STRICT(CONFIG_ARC_FPU_SAVE_RESTORE, present); + } else { + /* Accumulator Low:High pair (r58:59) present if DSP MPY or FPU */ + present = cpu->extn_mpy.dsp | cpu->extn.fpu_sp | cpu->extn.fpu_dp; + CHK_OPT_STRICT(CONFIG_ARC_HAS_ACCL_REGS, present); + + dsp_config_check(); + } +} + +/* + * Initialize and setup the processor core + * This is called by all the CPUs thus should not do special case stuff + * such as only for boot CPU etc + */ + +void setup_processor(void) +{ + char str[512]; + int cpu_id = smp_processor_id(); + + read_arc_build_cfg_regs(); + arc_init_IRQ(); + + pr_info("%s", arc_cpu_mumbojumbo(cpu_id, str, sizeof(str))); + + arc_mmu_init(); + arc_cache_init(); + + pr_info("%s", arc_extn_mumbojumbo(cpu_id, str, sizeof(str))); + pr_info("%s", arc_platform_smp_cpuinfo()); + + arc_chk_core_config(); +} + +static inline bool uboot_arg_invalid(unsigned long addr) +{ + /* + * Check that it is a untranslated address (although MMU is not enabled + * yet, it being a high address ensures this is not by fluke) + */ + if (addr < PAGE_OFFSET) + return true; + + /* Check that address doesn't clobber resident kernel image */ + return addr >= (unsigned long)_stext && addr <= (unsigned long)_end; +} + +#define IGNORE_ARGS "Ignore U-boot args: " + +/* uboot_tag values for U-boot - kernel ABI revision 0; see 
head.S */ +#define UBOOT_TAG_NONE 0 +#define UBOOT_TAG_CMDLINE 1 +#define UBOOT_TAG_DTB 2 +/* We always pass 0 as magic from U-boot */ +#define UBOOT_MAGIC_VALUE 0 + +void __init handle_uboot_args(void) +{ + bool use_embedded_dtb = true; + bool append_cmdline = false; + + /* check that we know this tag */ + if (uboot_tag != UBOOT_TAG_NONE && + uboot_tag != UBOOT_TAG_CMDLINE && + uboot_tag != UBOOT_TAG_DTB) { + pr_warn(IGNORE_ARGS "invalid uboot tag: '%08x'\n", uboot_tag); + goto ignore_uboot_args; + } + + if (uboot_magic != UBOOT_MAGIC_VALUE) { + pr_warn(IGNORE_ARGS "non zero uboot magic\n"); + goto ignore_uboot_args; + } + + if (uboot_tag != UBOOT_TAG_NONE && + uboot_arg_invalid((unsigned long)uboot_arg)) { + pr_warn(IGNORE_ARGS "invalid uboot arg: '%px'\n", uboot_arg); + goto ignore_uboot_args; + } + + /* see if U-boot passed an external Device Tree blob */ + if (uboot_tag == UBOOT_TAG_DTB) { + machine_desc = setup_machine_fdt((void *)uboot_arg); + + /* external Device Tree blob is invalid - use embedded one */ + use_embedded_dtb = !machine_desc; + } + + if (uboot_tag == UBOOT_TAG_CMDLINE) + append_cmdline = true; + +ignore_uboot_args: + + if (use_embedded_dtb) { + machine_desc = setup_machine_fdt(__dtb_start); + if (!machine_desc) + panic("Embedded DT invalid\n"); + } + + /* + * NOTE: @boot_command_line is populated by setup_machine_fdt() so this + * append processing can only happen after. + */ + if (append_cmdline) { + /* Ensure a whitespace between the 2 cmdlines */ + strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); + strlcat(boot_command_line, uboot_arg, COMMAND_LINE_SIZE); + } +} + +void __init setup_arch(char **cmdline_p) +{ + handle_uboot_args(); + + /* Save unparsed command line copy for /proc/cmdline */ + *cmdline_p = boot_command_line; + + /* To force early parsing of things like mem=xxx */ + parse_early_param(); + + /* Platform/board specific: e.g. early console registration */ + if (machine_desc->init_early) + machine_desc->init_early(); + + smp_init_cpus(); + + setup_processor(); + setup_arch_memory(); + + /* copy flat DT out of .init and then unflatten it */ + unflatten_and_copy_device_tree(); + + /* Can be issue if someone passes cmd line arg "ro" + * But that is unlikely so keeping it as it is + */ + root_mountflags &= ~MS_RDONLY; + + arc_unwind_init(); +} + +/* + * Called from start_kernel() - boot CPU only + */ +void __init time_init(void) +{ + of_clk_init(NULL); + timer_probe(); +} + +static int __init customize_machine(void) +{ + if (machine_desc->init_machine) + machine_desc->init_machine(); + + return 0; +} +arch_initcall(customize_machine); + +static int __init init_late_machine(void) +{ + if (machine_desc->init_late) + machine_desc->init_late(); + + return 0; +} +late_initcall(init_late_machine); +/* + * Get CPU information for use by the procfs. 
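+ * (Note added for illustration: cpu-ids are encoded as 0xFFFF0000 | cpu by the macros below, so cpu 0 becomes a non-NULL iterator cookie and ptr_to_cpu() recovers the id in show_cpuinfo().)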
+ */ + +#define cpu_to_ptr(c) ((void *)(0xFFFF0000 | (unsigned int)(c))) +#define ptr_to_cpu(p) (~0xFFFF0000UL & (unsigned int)(p)) + +static int show_cpuinfo(struct seq_file *m, void *v) +{ + char *str; + int cpu_id = ptr_to_cpu(v); + struct device *cpu_dev = get_cpu_device(cpu_id); + struct clk *cpu_clk; + unsigned long freq = 0; + + if (!cpu_online(cpu_id)) { + seq_printf(m, "processor [%d]\t: Offline\n", cpu_id); + goto done; + } + + str = (char *)__get_free_page(GFP_KERNEL); + if (!str) + goto done; + + seq_printf(m, arc_cpu_mumbojumbo(cpu_id, str, PAGE_SIZE)); + + cpu_clk = clk_get(cpu_dev, NULL); + if (IS_ERR(cpu_clk)) { + seq_printf(m, "CPU speed \t: Cannot get clock for processor [%d]\n", + cpu_id); + } else { + freq = clk_get_rate(cpu_clk); + } + if (freq) + seq_printf(m, "CPU speed\t: %lu.%02lu Mhz\n", + freq / 1000000, (freq / 10000) % 100); + + seq_printf(m, "Bogo MIPS\t: %lu.%02lu\n", + loops_per_jiffy / (500000 / HZ), + (loops_per_jiffy / (5000 / HZ)) % 100); + + seq_printf(m, arc_mmu_mumbojumbo(cpu_id, str, PAGE_SIZE)); + seq_printf(m, arc_cache_mumbojumbo(cpu_id, str, PAGE_SIZE)); + seq_printf(m, arc_extn_mumbojumbo(cpu_id, str, PAGE_SIZE)); + seq_printf(m, arc_platform_smp_cpuinfo()); + + free_page((unsigned long)str); +done: + seq_printf(m, "\n"); + + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + /* + * Callback returns cpu-id to iterator for show routine, NULL to stop. + * However since NULL is also a valid cpu-id (0), we use a round-about + * way to pass it w/o having to kmalloc/free a 2 byte string. + * Encode cpu-id as 0xFFcccc, which is decoded by show routine. + */ + return *pos < nr_cpu_ids ? cpu_to_ptr(*pos) : NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} + +static void c_stop(struct seq_file *m, void *v) +{ +} + +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo +}; + +static DEFINE_PER_CPU(struct cpu, cpu_topology); + +static int __init topology_init(void) +{ + int cpu; + + for_each_present_cpu(cpu) + register_cpu(&per_cpu(cpu_topology, cpu), cpu); + + return 0; +} + +subsys_initcall(topology_init); diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c new file mode 100644 index 0000000000..cb2f88502b --- /dev/null +++ b/arch/arc/kernel/signal.c @@ -0,0 +1,442 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Signal Handling for ARC + * + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * vineetg: Jan 2010 (Restarting of timer related syscalls) + * + * vineetg: Nov 2009 (Everything needed for TIF_RESTORE_SIGMASK) + * -do_signal() supports TIF_RESTORE_SIGMASK + * -do_signal() no loner needs oldset, required by OLD sys_sigsuspend + * -sys_rt_sigsuspend() now comes from generic code, so discard arch implemen + * -sys_sigsuspend() no longer needs to fudge ptregs, hence that arg removed + * -sys_sigsuspend() no longer loops for do_signal(), sets TIF_xxx and leaves + * the job to do_signal() + * + * vineetg: July 2009 + * -Modified Code to support the uClibc provided userland sigreturn stub + * to avoid kernel synthesing it on user stack at runtime, costing TLB + * probes and Cache line flushes. + * + * vineetg: July 2009 + * -In stash_usr_regs( ) and restore_usr_regs( ), save/restore of user regs + * in done in block copy rather than one word at a time. 
+ * This saves around 2K of code and improves LMBench lat_sig + * + * rajeshwarr: Feb 2009 + * - Support for Realtime Signals + * + * vineetg: Aug 11th 2008: Bug #94183 + * -ViXS were still seeing crashes when using insmod to load drivers. + * It turned out that the code to change Execute permssions for TLB entries + * of user was not guarded for interrupts (mod_tlb_permission) + * This was causing TLB entries to be overwritten on unrelated indexes + * + * Vineetg: July 15th 2008: Bug #94183 + * -Exception happens in Delay slot of a JMP, and before user space resumes, + * Signal is delivered (Ctrl + C) = >SIGINT. + * setup_frame( ) sets up PC,SP,BLINK to enable user space signal handler + * to run, but doesn't clear the Delay slot bit from status32. As a result, + * on resuming user mode, signal handler branches off to BTA of orig JMP + * -FIX: clear the DE bit from status32 in setup_frame( ) + * + * Rahul Trivedi, Kanika Nema: Codito Technologies 2004 + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +struct rt_sigframe { + struct siginfo info; + struct ucontext uc; +#define MAGIC_SIGALTSTK 0x07302004 + unsigned int sigret_magic; +}; + +static int save_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs) +{ + int err = 0; +#ifndef CONFIG_ISA_ARCOMPACT + struct user_regs_arcv2 v2abi; + + v2abi.r30 = regs->r30; +#ifdef CONFIG_ARC_HAS_ACCL_REGS + v2abi.r58 = regs->r58; + v2abi.r59 = regs->r59; +#else + v2abi.r58 = v2abi.r59 = 0; +#endif + err = __copy_to_user(&mctx->v2abi, &v2abi, sizeof(v2abi)); +#endif + return err; +} + +static int restore_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs) +{ + int err = 0; +#ifndef CONFIG_ISA_ARCOMPACT + struct user_regs_arcv2 v2abi; + + err = __copy_from_user(&v2abi, &mctx->v2abi, sizeof(v2abi)); + + regs->r30 = v2abi.r30; +#ifdef CONFIG_ARC_HAS_ACCL_REGS + regs->r58 = v2abi.r58; + regs->r59 = v2abi.r59; +#endif +#endif + return err; +} + +static int +stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs, + sigset_t *set) +{ + int err; + struct user_regs_struct uregs; + + uregs.scratch.bta = regs->bta; + uregs.scratch.lp_start = regs->lp_start; + uregs.scratch.lp_end = regs->lp_end; + uregs.scratch.lp_count = regs->lp_count; + uregs.scratch.status32 = regs->status32; + uregs.scratch.ret = regs->ret; + uregs.scratch.blink = regs->blink; + uregs.scratch.fp = regs->fp; + uregs.scratch.gp = regs->r26; + uregs.scratch.r12 = regs->r12; + uregs.scratch.r11 = regs->r11; + uregs.scratch.r10 = regs->r10; + uregs.scratch.r9 = regs->r9; + uregs.scratch.r8 = regs->r8; + uregs.scratch.r7 = regs->r7; + uregs.scratch.r6 = regs->r6; + uregs.scratch.r5 = regs->r5; + uregs.scratch.r4 = regs->r4; + uregs.scratch.r3 = regs->r3; + uregs.scratch.r2 = regs->r2; + uregs.scratch.r1 = regs->r1; + uregs.scratch.r0 = regs->r0; + uregs.scratch.sp = regs->sp; + + err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), &uregs.scratch, + sizeof(sf->uc.uc_mcontext.regs.scratch)); + + if (is_isa_arcv2()) + err |= save_arcv2_regs(&(sf->uc.uc_mcontext), regs); + + err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t)); + + return err ? 
-EFAULT : 0; +} + +static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf) +{ + sigset_t set; + int err; + struct user_regs_struct uregs; + + err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set)); + err |= __copy_from_user(&uregs.scratch, + &(sf->uc.uc_mcontext.regs.scratch), + sizeof(sf->uc.uc_mcontext.regs.scratch)); + + if (is_isa_arcv2()) + err |= restore_arcv2_regs(&(sf->uc.uc_mcontext), regs); + + if (err) + return -EFAULT; + + set_current_blocked(&set); + regs->bta = uregs.scratch.bta; + regs->lp_start = uregs.scratch.lp_start; + regs->lp_end = uregs.scratch.lp_end; + regs->lp_count = uregs.scratch.lp_count; + regs->status32 = uregs.scratch.status32; + regs->ret = uregs.scratch.ret; + regs->blink = uregs.scratch.blink; + regs->fp = uregs.scratch.fp; + regs->r26 = uregs.scratch.gp; + regs->r12 = uregs.scratch.r12; + regs->r11 = uregs.scratch.r11; + regs->r10 = uregs.scratch.r10; + regs->r9 = uregs.scratch.r9; + regs->r8 = uregs.scratch.r8; + regs->r7 = uregs.scratch.r7; + regs->r6 = uregs.scratch.r6; + regs->r5 = uregs.scratch.r5; + regs->r4 = uregs.scratch.r4; + regs->r3 = uregs.scratch.r3; + regs->r2 = uregs.scratch.r2; + regs->r1 = uregs.scratch.r1; + regs->r0 = uregs.scratch.r0; + regs->sp = uregs.scratch.sp; + + return 0; +} + +static inline int is_do_ss_needed(unsigned int magic) +{ + if (MAGIC_SIGALTSTK == magic) + return 1; + else + return 0; +} + +SYSCALL_DEFINE0(rt_sigreturn) +{ + struct rt_sigframe __user *sf; + unsigned int magic; + struct pt_regs *regs = current_pt_regs(); + + /* Always make any pending restarted system calls return -EINTR */ + current->restart_block.fn = do_no_restart_syscall; + + /* Since we stacked the signal on a word boundary, + * then 'sp' should be word aligned here. If it's + * not, then the user is trying to mess with us. + */ + if (regs->sp & 3) + goto badframe; + + sf = (struct rt_sigframe __force __user *)(regs->sp); + + if (!access_ok(sf, sizeof(*sf))) + goto badframe; + + if (__get_user(magic, &sf->sigret_magic)) + goto badframe; + + if (unlikely(is_do_ss_needed(magic))) + if (restore_altstack(&sf->uc.uc_stack)) + goto badframe; + + if (restore_usr_regs(regs, sf)) + goto badframe; + + /* Don't restart from sigreturn */ + syscall_wont_restart(regs); + + /* + * Ensure that sigreturn always returns to user mode (in case the + * regs saved on user stack got fudged between save and sigreturn) + * Otherwise it is easy to panic the kernel with a custom + * signal handler and/or restorer which clobberes the status32/ret + * to return to a bogus location in kernel mode. + */ + regs->status32 |= STATUS_U_MASK; + + return regs->r0; + +badframe: + force_sig(SIGSEGV); + return 0; +} + +/* + * Determine which stack to use.. 
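+ * sigsp() picks the alternate signal stack when SA_ONSTACK is in effect (and
+ * the task is not already on it), otherwise the current user sp; the frame is
+ * then carved below that, 8-byte aligned, and vetted with access_ok().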
+ */ +static inline void __user *get_sigframe(struct ksignal *ksig, + struct pt_regs *regs, + unsigned long framesize) +{ + unsigned long sp = sigsp(regs->sp, ksig); + void __user *frame; + + /* No matter what happens, 'sp' must be word + * aligned otherwise nasty things could happen + */ + + /* ATPCS B01 mandates 8-byte alignment */ + frame = (void __user *)((sp - framesize) & ~7); + + /* Check that we can actually write to the signal frame */ + if (!access_ok(frame, framesize)) + frame = NULL; + + return frame; +} + +static int +setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) +{ + struct rt_sigframe __user *sf; + unsigned int magic = 0; + int err = 0; + + sf = get_sigframe(ksig, regs, sizeof(struct rt_sigframe)); + if (!sf) + return 1; + + /* + * w/o SA_SIGINFO, struct ucontext is partially populated (only + * uc_mcontext/uc_sigmask) for kernel's normal user state preservation + * during signal handler execution. This works for SA_SIGINFO as well + * although the semantics are now overloaded (the same reg state can be + * inspected by userland: but are they allowed to fiddle with it ? + */ + err |= stash_usr_regs(sf, regs, set); + + /* + * SA_SIGINFO requires 3 args to signal handler: + * #1: sig-no (common to any handler) + * #2: struct siginfo + * #3: struct ucontext (completely populated) + */ + if (unlikely(ksig->ka.sa.sa_flags & SA_SIGINFO)) { + err |= copy_siginfo_to_user(&sf->info, &ksig->info); + err |= __put_user(0, &sf->uc.uc_flags); + err |= __put_user(NULL, &sf->uc.uc_link); + err |= __save_altstack(&sf->uc.uc_stack, regs->sp); + + /* setup args 2 and 3 for user mode handler */ + regs->r1 = (unsigned long)&sf->info; + regs->r2 = (unsigned long)&sf->uc; + + /* + * small optim to avoid unconditionally calling do_sigaltstack + * in sigreturn path, now that we only have rt_sigreturn + */ + magic = MAGIC_SIGALTSTK; + } + + err |= __put_user(magic, &sf->sigret_magic); + if (err) + return err; + + /* #1 arg to the user Signal handler */ + regs->r0 = ksig->sig; + + /* setup PC of user space signal handler */ + regs->ret = (unsigned long)ksig->ka.sa.sa_handler; + + /* + * handler returns using sigreturn stub provided already by userpsace + * If not, nuke the process right away + */ + if(!(ksig->ka.sa.sa_flags & SA_RESTORER)) + return 1; + + regs->blink = (unsigned long)ksig->ka.sa.sa_restorer; + + /* User Stack for signal handler will be above the frame just carved */ + regs->sp = (unsigned long)sf; + + /* + * Bug 94183, Clear the DE bit, so that when signal handler + * starts to run, it doesn't use BTA + */ + regs->status32 &= ~STATUS_DE_MASK; + regs->status32 |= STATUS_L_MASK; + + return err; +} + +static void arc_restart_syscall(struct k_sigaction *ka, struct pt_regs *regs) +{ + switch (regs->r0) { + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + /* + * ERESTARTNOHAND means that the syscall should + * only be restarted if there was no handler for + * the signal, and since we only get here if there + * is a handler, we don't restart + */ + regs->r0 = -EINTR; /* ERESTART_xxx is internal */ + break; + + case -ERESTARTSYS: + /* + * ERESTARTSYS means to restart the syscall if + * there is no handler or the handler was + * registered with SA_RESTART + */ + if (!(ka->sa.sa_flags & SA_RESTART)) { + regs->r0 = -EINTR; + break; + } + fallthrough; + + case -ERESTARTNOINTR: + /* + * ERESTARTNOINTR means that the syscall should + * be called again after the signal handler returns. 
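+ * (unconditionally, unlike the ERESTARTSYS case above which honours SA_RESTART)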
+ * Setup reg state just as it was before doing the trap + * r0 has been clobbered with sys call ret code thus it + * needs to be reloaded with orig first arg to syscall + * in orig_r0. Rest of relevant reg-file: + * r8 (syscall num) and (r1 - r7) will be reset to + * their orig user space value when we ret from kernel + */ + regs->r0 = regs->orig_r0; + regs->ret -= is_isa_arcv2() ? 2 : 4; + break; + } +} + +/* + * OK, we're invoking a handler + */ +static void +handle_signal(struct ksignal *ksig, struct pt_regs *regs) +{ + sigset_t *oldset = sigmask_to_save(); + int failed; + + /* Set up the stack frame */ + failed = setup_rt_frame(ksig, oldset, regs); + + signal_setup_done(failed, ksig, 0); +} + +void do_signal(struct pt_regs *regs) +{ + struct ksignal ksig; + int restart_scall; + + restart_scall = in_syscall(regs) && syscall_restartable(regs); + + if (test_thread_flag(TIF_SIGPENDING) && get_signal(&ksig)) { + if (restart_scall) { + arc_restart_syscall(&ksig.ka, regs); + syscall_wont_restart(regs); /* No more restarts */ + } + handle_signal(&ksig, regs); + return; + } + + if (restart_scall) { + /* No handler for syscall: restart it */ + if (regs->r0 == -ERESTARTNOHAND || + regs->r0 == -ERESTARTSYS || regs->r0 == -ERESTARTNOINTR) { + regs->r0 = regs->orig_r0; + regs->ret -= is_isa_arcv2() ? 2 : 4; + } else if (regs->r0 == -ERESTART_RESTARTBLOCK) { + regs->r8 = __NR_restart_syscall; + regs->ret -= is_isa_arcv2() ? 2 : 4; + } + syscall_wont_restart(regs); /* No more restarts */ + } + + /* If there's no signal to deliver, restore the saved sigmask back */ + restore_saved_sigmask(); +} + +void do_notify_resume(struct pt_regs *regs) +{ + /* + * ASM glue guarantees that this is only called when returning to + * user mode + */ + if (test_thread_flag(TIF_NOTIFY_RESUME)) + tracehook_notify_resume(regs); +} diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c new file mode 100644 index 0000000000..78e6d069b1 --- /dev/null +++ b/arch/arc/kernel/smp.c @@ -0,0 +1,421 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * RajeshwarR: Dec 11, 2007 + * -- Added support for Inter Processor Interrupts + * + * Vineetg: Nov 1st, 2007 + * -- Initial Write (Borrowed heavily from ARM) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#ifndef CONFIG_ARC_HAS_LLSC +arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED; + +EXPORT_SYMBOL_GPL(smp_atomic_ops_lock); +#endif + +struct plat_smp_ops __weak plat_smp_ops; + +/* XXX: per cpu ? Only needed once in early seconday boot */ +struct task_struct *secondary_idle_tsk; + +/* Called from start_kernel */ +void __init smp_prepare_boot_cpu(void) +{ +} + +static int __init arc_get_cpu_map(const char *name, struct cpumask *cpumask) +{ + unsigned long dt_root = of_get_flat_dt_root(); + const char *buf; + + buf = of_get_flat_dt_prop(dt_root, name, NULL); + if (!buf) + return -EINVAL; + + if (cpulist_parse(buf, cpumask)) + return -EINVAL; + + return 0; +} + +/* + * Read from DeviceTree and setup cpu possible mask. If there is no + * "possible-cpus" property in DeviceTree pretend all [0..NR_CPUS-1] exist. 
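+ * Either way the resulting mask must still contain the master (cpu0); this is
+ * sanity-checked below before init_cpu_possible() commits it.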
+ */ +static void __init arc_init_cpu_possible(void) +{ + struct cpumask cpumask; + + if (arc_get_cpu_map("possible-cpus", &cpumask)) { + pr_warn("Failed to get possible-cpus from dtb, pretending all %u cpus exist\n", + NR_CPUS); + + cpumask_setall(&cpumask); + } + + if (!cpumask_test_cpu(0, &cpumask)) + panic("Master cpu (cpu[0]) is missed in cpu possible mask!"); + + init_cpu_possible(&cpumask); +} + +/* + * Called from setup_arch() before calling setup_processor() + * + * - Initialise the CPU possible map early - this describes the CPUs + * which may be present or become present in the system. + * - Call early smp init hook. This can initialize a specific multi-core + * IP which is say common to several platforms (hence not part of + * platform specific int_early() hook) + */ +void __init smp_init_cpus(void) +{ + arc_init_cpu_possible(); + + if (plat_smp_ops.init_early_smp) + plat_smp_ops.init_early_smp(); +} + +/* called from init ( ) => process 1 */ +void __init smp_prepare_cpus(unsigned int max_cpus) +{ + /* + * if platform didn't set the present map already, do it now + * boot cpu is set to present already by init/main.c + */ + if (num_present_cpus() <= 1) + init_cpu_present(cpu_possible_mask); +} + +void __init smp_cpus_done(unsigned int max_cpus) +{ + +} + +/* + * Default smp boot helper for Run-on-reset case where all cores start off + * together. Non-masters need to wait for Master to start running. + * This is implemented using a flag in memory, which Non-masters spin-wait on. + * Master sets it to cpu-id of core to "ungate" it. + */ +static volatile int wake_flag; + +#ifdef CONFIG_ISA_ARCOMPACT + +#define __boot_read(f) f +#define __boot_write(f, v) f = v + +#else + +#define __boot_read(f) arc_read_uncached_32(&f) +#define __boot_write(f, v) arc_write_uncached_32(&f, v) + +#endif + +static void arc_default_smp_cpu_kick(int cpu, unsigned long pc) +{ + BUG_ON(cpu == 0); + + __boot_write(wake_flag, cpu); +} + +void arc_platform_smp_wait_to_boot(int cpu) +{ + /* for halt-on-reset, we've waited already */ + if (IS_ENABLED(CONFIG_ARC_SMP_HALT_ON_RESET)) + return; + + while (__boot_read(wake_flag) != cpu) + ; + + __boot_write(wake_flag, 0); +} + +const char *arc_platform_smp_cpuinfo(void) +{ + return plat_smp_ops.info ? : ""; +} + +/* + * The very first "C" code executed by secondary + * Called from asm stub in head.S + * "current"/R25 already setup by low level boot code + */ +void start_kernel_secondary(void) +{ + struct mm_struct *mm = &init_mm; + unsigned int cpu = smp_processor_id(); + + /* MMU, Caches, Vector Table, Interrupts etc */ + setup_processor(); + + mmget(mm); + mmgrab(mm); + current->active_mm = mm; + cpumask_set_cpu(cpu, mm_cpumask(mm)); + + /* Some SMP H/w setup - for each cpu */ + if (plat_smp_ops.init_per_cpu) + plat_smp_ops.init_per_cpu(cpu); + + if (machine_desc->init_per_cpu) + machine_desc->init_per_cpu(cpu); + + notify_cpu_starting(cpu); + set_cpu_online(cpu, true); + + pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu); + + local_irq_enable(); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); +} + +/* + * Called from kernel_init( ) -> smp_init( ) - for each CPU + * + * At this point, Secondary Processor is "HALT"ed: + * -It booted, but was halted in head.S + * -It was configured to halt-on-reset + * So need to wake it up. 
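+ * (done via the platform cpu_kick hook when provided, else the default
+ * wake_flag helper above, then we poll for the cpu to come online for ~1s)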
+ * + * Essential requirements being where to run from (PC) and stack (SP) +*/ +int __cpu_up(unsigned int cpu, struct task_struct *idle) +{ + unsigned long wait_till; + + secondary_idle_tsk = idle; + + pr_info("Idle Task [%d] %p", cpu, idle); + pr_info("Trying to bring up CPU%u ...\n", cpu); + + if (plat_smp_ops.cpu_kick) + plat_smp_ops.cpu_kick(cpu, + (unsigned long)first_lines_of_secondary); + else + arc_default_smp_cpu_kick(cpu, (unsigned long)NULL); + + /* wait for 1 sec after kicking the secondary */ + wait_till = jiffies + HZ; + while (time_before(jiffies, wait_till)) { + if (cpu_online(cpu)) + break; + } + + if (!cpu_online(cpu)) { + pr_info("Timeout: CPU%u FAILED to come up !!!\n", cpu); + return -1; + } + + secondary_idle_tsk = NULL; + + return 0; +} + +/* + * not supported here + */ +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + +/*****************************************************************************/ +/* Inter Processor Interrupt Handling */ +/*****************************************************************************/ + +enum ipi_msg_type { + IPI_EMPTY = 0, + IPI_RESCHEDULE = 1, + IPI_CALL_FUNC, + IPI_CPU_STOP, +}; + +/* + * In arches with IRQ for each msg type (above), receiver can use IRQ-id to + * figure out what msg was sent. For those which don't (ARC has dedicated IPI + * IRQ), the msg-type needs to be conveyed via per-cpu data + */ + +static DEFINE_PER_CPU(unsigned long, ipi_data); + +static void ipi_send_msg_one(int cpu, enum ipi_msg_type msg) +{ + unsigned long __percpu *ipi_data_ptr = per_cpu_ptr(&ipi_data, cpu); + unsigned long old, new; + unsigned long flags; + + pr_debug("%d Sending msg [%d] to %d\n", smp_processor_id(), msg, cpu); + + local_irq_save(flags); + + /* + * Atomically write new msg bit (in case others are writing too), + * and read back old value + */ + do { + new = old = READ_ONCE(*ipi_data_ptr); + new |= 1U << msg; + } while (cmpxchg(ipi_data_ptr, old, new) != old); + + /* + * Call the platform specific IPI kick function, but avoid if possible: + * Only do so if there's no pending msg from other concurrent sender(s). + * Otherwise, receiver will see this msg as well when it takes the + * IPI corresponding to that msg. 
This is true, even if it is already in + * IPI handler, because !@old means it has not yet dequeued the msg(s) + * so @new msg can be a free-loader + */ + if (plat_smp_ops.ipi_send && !old) + plat_smp_ops.ipi_send(cpu); + + local_irq_restore(flags); +} + +static void ipi_send_msg(const struct cpumask *callmap, enum ipi_msg_type msg) +{ + unsigned int cpu; + + for_each_cpu(cpu, callmap) + ipi_send_msg_one(cpu, msg); +} + +void smp_send_reschedule(int cpu) +{ + ipi_send_msg_one(cpu, IPI_RESCHEDULE); +} + +void smp_send_stop(void) +{ + struct cpumask targets; + cpumask_copy(&targets, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &targets); + ipi_send_msg(&targets, IPI_CPU_STOP); +} + +void arch_send_call_function_single_ipi(int cpu) +{ + ipi_send_msg_one(cpu, IPI_CALL_FUNC); +} + +void arch_send_call_function_ipi_mask(const struct cpumask *mask) +{ + ipi_send_msg(mask, IPI_CALL_FUNC); +} + +/* + * ipi_cpu_stop - handle IPI from smp_send_stop() + */ +static void ipi_cpu_stop(void) +{ + machine_halt(); +} + +static inline int __do_IPI(unsigned long msg) +{ + int rc = 0; + + switch (msg) { + case IPI_RESCHEDULE: + scheduler_ipi(); + break; + + case IPI_CALL_FUNC: + generic_smp_call_function_interrupt(); + break; + + case IPI_CPU_STOP: + ipi_cpu_stop(); + break; + + default: + rc = 1; + } + + return rc; +} + +/* + * arch-common ISR to handle for inter-processor interrupts + * Has hooks for platform specific IPI + */ +irqreturn_t do_IPI(int irq, void *dev_id) +{ + unsigned long pending; + unsigned long __maybe_unused copy; + + pr_debug("IPI [%ld] received on cpu %d\n", + *this_cpu_ptr(&ipi_data), smp_processor_id()); + + if (plat_smp_ops.ipi_clear) + plat_smp_ops.ipi_clear(irq); + + /* + * "dequeue" the msg corresponding to this IPI (and possibly other + * piggybacked msg from elided IPIs: see ipi_send_msg_one() above) + */ + copy = pending = xchg(this_cpu_ptr(&ipi_data), 0); + + do { + unsigned long msg = __ffs(pending); + int rc; + + rc = __do_IPI(msg); + if (rc) + pr_info("IPI with bogus msg %ld in %ld\n", msg, copy); + pending &= ~(1U << msg); + } while (pending); + + return IRQ_HANDLED; +} + +/* + * API called by platform code to hookup arch-common ISR to their IPI IRQ + * + * Note: If IPI is provided by platform (vs. say ARC MCIP), their intc setup/map + * function needs to call call irq_set_percpu_devid() for IPI IRQ, otherwise + * request_percpu_irq() below will fail + */ +static DEFINE_PER_CPU(int, ipi_dev); + +int smp_ipi_irq_setup(int cpu, irq_hw_number_t hwirq) +{ + int *dev = per_cpu_ptr(&ipi_dev, cpu); + unsigned int virq = irq_find_mapping(NULL, hwirq); + + if (!virq) + panic("Cannot find virq for root domain and hwirq=%lu", hwirq); + + /* Boot cpu calls request, all call enable */ + if (!cpu) { + int rc; + + rc = request_percpu_irq(virq, do_IPI, "IPI Interrupt", dev); + if (rc) + panic("Percpu IRQ request failed for %u\n", virq); + } + + enable_percpu_irq(virq, 0); + + return 0; +} diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c new file mode 100644 index 0000000000..5372dc04e7 --- /dev/null +++ b/arch/arc/kernel/stacktrace.c @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * stacktrace.c : stacktracing APIs needed by rest of kernel + * (wrappers over ARC dwarf based unwinder) + * + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + * + * vineetg: aug 2009 + * -Implemented CONFIG_STACKTRACE APIs, primarily save_stack_trace_tsk( ) + * for displaying task's kernel mode call stack in /proc//stack + * -Iterator based approach to have single copy of unwinding core and APIs + * needing unwinding, implement the logic in iterator regarding: + * = which frame onwards to start capture + * = which frame to stop capturing (wchan) + * = specifics of data structs where trace is saved(CONFIG_STACKTRACE etc) + * + * vineetg: March 2009 + * -Implemented correct versions of thread_saved_pc() and __get_wchan() + * + * rajeshwarr: 2008 + * -Initial implementation + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +/*------------------------------------------------------------------------- + * Unwinder Iterator + *------------------------------------------------------------------------- + */ + +#ifdef CONFIG_ARC_DW2_UNWIND + +static int +seed_unwind_frame_info(struct task_struct *tsk, struct pt_regs *regs, + struct unwind_frame_info *frame_info) +{ + if (regs) { + /* + * Asynchronous unwinding of intr/exception + * - Just uses the pt_regs passed + */ + frame_info->task = tsk; + + frame_info->regs.r27 = regs->fp; + frame_info->regs.r28 = regs->sp; + frame_info->regs.r31 = regs->blink; + frame_info->regs.r63 = regs->ret; + frame_info->call_frame = 0; + } else if (tsk == NULL || tsk == current) { + /* + * synchronous unwinding (e.g. dump_stack) + * - uses current values of SP and friends + */ + unsigned long fp, sp, blink, ret; + frame_info->task = current; + + __asm__ __volatile__( + "mov %0,r27\n\t" + "mov %1,r28\n\t" + "mov %2,r31\n\t" + "mov %3,r63\n\t" + : "=r"(fp), "=r"(sp), "=r"(blink), "=r"(ret) + ); + + frame_info->regs.r27 = fp; + frame_info->regs.r28 = sp; + frame_info->regs.r31 = blink; + frame_info->regs.r63 = ret; + frame_info->call_frame = 0; + } else { + /* + * Asynchronous unwinding of a likely sleeping task + * - first ensure it is actually sleeping + * - if so, it will be in __switch_to, kernel mode SP of task + * is safe-kept and BLINK at a well known location in there + */ + + if (task_is_running(tsk)) + return -1; + + frame_info->task = tsk; + + frame_info->regs.r27 = TSK_K_FP(tsk); + frame_info->regs.r28 = TSK_K_ESP(tsk); + frame_info->regs.r31 = TSK_K_BLINK(tsk); + frame_info->regs.r63 = (unsigned int)__switch_to; + + /* In the prologue of __switch_to, first FP is saved on stack + * and then SP is copied to FP. Dwarf assumes cfa as FP based + * but we didn't save FP. The value retrieved above is FP's + * state in previous frame. + * As a work around for this, we unwind from __switch_to start + * and adjust SP accordingly. 
The other limitation is that + * __switch_to macro is dwarf rules are not generated for inline + * assembly code + */ + frame_info->regs.r27 = 0; + frame_info->regs.r28 += 60; + frame_info->call_frame = 0; + + } + return 0; +} + +#endif + +notrace noinline unsigned int +arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, + int (*consumer_fn) (unsigned int, void *), void *arg) +{ +#ifdef CONFIG_ARC_DW2_UNWIND + int ret = 0, cnt = 0; + unsigned int address; + struct unwind_frame_info frame_info; + + if (seed_unwind_frame_info(tsk, regs, &frame_info)) + return 0; + + while (1) { + address = UNW_PC(&frame_info); + + if (!address || !__kernel_text_address(address)) + break; + + if (consumer_fn(address, arg) == -1) + break; + + ret = arc_unwind(&frame_info); + if (ret) + break; + + frame_info.regs.r63 = frame_info.regs.r31; + + if (cnt++ > 128) { + printk("unwinder looping too long, aborting !\n"); + return 0; + } + } + + return address; /* return the last address it saw */ +#else + /* On ARC, only Dward based unwinder works. fp based backtracing is + * not possible (-fno-omit-frame-pointer) because of the way function + * prologue is setup (callee regs saved and then fp set and not other + * way around + */ + pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n"); + return 0; + +#endif +} + +/*------------------------------------------------------------------------- + * callbacks called by unwinder iterator to implement kernel APIs + * + * The callback can return -1 to force the iterator to stop, which by default + * keeps going till the bottom-most frame. + *------------------------------------------------------------------------- + */ + +/* Call-back which plugs into unwinding core to dump the stack in + * case of panic/OOPs/BUG etc + */ +static int __print_sym(unsigned int address, void *arg) +{ + const char *loglvl = arg; + + printk("%s %pS\n", loglvl, (void *)address); + return 0; +} + +#ifdef CONFIG_STACKTRACE + +/* Call-back which plugs into unwinding core to capture the + * traces needed by kernel on /proc//stack + */ +static int __collect_all(unsigned int address, void *arg) +{ + struct stack_trace *trace = arg; + + if (trace->skip > 0) + trace->skip--; + else + trace->entries[trace->nr_entries++] = address; + + if (trace->nr_entries >= trace->max_entries) + return -1; + + return 0; +} + +static int __collect_all_but_sched(unsigned int address, void *arg) +{ + struct stack_trace *trace = arg; + + if (in_sched_functions(address)) + return 0; + + if (trace->skip > 0) + trace->skip--; + else + trace->entries[trace->nr_entries++] = address; + + if (trace->nr_entries >= trace->max_entries) + return -1; + + return 0; +} + +#endif + +static int __get_first_nonsched(unsigned int address, void *unused) +{ + if (in_sched_functions(address)) + return 0; + + return -1; +} + +/*------------------------------------------------------------------------- + * APIs expected by various kernel sub-systems + *------------------------------------------------------------------------- + */ + +noinline void show_stacktrace(struct task_struct *tsk, struct pt_regs *regs, + const char *loglvl) +{ + printk("%s\nStack Trace:\n", loglvl); + arc_unwind_core(tsk, regs, __print_sym, (void *)loglvl); +} +EXPORT_SYMBOL(show_stacktrace); + +/* Expected by sched Code */ +void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) +{ + show_stacktrace(tsk, NULL, loglvl); +} + +/* Another API expected by schedular, shows up in "ps" as Wait Channel + * Of course just returning schedule( ) 
would be pointless so unwind until + * the function is not in schedular code + */ +unsigned int __get_wchan(struct task_struct *tsk) +{ + return arc_unwind_core(tsk, NULL, __get_first_nonsched, NULL); +} + +#ifdef CONFIG_STACKTRACE + +/* + * API required by CONFIG_STACKTRACE, CONFIG_LATENCYTOP. + * A typical use is when /proc//stack is queried by userland + */ +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +{ + /* Assumes @tsk is sleeping so unwinds from __switch_to */ + arc_unwind_core(tsk, NULL, __collect_all_but_sched, trace); +} + +void save_stack_trace(struct stack_trace *trace) +{ + /* Pass NULL for task so it unwinds the current call frame */ + arc_unwind_core(NULL, NULL, __collect_all, trace); +} +EXPORT_SYMBOL_GPL(save_stack_trace); +#endif diff --git a/arch/arc/kernel/sys.c b/arch/arc/kernel/sys.c new file mode 100644 index 0000000000..1069446bdc --- /dev/null +++ b/arch/arc/kernel/sys.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include + +#include + +#define sys_clone sys_clone_wrapper +#define sys_clone3 sys_clone3_wrapper + +#undef __SYSCALL +#define __SYSCALL(nr, call) [nr] = (call), + +void *sys_call_table[NR_syscalls] = { + [0 ... NR_syscalls-1] = sys_ni_syscall, +#include +}; diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c new file mode 100644 index 0000000000..6b83e3f2b4 --- /dev/null +++ b/arch/arc/kernel/traps.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Traps/Non-MMU Exception handling for ARC + * + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * vineetg: May 2011 + * -user-space unaligned access emulation + * + * Rahul Trivedi: Codito Technologies 2004 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void die(const char *str, struct pt_regs *regs, unsigned long address) +{ + show_kernel_fault_diag(str, regs, address); + + /* DEAD END */ + __asm__("flag 1"); +} + +/* + * Helper called for bulk of exceptions NOT needing specific handling + * -for user faults enqueues requested signal + * -for kernel, chk if due to copy_(to|from)_user, otherwise die() + */ +static noinline int +unhandled_exception(const char *str, struct pt_regs *regs, + int signo, int si_code, void __user *addr) +{ + if (user_mode(regs)) { + struct task_struct *tsk = current; + + tsk->thread.fault_address = (__force unsigned int)addr; + + force_sig_fault(signo, si_code, addr); + + } else { + /* If not due to copy_(to|from)_user, we are doomed */ + if (fixup_exception(regs)) + return 0; + + die(str, regs, (unsigned long)addr); + } + + return 1; +} + +#define DO_ERROR_INFO(signr, str, name, sicode) \ +int name(unsigned long address, struct pt_regs *regs) \ +{ \ + return unhandled_exception(str, regs, signr, sicode, \ + (void __user *)address); \ +} + +/* + * Entry points for exceptions NOT needing specific handling + */ +DO_ERROR_INFO(SIGILL, "Priv Op/Disabled Extn", do_privilege_fault, ILL_PRVOPC) +DO_ERROR_INFO(SIGILL, "Invalid Extn Insn", do_extension_fault, ILL_ILLOPC) +DO_ERROR_INFO(SIGILL, "Illegal Insn (or Seq)", insterror_is_error, ILL_ILLOPC) +DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", __weak do_memory_error, BUS_ADRERR) +DO_ERROR_INFO(SIGTRAP, "Breakpoint Set", trap_is_brkpt, TRAP_BRKPT) +DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN) +DO_ERROR_INFO(SIGSEGV, "gcc generated __builtin_trap", do_trap5_error, 0) + +/* + * Entry Point for Misaligned Data access Exception, for 
emulating in software + */ +int do_misaligned_access(unsigned long address, struct pt_regs *regs, + struct callee_regs *cregs) +{ + /* If emulation not enabled, or failed, kill the task */ + if (misaligned_fixup(address, regs, cregs) != 0) + return do_misaligned_error(address, regs); + + return 0; +} + +/* + * Entry point for miscll errors such as Nested Exceptions + * -Duplicate TLB entry is handled seperately though + */ +void do_machine_check_fault(unsigned long address, struct pt_regs *regs) +{ + die("Unhandled Machine Check Exception", regs, address); +} + + +/* + * Entry point for traps induced by ARCompact TRAP_S insn + * This is same family as TRAP0/SWI insn (use the same vector). + * The only difference being SWI insn take no operand, while TRAP_S does + * which reflects in ECR Reg as 8 bit param. + * Thus TRAP_S can be used for specific purpose + * -1 used for software breakpointing (gdb) + * -2 used by kprobes + * -5 __builtin_trap() generated by gcc (2018.03 onwards) for toggle such as + * -fno-isolate-erroneous-paths-dereference + */ +void do_non_swi_trap(unsigned long address, struct pt_regs *regs) +{ + unsigned int param = regs->ecr_param; + + switch (param) { + case 1: + trap_is_brkpt(address, regs); + break; + + case 2: + trap_is_kprobe(address, regs); + break; + + case 3: + case 4: + kgdb_trap(regs); + break; + + case 5: + do_trap5_error(address, regs); + break; + default: + break; + } +} + +/* + * Entry point for Instruction Error Exception + * -For a corner case, ARC kprobes implementation resorts to using + * this exception, hence the check + */ +void do_insterror_or_kprobe(unsigned long address, struct pt_regs *regs) +{ + int rc; + + /* Check if this exception is caused by kprobes */ + rc = notify_die(DIE_IERR, "kprobe_ierr", regs, address, 0, SIGILL); + if (rc == NOTIFY_STOP) + return; + + insterror_is_error(address, regs); +} + +/* + * abort() call generated by older gcc for __builtin_trap() + */ +void abort(void) +{ + __asm__ __volatile__("trap_s 5\n"); +} diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c new file mode 100644 index 0000000000..7654c2e42d --- /dev/null +++ b/arch/arc/kernel/troubleshoot.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define ARC_PATH_MAX 256 + +static noinline void print_regs_scratch(struct pt_regs *regs) +{ + pr_cont("BTA: 0x%08lx\n SP: 0x%08lx FP: 0x%08lx BLK: %pS\n", + regs->bta, regs->sp, regs->fp, (void *)regs->blink); + pr_cont("LPS: 0x%08lx\tLPE: 0x%08lx\tLPC: 0x%08lx\n", + regs->lp_start, regs->lp_end, regs->lp_count); + + pr_info("r00: 0x%08lx\tr01: 0x%08lx\tr02: 0x%08lx\n" \ + "r03: 0x%08lx\tr04: 0x%08lx\tr05: 0x%08lx\n" \ + "r06: 0x%08lx\tr07: 0x%08lx\tr08: 0x%08lx\n" \ + "r09: 0x%08lx\tr10: 0x%08lx\tr11: 0x%08lx\n" \ + "r12: 0x%08lx\t", + regs->r0, regs->r1, regs->r2, + regs->r3, regs->r4, regs->r5, + regs->r6, regs->r7, regs->r8, + regs->r9, regs->r10, regs->r11, + regs->r12); +} + +static void print_regs_callee(struct callee_regs *regs) +{ + pr_cont("r13: 0x%08lx\tr14: 0x%08lx\n" \ + "r15: 0x%08lx\tr16: 0x%08lx\tr17: 0x%08lx\n" \ + "r18: 0x%08lx\tr19: 0x%08lx\tr20: 0x%08lx\n" \ + "r21: 0x%08lx\tr22: 0x%08lx\tr23: 0x%08lx\n" \ + "r24: 0x%08lx\tr25: 0x%08lx\n", + regs->r13, regs->r14, + regs->r15, regs->r16, regs->r17, + regs->r18, regs->r19, regs->r20, + regs->r21, regs->r22, regs->r23, + regs->r24, regs->r25); +} + +static void print_task_path_n_nm(struct task_struct *tsk) +{ + char *path_nm = NULL; + struct mm_struct *mm; + struct file *exe_file; + char buf[ARC_PATH_MAX]; + + mm = get_task_mm(tsk); + if (!mm) + goto done; + + exe_file = get_mm_exe_file(mm); + mmput(mm); + + if (exe_file) { + path_nm = file_path(exe_file, buf, ARC_PATH_MAX-1); + fput(exe_file); + } + +done: + pr_info("Path: %s\n", !IS_ERR(path_nm) ? path_nm : "?"); +} + +static void show_faulting_vma(unsigned long address) +{ + struct vm_area_struct *vma; + struct mm_struct *active_mm = current->active_mm; + + /* can't use print_vma_addr() yet as it doesn't check for + * non-inclusive vma + */ + mmap_read_lock(active_mm); + vma = vma_lookup(active_mm, address); + + /* Lookup the vma at the address and report if the container VMA is not + * found + */ + if (vma) { + char buf[ARC_PATH_MAX]; + char *nm = "?"; + + if (vma->vm_file) { + nm = file_path(vma->vm_file, buf, ARC_PATH_MAX-1); + if (IS_ERR(nm)) + nm = "?"; + } + pr_info(" @off 0x%lx in [%s] VMA: 0x%08lx to 0x%08lx\n", + vma->vm_start < TASK_UNMAPPED_BASE ? + address : address - vma->vm_start, + nm, vma->vm_start, vma->vm_end); + } else + pr_info(" @No matching VMA found\n"); + + mmap_read_unlock(active_mm); +} + +static void show_ecr_verbose(struct pt_regs *regs) +{ + unsigned int vec, cause_code; + unsigned long address; + + /* For Data fault, this is data address not instruction addr */ + address = current->thread.fault_address; + + vec = regs->ecr_vec; + cause_code = regs->ecr_cause; + + /* For DTLB Miss or ProtV, display the memory involved too */ + if (vec == ECR_V_DTLB_MISS) { + pr_cont("Invalid %s @ 0x%08lx by insn @ %pS\n", + (cause_code == 0x01) ? "Read" : + ((cause_code == 0x02) ? "Write" : "EX"), + address, (void *)regs->ret); + } else if (vec == ECR_V_ITLB_MISS) { + pr_cont("Insn could not be fetched\n"); + } else if (vec == ECR_V_MACH_CHK) { + pr_cont("Machine Check (%s)\n", (cause_code == 0x0) ? 
+ "Double Fault" : "Other Fatal Err"); + + } else if (vec == ECR_V_PROTV) { + if (cause_code == ECR_C_PROTV_INST_FETCH) + pr_cont("Execute from Non-exec Page\n"); + else if (cause_code == ECR_C_PROTV_MISALIG_DATA && + IS_ENABLED(CONFIG_ISA_ARCOMPACT)) + pr_cont("Misaligned r/w from 0x%08lx\n", address); + else + pr_cont("%s access not allowed on page\n", + (cause_code == 0x01) ? "Read" : + ((cause_code == 0x02) ? "Write" : "EX")); + } else if (vec == ECR_V_INSN_ERR) { + pr_cont("Illegal Insn\n"); +#ifdef CONFIG_ISA_ARCV2 + } else if (vec == ECR_V_MEM_ERR) { + if (cause_code == 0x00) + pr_cont("Bus Error from Insn Mem\n"); + else if (cause_code == 0x10) + pr_cont("Bus Error from Data Mem\n"); + else + pr_cont("Bus Error, check PRM\n"); + } else if (vec == ECR_V_MISALIGN) { + pr_cont("Misaligned r/w from 0x%08lx\n", address); +#endif + } else if (vec == ECR_V_TRAP) { + if (regs->ecr_param == 5) + pr_cont("gcc generated __builtin_trap\n"); + } else { + pr_cont("Check Programmer's Manual\n"); + } +} + +/************************************************************************ + * API called by rest of kernel + ***********************************************************************/ + +void show_regs(struct pt_regs *regs) +{ + struct task_struct *tsk = current; + struct callee_regs *cregs = (struct callee_regs *)tsk->thread.callee_reg; + + /* + * generic code calls us with preemption disabled, but some calls + * here could sleep, so re-enable to avoid lockdep splat + */ + preempt_enable(); + + print_task_path_n_nm(tsk); + show_regs_print_info(KERN_INFO); + + show_ecr_verbose(regs); + + if (user_mode(regs)) + show_faulting_vma(regs->ret); /* faulting code, not data */ + + pr_info("ECR: 0x%08lx EFA: 0x%08lx ERET: 0x%08lx\nSTAT: 0x%08lx", + regs->event, current->thread.fault_address, regs->ret, + regs->status32); + +#define STS_BIT(r, bit) r->status32 & STATUS_##bit##_MASK ? #bit" " : "" + +#ifdef CONFIG_ISA_ARCOMPACT + pr_cont(" [%2s%2s%2s%2s%2s%2s%2s]", + (regs->status32 & STATUS_U_MASK) ? "U " : "K ", + STS_BIT(regs, DE), STS_BIT(regs, AE), + STS_BIT(regs, A2), STS_BIT(regs, A1), + STS_BIT(regs, E2), STS_BIT(regs, E1)); +#else + pr_cont(" [%2s%2s%2s%2s] ", + STS_BIT(regs, IE), + (regs->status32 & STATUS_U_MASK) ? 
"U " : "K ", + STS_BIT(regs, DE), STS_BIT(regs, AE)); +#endif + + print_regs_scratch(regs); + if (cregs) + print_regs_callee(cregs); + + preempt_disable(); +} + +void show_kernel_fault_diag(const char *str, struct pt_regs *regs, + unsigned long address) +{ + current->thread.fault_address = address; + + /* Show fault description */ + pr_info("\n%s\n", str); + + /* Caller and Callee regs */ + show_regs(regs); + + /* Show stack trace if this Fatality happened in kernel mode */ + if (!user_mode(regs)) + show_stacktrace(current, regs, KERN_DEFAULT); +} diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c new file mode 100644 index 0000000000..d63ebd81f1 --- /dev/null +++ b/arch/arc/kernel/unaligned.c @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2011-2012 Synopsys (www.synopsys.com) + * + * vineetg : May 2011 + * -Adapted (from .26 to .35) + * -original contribution by Tim.yao@amlogic.com + */ + +#include +#include +#include +#include +#include + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define BE 1 +#define FIRST_BYTE_16 "swap %1, %1\n swape %1, %1\n" +#define FIRST_BYTE_32 "swape %1, %1\n" +#else +#define BE 0 +#define FIRST_BYTE_16 +#define FIRST_BYTE_32 +#endif + +#define __get8_unaligned_check(val, addr, err) \ + __asm__( \ + "1: ldb.ab %1, [%2, 1]\n" \ + "2:\n" \ + " .section .fixup,\"ax\"\n" \ + " .align 4\n" \ + "3: mov %0, 1\n" \ + " j 2b\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 1b, 3b\n" \ + " .previous\n" \ + : "=r" (err), "=&r" (val), "=r" (addr) \ + : "0" (err), "2" (addr)) + +#define get16_unaligned_check(val, addr) \ + do { \ + unsigned int err = 0, v, a = addr; \ + __get8_unaligned_check(v, a, err); \ + val = v << ((BE) ? 8 : 0); \ + __get8_unaligned_check(v, a, err); \ + val |= v << ((BE) ? 0 : 8); \ + if (err) \ + goto fault; \ + } while (0) + +#define get32_unaligned_check(val, addr) \ + do { \ + unsigned int err = 0, v, a = addr; \ + __get8_unaligned_check(v, a, err); \ + val = v << ((BE) ? 24 : 0); \ + __get8_unaligned_check(v, a, err); \ + val |= v << ((BE) ? 16 : 8); \ + __get8_unaligned_check(v, a, err); \ + val |= v << ((BE) ? 8 : 16); \ + __get8_unaligned_check(v, a, err); \ + val |= v << ((BE) ? 
0 : 24); \ + if (err) \ + goto fault; \ + } while (0) + +#define put16_unaligned_check(val, addr) \ + do { \ + unsigned int err = 0, v = val, a = addr;\ + \ + __asm__( \ + FIRST_BYTE_16 \ + "1: stb.ab %1, [%2, 1]\n" \ + " lsr %1, %1, 8\n" \ + "2: stb %1, [%2]\n" \ + "3:\n" \ + " .section .fixup,\"ax\"\n" \ + " .align 4\n" \ + "4: mov %0, 1\n" \ + " j 3b\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 1b, 4b\n" \ + " .long 2b, 4b\n" \ + " .previous\n" \ + : "=r" (err), "=&r" (v), "=&r" (a) \ + : "0" (err), "1" (v), "2" (a)); \ + \ + if (err) \ + goto fault; \ + } while (0) + +#define put32_unaligned_check(val, addr) \ + do { \ + unsigned int err = 0, v = val, a = addr;\ + \ + __asm__( \ + FIRST_BYTE_32 \ + "1: stb.ab %1, [%2, 1]\n" \ + " lsr %1, %1, 8\n" \ + "2: stb.ab %1, [%2, 1]\n" \ + " lsr %1, %1, 8\n" \ + "3: stb.ab %1, [%2, 1]\n" \ + " lsr %1, %1, 8\n" \ + "4: stb %1, [%2]\n" \ + "5:\n" \ + " .section .fixup,\"ax\"\n" \ + " .align 4\n" \ + "6: mov %0, 1\n" \ + " j 5b\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 1b, 6b\n" \ + " .long 2b, 6b\n" \ + " .long 3b, 6b\n" \ + " .long 4b, 6b\n" \ + " .previous\n" \ + : "=r" (err), "=&r" (v), "=&r" (a) \ + : "0" (err), "1" (v), "2" (a)); \ + \ + if (err) \ + goto fault; \ + } while (0) + +/* sysctl hooks */ +int unaligned_enabled __read_mostly = 1; /* Enabled by default */ +int no_unaligned_warning __read_mostly = 1; /* Only 1 warning by default */ + +static void fixup_load(struct disasm_state *state, struct pt_regs *regs, + struct callee_regs *cregs) +{ + int val; + + /* register write back */ + if ((state->aa == 1) || (state->aa == 2)) { + set_reg(state->wb_reg, state->src1 + state->src2, regs, cregs); + + if (state->aa == 2) + state->src2 = 0; + } + + if (state->zz == 0) { + get32_unaligned_check(val, state->src1 + state->src2); + } else { + get16_unaligned_check(val, state->src1 + state->src2); + + if (state->x) + val = (val << 16) >> 16; + } + + if (state->pref == 0) + set_reg(state->dest, val, regs, cregs); + + return; + +fault: state->fault = 1; +} + +static void fixup_store(struct disasm_state *state, struct pt_regs *regs, + struct callee_regs *cregs) +{ + /* register write back */ + if ((state->aa == 1) || (state->aa == 2)) { + set_reg(state->wb_reg, state->src2 + state->src3, regs, cregs); + + if (state->aa == 3) + state->src3 = 0; + } else if (state->aa == 3) { + if (state->zz == 2) { + set_reg(state->wb_reg, state->src2 + (state->src3 << 1), + regs, cregs); + } else if (!state->zz) { + set_reg(state->wb_reg, state->src2 + (state->src3 << 2), + regs, cregs); + } else { + goto fault; + } + } + + /* write fix-up */ + if (!state->zz) + put32_unaligned_check(state->src1, state->src2 + state->src3); + else + put16_unaligned_check(state->src1, state->src2 + state->src3); + + return; + +fault: state->fault = 1; +} + +/* + * Handle an unaligned access + * Returns 0 if successfully handled, 1 if some error happened + */ +int misaligned_fixup(unsigned long address, struct pt_regs *regs, + struct callee_regs *cregs) +{ + struct disasm_state state; + char buf[TASK_COMM_LEN]; + + /* handle user mode only and only if enabled by sysadmin */ + if (!user_mode(regs) || !unaligned_enabled) + return 1; + + if (no_unaligned_warning) { + pr_warn_once("%s(%d) made unaligned access which was emulated" + " by kernel assist\n. This can degrade application" + " performance significantly\n. 
To enable further" + " logging of such instances, please \n" + " echo 0 > /proc/sys/kernel/ignore-unaligned-usertrap\n", + get_task_comm(buf, current), task_pid_nr(current)); + } else { + /* Add rate limiting if it gets down to it */ + pr_warn("%s(%d): unaligned access to/from 0x%lx by PC: 0x%lx\n", + get_task_comm(buf, current), task_pid_nr(current), + address, regs->ret); + + } + + disasm_instr(regs->ret, &state, 1, regs, cregs); + + if (state.fault) + goto fault; + + /* ldb/stb should not have unaligned exception */ + if ((state.zz == 1) || (state.di)) + goto fault; + + if (!state.write) + fixup_load(&state, regs, cregs); + else + fixup_store(&state, regs, cregs); + + if (state.fault) + goto fault; + + /* clear any remanants of delay slot */ + if (delay_mode(regs)) { + regs->ret = regs->bta & ~1U; + regs->status32 &= ~STATUS_DE_MASK; + } else { + regs->ret += state.instr_len; + + /* handle zero-overhead-loop */ + if ((regs->ret == regs->lp_end) && (regs->lp_count)) { + regs->ret = regs->lp_start; + regs->lp_count--; + } + } + + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, address); + return 0; + +fault: + pr_err("Alignment trap: fault in fix-up %08lx at [<%08lx>]\n", + state.words[0], address); + + return 1; +} diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c new file mode 100644 index 0000000000..200270a945 --- /dev/null +++ b/arch/arc/kernel/unwind.c @@ -0,0 +1,1313 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2002-2006 Novell, Inc. + * Jan Beulich + * + * A simple API for unwinding kernel stacks. This is used for + * debugging and error reporting purposes. The kernel doesn't need + * full-blown stack unwinding with all the bells and whistles, so there + * is not much point in implementing the full Dwarf2 unwind API. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern char __start_unwind[], __end_unwind[]; +/* extern const u8 __start_unwind_hdr[], __end_unwind_hdr[];*/ + +/* #define UNWIND_DEBUG */ + +#ifdef UNWIND_DEBUG +int dbg_unw; +#define unw_debug(fmt, ...) \ +do { \ + if (dbg_unw) \ + pr_info(fmt, ##__VA_ARGS__); \ +} while (0); +#else +#define unw_debug(fmt, ...) 
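+/* expands to nothing when UNWIND_DEBUG is not defined, so call sites compile away */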
+#endif + +#define MAX_STACK_DEPTH 8 + +#define EXTRA_INFO(f) { \ + BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \ + % sizeof_field(struct unwind_frame_info, f)) \ + + offsetof(struct unwind_frame_info, f) \ + / sizeof_field(struct unwind_frame_info, f), \ + sizeof_field(struct unwind_frame_info, f) \ + } +#define PTREGS_INFO(f) EXTRA_INFO(regs.f) + +static const struct { + unsigned offs:BITS_PER_LONG / 2; + unsigned width:BITS_PER_LONG / 2; +} reg_info[] = { +UNW_REGISTER_INFO}; + +#undef PTREGS_INFO +#undef EXTRA_INFO + +#ifndef REG_INVALID +#define REG_INVALID(r) (reg_info[r].width == 0) +#endif + +#define DW_CFA_nop 0x00 +#define DW_CFA_set_loc 0x01 +#define DW_CFA_advance_loc1 0x02 +#define DW_CFA_advance_loc2 0x03 +#define DW_CFA_advance_loc4 0x04 +#define DW_CFA_offset_extended 0x05 +#define DW_CFA_restore_extended 0x06 +#define DW_CFA_undefined 0x07 +#define DW_CFA_same_value 0x08 +#define DW_CFA_register 0x09 +#define DW_CFA_remember_state 0x0a +#define DW_CFA_restore_state 0x0b +#define DW_CFA_def_cfa 0x0c +#define DW_CFA_def_cfa_register 0x0d +#define DW_CFA_def_cfa_offset 0x0e +#define DW_CFA_def_cfa_expression 0x0f +#define DW_CFA_expression 0x10 +#define DW_CFA_offset_extended_sf 0x11 +#define DW_CFA_def_cfa_sf 0x12 +#define DW_CFA_def_cfa_offset_sf 0x13 +#define DW_CFA_val_offset 0x14 +#define DW_CFA_val_offset_sf 0x15 +#define DW_CFA_val_expression 0x16 +#define DW_CFA_lo_user 0x1c +#define DW_CFA_GNU_window_save 0x2d +#define DW_CFA_GNU_args_size 0x2e +#define DW_CFA_GNU_negative_offset_extended 0x2f +#define DW_CFA_hi_user 0x3f + +#define DW_EH_PE_FORM 0x07 +#define DW_EH_PE_native 0x00 +#define DW_EH_PE_leb128 0x01 +#define DW_EH_PE_data2 0x02 +#define DW_EH_PE_data4 0x03 +#define DW_EH_PE_data8 0x04 +#define DW_EH_PE_signed 0x08 +#define DW_EH_PE_ADJUST 0x70 +#define DW_EH_PE_abs 0x00 +#define DW_EH_PE_pcrel 0x10 +#define DW_EH_PE_textrel 0x20 +#define DW_EH_PE_datarel 0x30 +#define DW_EH_PE_funcrel 0x40 +#define DW_EH_PE_aligned 0x50 +#define DW_EH_PE_indirect 0x80 +#define DW_EH_PE_omit 0xff + +#define CIE_ID 0 + +typedef unsigned long uleb128_t; +typedef signed long sleb128_t; + +static struct unwind_table { + struct { + unsigned long pc; + unsigned long range; + } core, init; + const void *address; + unsigned long size; + const unsigned char *header; + unsigned long hdrsz; + struct unwind_table *link; + const char *name; +} root_table; + +struct unwind_item { + enum item_location { + Nowhere, + Memory, + Register, + Value + } where; + uleb128_t value; +}; + +struct unwind_state { + uleb128_t loc, org; + const u8 *cieStart, *cieEnd; + uleb128_t codeAlign; + sleb128_t dataAlign; + struct cfa { + uleb128_t reg, offs; + } cfa; + struct unwind_item regs[ARRAY_SIZE(reg_info)]; + unsigned stackDepth:8; + unsigned version:8; + const u8 *label; + const u8 *stack[MAX_STACK_DEPTH]; +}; + +static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 }; + +static struct unwind_table *find_table(unsigned long pc) +{ + struct unwind_table *table; + + for (table = &root_table; table; table = table->link) + if ((pc >= table->core.pc + && pc < table->core.pc + table->core.range) + || (pc >= table->init.pc + && pc < table->init.pc + table->init.range)) + break; + + return table; +} + +static unsigned long read_pointer(const u8 **pLoc, + const void *end, signed ptrType); +static void init_unwind_hdr(struct unwind_table *table, + void *(*alloc) (unsigned long)); + +/* + * wrappers for header alloc (vs. calling one vs. 
other at call site) + * to elide section mismatches warnings + */ +static void *__init unw_hdr_alloc_early(unsigned long sz) +{ + return memblock_alloc_from(sz, sizeof(unsigned int), MAX_DMA_ADDRESS); +} + +static void init_unwind_table(struct unwind_table *table, const char *name, + const void *core_start, unsigned long core_size, + const void *init_start, unsigned long init_size, + const void *table_start, unsigned long table_size, + const u8 *header_start, unsigned long header_size) +{ + table->core.pc = (unsigned long)core_start; + table->core.range = core_size; + table->init.pc = (unsigned long)init_start; + table->init.range = init_size; + table->address = table_start; + table->size = table_size; + /* To avoid the pointer addition with NULL pointer.*/ + if (header_start != NULL) { + const u8 *ptr = header_start + 4; + const u8 *end = header_start + header_size; + /* See if the linker provided table looks valid. */ + if (header_size <= 4 + || header_start[0] != 1 + || (void *)read_pointer(&ptr, end, header_start[1]) + != table_start + || header_start[2] == DW_EH_PE_omit + || read_pointer(&ptr, end, header_start[2]) <= 0 + || header_start[3] == DW_EH_PE_omit) + header_start = NULL; + } + table->hdrsz = header_size; + smp_wmb(); + table->header = header_start; + table->link = NULL; + table->name = name; +} + +void __init arc_unwind_init(void) +{ + init_unwind_table(&root_table, "kernel", _text, _end - _text, NULL, 0, + __start_unwind, __end_unwind - __start_unwind, + NULL, 0); + /*__start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr);*/ + + init_unwind_hdr(&root_table, unw_hdr_alloc_early); +} + +static const u32 bad_cie, not_fde; +static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *); +static const u32 *__cie_for_fde(const u32 *fde); +static signed fde_pointer_type(const u32 *cie); + +struct eh_frame_hdr_table_entry { + unsigned long start, fde; +}; + +static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2) +{ + const struct eh_frame_hdr_table_entry *e1 = p1; + const struct eh_frame_hdr_table_entry *e2 = p2; + + return (e1->start > e2->start) - (e1->start < e2->start); +} + +static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size) +{ + struct eh_frame_hdr_table_entry *e1 = p1; + struct eh_frame_hdr_table_entry *e2 = p2; + + swap(e1->start, e2->start); + swap(e1->fde, e2->fde); +} + +static void init_unwind_hdr(struct unwind_table *table, + void *(*alloc) (unsigned long)) +{ + const u8 *ptr; + unsigned long tableSize = table->size, hdrSize; + unsigned int n; + const u32 *fde; + struct { + u8 version; + u8 eh_frame_ptr_enc; + u8 fde_count_enc; + u8 table_enc; + unsigned long eh_frame_ptr; + unsigned int fde_count; + struct eh_frame_hdr_table_entry table[]; + } __attribute__ ((__packed__)) *header; + + if (table->header) + return; + + if (table->hdrsz) + pr_warn(".eh_frame_hdr for '%s' present but unusable\n", + table->name); + + if (tableSize & (sizeof(*fde) - 1)) + return; + + for (fde = table->address, n = 0; + tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde; + tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) { + const u32 *cie = cie_for_fde(fde, table); + signed ptrType; + + if (cie == ¬_fde) + continue; + if (cie == NULL || cie == &bad_cie) + goto ret_err; + ptrType = fde_pointer_type(cie); + if (ptrType < 0) + goto ret_err; + + ptr = (const u8 *)(fde + 2); + if (!read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, + ptrType)) { + /* FIXME_Rajesh We have 4 instances of null addresses + * instead 
of the initial loc addr + * return; + */ + WARN(1, "unwinder: FDE->initial_location NULL %p\n", + (const u8 *)(fde + 1) + *fde); + } + ++n; + } + + if (tableSize || !n) + goto ret_err; + + hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int) + + 2 * n * sizeof(unsigned long); + + header = alloc(hdrSize); + if (!header) + goto ret_err; + + header->version = 1; + header->eh_frame_ptr_enc = DW_EH_PE_abs | DW_EH_PE_native; + header->fde_count_enc = DW_EH_PE_abs | DW_EH_PE_data4; + header->table_enc = DW_EH_PE_abs | DW_EH_PE_native; + put_unaligned((unsigned long)table->address, &header->eh_frame_ptr); + BUILD_BUG_ON(offsetof(typeof(*header), fde_count) + % __alignof(typeof(header->fde_count))); + header->fde_count = n; + + BUILD_BUG_ON(offsetof(typeof(*header), table) + % __alignof(typeof(*header->table))); + for (fde = table->address, tableSize = table->size, n = 0; + tableSize; + tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) { + const u32 *cie = __cie_for_fde(fde); + + if (fde[1] == CIE_ID) + continue; /* this is a CIE */ + ptr = (const u8 *)(fde + 2); + header->table[n].start = read_pointer(&ptr, + (const u8 *)(fde + 1) + + *fde, + fde_pointer_type(cie)); + header->table[n].fde = (unsigned long)fde; + ++n; + } + WARN_ON(n != header->fde_count); + + sort(header->table, + n, + sizeof(*header->table), + cmp_eh_frame_hdr_table_entries, swap_eh_frame_hdr_table_entries); + + table->hdrsz = hdrSize; + smp_wmb(); + table->header = (const void *)header; + return; + +ret_err: + panic("Attention !!! Dwarf FDE parsing errors\n"); +} + +#ifdef CONFIG_MODULES +static void *unw_hdr_alloc(unsigned long sz) +{ + return kmalloc(sz, GFP_KERNEL); +} + +static struct unwind_table *last_table; + +/* Must be called with module_mutex held. */ +void *unwind_add_table(struct module *module, const void *table_start, + unsigned long table_size) +{ + struct unwind_table *table; + + if (table_size <= 0) + return NULL; + + table = kmalloc(sizeof(*table), GFP_KERNEL); + if (!table) + return NULL; + + init_unwind_table(table, module->name, + module->core_layout.base, module->core_layout.size, + module->init_layout.base, module->init_layout.size, + table_start, table_size, + NULL, 0); + + init_unwind_hdr(table, unw_hdr_alloc); + +#ifdef UNWIND_DEBUG + unw_debug("Table added for [%s] %lx %lx\n", + module->name, table->core.pc, table->core.range); +#endif + if (last_table) + last_table->link = table; + else + root_table.link = table; + last_table = table; + + return table; +} + +struct unlink_table_info { + struct unwind_table *table; + int init_only; +}; + +static int unlink_table(void *arg) +{ + struct unlink_table_info *info = arg; + struct unwind_table *table = info->table, *prev; + + for (prev = &root_table; prev->link && prev->link != table; + prev = prev->link) + ; + + if (prev->link) { + if (info->init_only) { + table->init.pc = 0; + table->init.range = 0; + info->table = NULL; + } else { + prev->link = table->link; + if (!prev->link) + last_table = prev; + } + } else + info->table = NULL; + + return 0; +} + +/* Must be called with module_mutex held. 
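+ * (serialises against unwind_add_table() modifying the same table list)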
*/ +void unwind_remove_table(void *handle, int init_only) +{ + struct unwind_table *table = handle; + struct unlink_table_info info; + + if (!table || table == &root_table) + return; + + if (init_only && table == last_table) { + table->init.pc = 0; + table->init.range = 0; + return; + } + + info.table = table; + info.init_only = init_only; + + unlink_table(&info); /* XXX: SMP */ + kfree(table->header); + kfree(table); +} + +#endif /* CONFIG_MODULES */ + +static uleb128_t get_uleb128(const u8 **pcur, const u8 *end) +{ + const u8 *cur = *pcur; + uleb128_t value; + unsigned int shift; + + for (shift = 0, value = 0; cur < end; shift += 7) { + if (shift + 7 > 8 * sizeof(value) + && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) { + cur = end + 1; + break; + } + value |= (uleb128_t) (*cur & 0x7f) << shift; + if (!(*cur++ & 0x80)) + break; + } + *pcur = cur; + + return value; +} + +static sleb128_t get_sleb128(const u8 **pcur, const u8 *end) +{ + const u8 *cur = *pcur; + sleb128_t value; + unsigned int shift; + + for (shift = 0, value = 0; cur < end; shift += 7) { + if (shift + 7 > 8 * sizeof(value) + && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) { + cur = end + 1; + break; + } + value |= (sleb128_t) (*cur & 0x7f) << shift; + if (!(*cur & 0x80)) { + value |= -(*cur++ & 0x40) << shift; + break; + } + } + *pcur = cur; + + return value; +} + +static const u32 *__cie_for_fde(const u32 *fde) +{ + const u32 *cie; + + cie = fde + 1 - fde[1] / sizeof(*fde); + + return cie; +} + +static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table) +{ + const u32 *cie; + + if (!*fde || (*fde & (sizeof(*fde) - 1))) + return &bad_cie; + + if (fde[1] == CIE_ID) + return ¬_fde; /* this is a CIE */ + + if ((fde[1] & (sizeof(*fde) - 1))) +/* || fde[1] > (unsigned long)(fde + 1) - (unsigned long)table->address) */ + return NULL; /* this is not a valid FDE */ + + cie = __cie_for_fde(fde); + + if (*cie <= sizeof(*cie) + 4 || *cie >= fde[1] - sizeof(*fde) + || (*cie & (sizeof(*cie) - 1)) + || (cie[1] != CIE_ID)) + return NULL; /* this is not a (valid) CIE */ + return cie; +} + +static unsigned long read_pointer(const u8 **pLoc, const void *end, + signed ptrType) +{ + unsigned long value = 0; + union { + const u8 *p8; + const u16 *p16u; + const s16 *p16s; + const u32 *p32u; + const s32 *p32s; + const unsigned long *pul; + } ptr; + + if (ptrType < 0 || ptrType == DW_EH_PE_omit) + return 0; + ptr.p8 = *pLoc; + switch (ptrType & DW_EH_PE_FORM) { + case DW_EH_PE_data2: + if (end < (const void *)(ptr.p16u + 1)) + return 0; + if (ptrType & DW_EH_PE_signed) + value = get_unaligned((u16 *) ptr.p16s++); + else + value = get_unaligned((u16 *) ptr.p16u++); + break; + case DW_EH_PE_data4: +#ifdef CONFIG_64BIT + if (end < (const void *)(ptr.p32u + 1)) + return 0; + if (ptrType & DW_EH_PE_signed) + value = get_unaligned(ptr.p32s++); + else + value = get_unaligned(ptr.p32u++); + break; + case DW_EH_PE_data8: + BUILD_BUG_ON(sizeof(u64) != sizeof(value)); +#else + BUILD_BUG_ON(sizeof(u32) != sizeof(value)); +#endif + fallthrough; + case DW_EH_PE_native: + if (end < (const void *)(ptr.pul + 1)) + return 0; + value = get_unaligned((unsigned long *)ptr.pul++); + break; + case DW_EH_PE_leb128: + BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value)); + value = ptrType & DW_EH_PE_signed ? 
get_sleb128(&ptr.p8, end) + : get_uleb128(&ptr.p8, end); + if ((const void *)ptr.p8 > end) + return 0; + break; + default: + return 0; + } + switch (ptrType & DW_EH_PE_ADJUST) { + case DW_EH_PE_abs: + break; + case DW_EH_PE_pcrel: + value += (unsigned long)*pLoc; + break; + default: + return 0; + } + if ((ptrType & DW_EH_PE_indirect) + && __get_user(value, (unsigned long __user *)value)) + return 0; + *pLoc = ptr.p8; + + return value; +} + +static signed fde_pointer_type(const u32 *cie) +{ + const u8 *ptr = (const u8 *)(cie + 2); + unsigned int version = *ptr; + + if (*++ptr) { + const char *aug; + const u8 *end = (const u8 *)(cie + 1) + *cie; + uleb128_t len; + + /* check if augmentation size is first (and thus present) */ + if (*ptr != 'z') + return -1; + + /* check if augmentation string is nul-terminated */ + aug = (const void *)ptr; + ptr = memchr(aug, 0, end - ptr); + if (ptr == NULL) + return -1; + + ++ptr; /* skip terminator */ + get_uleb128(&ptr, end); /* skip code alignment */ + get_sleb128(&ptr, end); /* skip data alignment */ + /* skip return address column */ + version <= 1 ? (void) ++ptr : (void)get_uleb128(&ptr, end); + len = get_uleb128(&ptr, end); /* augmentation length */ + + if (ptr + len < ptr || ptr + len > end) + return -1; + + end = ptr + len; + while (*++aug) { + if (ptr >= end) + return -1; + switch (*aug) { + case 'L': + ++ptr; + break; + case 'P':{ + signed ptrType = *ptr++; + + if (!read_pointer(&ptr, end, ptrType) + || ptr > end) + return -1; + } + break; + case 'R': + return *ptr; + default: + return -1; + } + } + } + return DW_EH_PE_native | DW_EH_PE_abs; +} + +static int advance_loc(unsigned long delta, struct unwind_state *state) +{ + state->loc += delta * state->codeAlign; + + /* FIXME_Rajesh: Probably we are defining for the initial range as well; + return delta > 0; + */ + unw_debug("delta %3lu => loc 0x%lx: ", delta, state->loc); + return 1; +} + +static void set_rule(uleb128_t reg, enum item_location where, uleb128_t value, + struct unwind_state *state) +{ + if (reg < ARRAY_SIZE(state->regs)) { + state->regs[reg].where = where; + state->regs[reg].value = value; + +#ifdef UNWIND_DEBUG + unw_debug("r%lu: ", reg); + switch (where) { + case Nowhere: + unw_debug("s "); + break; + case Memory: + unw_debug("c(%lu) ", value); + break; + case Register: + unw_debug("r(%lu) ", value); + break; + case Value: + unw_debug("v(%lu) ", value); + break; + default: + break; + } +#endif + } +} + +static int processCFI(const u8 *start, const u8 *end, unsigned long targetLoc, + signed ptrType, struct unwind_state *state) +{ + union { + const u8 *p8; + const u16 *p16; + const u32 *p32; + } ptr; + int result = 1; + u8 opcode; + + if (start != state->cieStart) { + state->loc = state->org; + result = + processCFI(state->cieStart, state->cieEnd, 0, ptrType, + state); + if (targetLoc == 0 && state->label == NULL) + return result; + } + for (ptr.p8 = start; result && ptr.p8 < end;) { + switch (*ptr.p8 >> 6) { + uleb128_t value; + + case 0: + opcode = *ptr.p8++; + + switch (opcode) { + case DW_CFA_nop: + unw_debug("cfa nop "); + break; + case DW_CFA_set_loc: + state->loc = read_pointer(&ptr.p8, end, + ptrType); + if (state->loc == 0) + result = 0; + unw_debug("cfa_set_loc: 0x%lx ", state->loc); + break; + case DW_CFA_advance_loc1: + unw_debug("\ncfa advance loc1:"); + result = ptr.p8 < end + && advance_loc(*ptr.p8++, state); + break; + case DW_CFA_advance_loc2: + value = *ptr.p8++; + value += *ptr.p8++ << 8; + unw_debug("\ncfa advance loc2:"); + result = ptr.p8 <= end + 2 + /* && 
advance_loc(*ptr.p16++, state); */ + && advance_loc(value, state); + break; + case DW_CFA_advance_loc4: + unw_debug("\ncfa advance loc4:"); + result = ptr.p8 <= end + 4 + && advance_loc(*ptr.p32++, state); + break; + case DW_CFA_offset_extended: + value = get_uleb128(&ptr.p8, end); + unw_debug("cfa_offset_extended: "); + set_rule(value, Memory, + get_uleb128(&ptr.p8, end), state); + break; + case DW_CFA_val_offset: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Value, + get_uleb128(&ptr.p8, end), state); + break; + case DW_CFA_offset_extended_sf: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Memory, + get_sleb128(&ptr.p8, end), state); + break; + case DW_CFA_val_offset_sf: + value = get_uleb128(&ptr.p8, end); + set_rule(value, Value, + get_sleb128(&ptr.p8, end), state); + break; + case DW_CFA_restore_extended: + unw_debug("cfa_restore_extended: "); + case DW_CFA_undefined: + unw_debug("cfa_undefined: "); + case DW_CFA_same_value: + unw_debug("cfa_same_value: "); + set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0, + state); + break; + case DW_CFA_register: + unw_debug("cfa_register: "); + value = get_uleb128(&ptr.p8, end); + set_rule(value, + Register, + get_uleb128(&ptr.p8, end), state); + break; + case DW_CFA_remember_state: + unw_debug("cfa_remember_state: "); + if (ptr.p8 == state->label) { + state->label = NULL; + return 1; + } + if (state->stackDepth >= MAX_STACK_DEPTH) + return 0; + state->stack[state->stackDepth++] = ptr.p8; + break; + case DW_CFA_restore_state: + unw_debug("cfa_restore_state: "); + if (state->stackDepth) { + const uleb128_t loc = state->loc; + const u8 *label = state->label; + + state->label = + state->stack[state->stackDepth - 1]; + memcpy(&state->cfa, &badCFA, + sizeof(state->cfa)); + memset(state->regs, 0, + sizeof(state->regs)); + state->stackDepth = 0; + result = + processCFI(start, end, 0, ptrType, + state); + state->loc = loc; + state->label = label; + } else + return 0; + break; + case DW_CFA_def_cfa: + state->cfa.reg = get_uleb128(&ptr.p8, end); + unw_debug("cfa_def_cfa: r%lu ", state->cfa.reg); + fallthrough; + case DW_CFA_def_cfa_offset: + state->cfa.offs = get_uleb128(&ptr.p8, end); + unw_debug("cfa_def_cfa_offset: 0x%lx ", + state->cfa.offs); + break; + case DW_CFA_def_cfa_sf: + state->cfa.reg = get_uleb128(&ptr.p8, end); + fallthrough; + case DW_CFA_def_cfa_offset_sf: + state->cfa.offs = get_sleb128(&ptr.p8, end) + * state->dataAlign; + break; + case DW_CFA_def_cfa_register: + unw_debug("cfa_def_cfa_register: "); + state->cfa.reg = get_uleb128(&ptr.p8, end); + break; + /*todo case DW_CFA_def_cfa_expression: */ + /*todo case DW_CFA_expression: */ + /*todo case DW_CFA_val_expression: */ + case DW_CFA_GNU_args_size: + get_uleb128(&ptr.p8, end); + break; + case DW_CFA_GNU_negative_offset_extended: + value = get_uleb128(&ptr.p8, end); + set_rule(value, + Memory, + (uleb128_t) 0 - get_uleb128(&ptr.p8, + end), + state); + break; + case DW_CFA_GNU_window_save: + default: + unw_debug("UNKNOWN OPCODE 0x%x\n", opcode); + result = 0; + break; + } + break; + case 1: + unw_debug("\ncfa_adv_loc: "); + result = advance_loc(*ptr.p8++ & 0x3f, state); + break; + case 2: + unw_debug("cfa_offset: "); + value = *ptr.p8++ & 0x3f; + set_rule(value, Memory, get_uleb128(&ptr.p8, end), + state); + break; + case 3: + unw_debug("cfa_restore: "); + set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state); + break; + } + + if (ptr.p8 > end) + result = 0; + if (result && targetLoc != 0 && targetLoc < state->loc) + return 1; + } + + return result && ptr.p8 == end && (targetLoc == 
0 || ( + /*todo While in theory this should apply, gcc in practice omits + everything past the function prolog, and hence the location + never reaches the end of the function. + targetLoc < state->loc && */ state->label == NULL)); +} + +/* Unwind to previous to frame. Returns 0 if successful, negative + * number in case of an error. */ +int arc_unwind(struct unwind_frame_info *frame) +{ +#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs]) + const u32 *fde = NULL, *cie = NULL; + const u8 *ptr = NULL, *end = NULL; + unsigned long pc = UNW_PC(frame) - frame->call_frame; + unsigned long startLoc = 0, endLoc = 0, cfa; + unsigned int i; + signed ptrType = -1; + uleb128_t retAddrReg = 0; + const struct unwind_table *table; + struct unwind_state state; + unsigned long *fptr; + unsigned long addr; + + unw_debug("\n\nUNWIND FRAME:\n"); + unw_debug("PC: 0x%lx BLINK: 0x%lx, SP: 0x%lx, FP: 0x%x\n", + UNW_PC(frame), UNW_BLINK(frame), UNW_SP(frame), + UNW_FP(frame)); + + if (UNW_PC(frame) == 0) + return -EINVAL; + +#ifdef UNWIND_DEBUG + { + unsigned long *sptr = (unsigned long *)UNW_SP(frame); + unw_debug("\nStack Dump:\n"); + for (i = 0; i < 20; i++, sptr++) + unw_debug("0x%p: 0x%lx\n", sptr, *sptr); + unw_debug("\n"); + } +#endif + + table = find_table(pc); + if (table != NULL + && !(table->size & (sizeof(*fde) - 1))) { + const u8 *hdr = table->header; + unsigned long tableSize; + + smp_rmb(); + if (hdr && hdr[0] == 1) { + switch (hdr[3] & DW_EH_PE_FORM) { + case DW_EH_PE_native: + tableSize = sizeof(unsigned long); + break; + case DW_EH_PE_data2: + tableSize = 2; + break; + case DW_EH_PE_data4: + tableSize = 4; + break; + case DW_EH_PE_data8: + tableSize = 8; + break; + default: + tableSize = 0; + break; + } + ptr = hdr + 4; + end = hdr + table->hdrsz; + if (tableSize && read_pointer(&ptr, end, hdr[1]) + == (unsigned long)table->address + && (i = read_pointer(&ptr, end, hdr[2])) > 0 + && i == (end - ptr) / (2 * tableSize) + && !((end - ptr) % (2 * tableSize))) { + do { + const u8 *cur = + ptr + (i / 2) * (2 * tableSize); + + startLoc = read_pointer(&cur, + cur + tableSize, + hdr[3]); + if (pc < startLoc) + i /= 2; + else { + ptr = cur - tableSize; + i = (i + 1) / 2; + } + } while (startLoc && i > 1); + if (i == 1 + && (startLoc = read_pointer(&ptr, + ptr + tableSize, + hdr[3])) != 0 + && pc >= startLoc) + fde = (void *)read_pointer(&ptr, + ptr + + tableSize, + hdr[3]); + } + } + + if (fde != NULL) { + cie = cie_for_fde(fde, table); + ptr = (const u8 *)(fde + 2); + if (cie != NULL + && cie != &bad_cie + && cie != ¬_fde + && (ptrType = fde_pointer_type(cie)) >= 0 + && read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType) == startLoc) { + if (!(ptrType & DW_EH_PE_indirect)) + ptrType &= + DW_EH_PE_FORM | DW_EH_PE_signed; + endLoc = + startLoc + read_pointer(&ptr, + (const u8 *)(fde + + 1) + + *fde, ptrType); + if (pc >= endLoc) { + fde = NULL; + cie = NULL; + } + } else { + fde = NULL; + cie = NULL; + } + } + } + if (cie != NULL) { + memset(&state, 0, sizeof(state)); + state.cieEnd = ptr; /* keep here temporarily */ + ptr = (const u8 *)(cie + 2); + end = (const u8 *)(cie + 1) + *cie; + frame->call_frame = 1; + if (*++ptr) { + /* check if augmentation size is first (thus present) */ + if (*ptr == 'z') { + while (++ptr < end && *ptr) { + switch (*ptr) { + /* chk for ignorable or already handled + * nul-terminated augmentation string */ + case 'L': + case 'P': + case 'R': + continue; + case 'S': + frame->call_frame = 0; + continue; + default: + break; + } + break; + } + } + if (ptr >= end || 
*ptr) + cie = NULL; + } + ++ptr; + } + if (cie != NULL) { + /* get code alignment factor */ + state.codeAlign = get_uleb128(&ptr, end); + /* get data alignment factor */ + state.dataAlign = get_sleb128(&ptr, end); + if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end) + cie = NULL; + else { + retAddrReg = + state.version <= 1 ? *ptr++ : get_uleb128(&ptr, + end); + unw_debug("CIE Frame Info:\n"); + unw_debug("return Address register 0x%lx\n", + retAddrReg); + unw_debug("data Align: %ld\n", state.dataAlign); + unw_debug("code Align: %lu\n", state.codeAlign); + /* skip augmentation */ + if (((const char *)(cie + 2))[1] == 'z') { + uleb128_t augSize = get_uleb128(&ptr, end); + + ptr += augSize; + } + if (ptr > end || retAddrReg >= ARRAY_SIZE(reg_info) + || REG_INVALID(retAddrReg) + || reg_info[retAddrReg].width != + sizeof(unsigned long)) + cie = NULL; + } + } + if (cie != NULL) { + state.cieStart = ptr; + ptr = state.cieEnd; + state.cieEnd = end; + end = (const u8 *)(fde + 1) + *fde; + /* skip augmentation */ + if (((const char *)(cie + 2))[1] == 'z') { + uleb128_t augSize = get_uleb128(&ptr, end); + + if ((ptr += augSize) > end) + fde = NULL; + } + } + if (cie == NULL || fde == NULL) { +#ifdef CONFIG_FRAME_POINTER + unsigned long top, bottom; + + top = STACK_TOP_UNW(frame->task); + bottom = STACK_BOTTOM_UNW(frame->task); +#if FRAME_RETADDR_OFFSET < 0 + if (UNW_SP(frame) < top && UNW_FP(frame) <= UNW_SP(frame) + && bottom < UNW_FP(frame) +#else + if (UNW_SP(frame) > top && UNW_FP(frame) >= UNW_SP(frame) + && bottom > UNW_FP(frame) +#endif + && !((UNW_SP(frame) | UNW_FP(frame)) + & (sizeof(unsigned long) - 1))) { + unsigned long link; + + if (!__get_user(link, (unsigned long *) + (UNW_FP(frame) + FRAME_LINK_OFFSET)) +#if FRAME_RETADDR_OFFSET < 0 + && link > bottom && link < UNW_FP(frame) +#else + && link > UNW_FP(frame) && link < bottom +#endif + && !(link & (sizeof(link) - 1)) + && !__get_user(UNW_PC(frame), + (unsigned long *)(UNW_FP(frame) + + FRAME_RETADDR_OFFSET))) + { + UNW_SP(frame) = + UNW_FP(frame) + FRAME_RETADDR_OFFSET +#if FRAME_RETADDR_OFFSET < 0 + - +#else + + +#endif + sizeof(UNW_PC(frame)); + UNW_FP(frame) = link; + return 0; + } + } +#endif + return -ENXIO; + } + state.org = startLoc; + memcpy(&state.cfa, &badCFA, sizeof(state.cfa)); + + unw_debug("\nProcess instructions\n"); + + /* process instructions + * For ARC, we optimize by having blink(retAddrReg) with + * the sameValue in the leaf function, so we should not check + * state.regs[retAddrReg].where == Nowhere + */ + if (!processCFI(ptr, end, pc, ptrType, &state) + || state.loc > endLoc +/* || state.regs[retAddrReg].where == Nowhere */ + || state.cfa.reg >= ARRAY_SIZE(reg_info) + || reg_info[state.cfa.reg].width != sizeof(unsigned long) + || state.cfa.offs % sizeof(unsigned long)) + return -EIO; + +#ifdef UNWIND_DEBUG + unw_debug("\n"); + + unw_debug("\nRegister State Based on the rules parsed from FDE:\n"); + for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { + + if (REG_INVALID(i)) + continue; + + switch (state.regs[i].where) { + case Nowhere: + break; + case Memory: + unw_debug(" r%d: c(%lu),", i, state.regs[i].value); + break; + case Register: + unw_debug(" r%d: r(%lu),", i, state.regs[i].value); + break; + case Value: + unw_debug(" r%d: v(%lu),", i, state.regs[i].value); + break; + } + } + + unw_debug("\n"); +#endif + + /* update frame */ + if (frame->call_frame + && !UNW_DEFAULT_RA(state.regs[retAddrReg], state.dataAlign)) + frame->call_frame = 0; + cfa = FRAME_REG(state.cfa.reg, unsigned long) + 
state.cfa.offs; + startLoc = min_t(unsigned long, UNW_SP(frame), cfa); + endLoc = max_t(unsigned long, UNW_SP(frame), cfa); + if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) { + startLoc = min(STACK_LIMIT(cfa), cfa); + endLoc = max(STACK_LIMIT(cfa), cfa); + } + + unw_debug("\nCFA reg: 0x%lx, offset: 0x%lx => 0x%lx\n", + state.cfa.reg, state.cfa.offs, cfa); + + for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { + if (REG_INVALID(i)) { + if (state.regs[i].where == Nowhere) + continue; + return -EIO; + } + switch (state.regs[i].where) { + default: + break; + case Register: + if (state.regs[i].value >= ARRAY_SIZE(reg_info) + || REG_INVALID(state.regs[i].value) + || reg_info[i].width > + reg_info[state.regs[i].value].width) + return -EIO; + switch (reg_info[state.regs[i].value].width) { + case sizeof(u8): + state.regs[i].value = + FRAME_REG(state.regs[i].value, const u8); + break; + case sizeof(u16): + state.regs[i].value = + FRAME_REG(state.regs[i].value, const u16); + break; + case sizeof(u32): + state.regs[i].value = + FRAME_REG(state.regs[i].value, const u32); + break; +#ifdef CONFIG_64BIT + case sizeof(u64): + state.regs[i].value = + FRAME_REG(state.regs[i].value, const u64); + break; +#endif + default: + return -EIO; + } + break; + } + } + + unw_debug("\nRegister state after evaluation with realtime Stack:\n"); + fptr = (unsigned long *)(&frame->regs); + for (i = 0; i < ARRAY_SIZE(state.regs); ++i, fptr++) { + + if (REG_INVALID(i)) + continue; + switch (state.regs[i].where) { + case Nowhere: + if (reg_info[i].width != sizeof(UNW_SP(frame)) + || &FRAME_REG(i, __typeof__(UNW_SP(frame))) + != &UNW_SP(frame)) + continue; + UNW_SP(frame) = cfa; + break; + case Register: + switch (reg_info[i].width) { + case sizeof(u8): + FRAME_REG(i, u8) = state.regs[i].value; + break; + case sizeof(u16): + FRAME_REG(i, u16) = state.regs[i].value; + break; + case sizeof(u32): + FRAME_REG(i, u32) = state.regs[i].value; + break; +#ifdef CONFIG_64BIT + case sizeof(u64): + FRAME_REG(i, u64) = state.regs[i].value; + break; +#endif + default: + return -EIO; + } + break; + case Value: + if (reg_info[i].width != sizeof(unsigned long)) + return -EIO; + FRAME_REG(i, unsigned long) = cfa + state.regs[i].value + * state.dataAlign; + break; + case Memory: + addr = cfa + state.regs[i].value * state.dataAlign; + + if ((state.regs[i].value * state.dataAlign) + % sizeof(unsigned long) + || addr < startLoc + || addr + sizeof(unsigned long) < addr + || addr + sizeof(unsigned long) > endLoc) + return -EIO; + + switch (reg_info[i].width) { + case sizeof(u8): + __get_user(FRAME_REG(i, u8), + (u8 __user *)addr); + break; + case sizeof(u16): + __get_user(FRAME_REG(i, u16), + (u16 __user *)addr); + break; + case sizeof(u32): + __get_user(FRAME_REG(i, u32), + (u32 __user *)addr); + break; +#ifdef CONFIG_64BIT + case sizeof(u64): + __get_user(FRAME_REG(i, u64), + (u64 __user *)addr); + break; +#endif + default: + return -EIO; + } + + break; + } + unw_debug("r%d: 0x%lx ", i, *fptr); + } + + return 0; +#undef FRAME_REG +} +EXPORT_SYMBOL(arc_unwind); diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S new file mode 100644 index 0000000000..529ae50f9f --- /dev/null +++ b/arch/arc/kernel/vmlinux.lds.S @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
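
The restore loop above evaluates each register's DWARF rule against the computed CFA. A condensed model of that per-register step, using the same rule names but hypothetical simplified types (the kernel keeps these in struct unwind_state and frame registers instead):

enum item_location { Nowhere, Memory, Register, Value };

struct reg_rule {
	enum item_location where;
	long value;
};

/*
 * Restore one register given the frame's CFA, the CIE data alignment
 * factor, and the current register file (needed for Register rules).
 * 'old' is the register's prior value, kept unchanged for Nowhere.
 */
static unsigned long apply_rule(struct reg_rule r, unsigned long cfa,
				long data_align, const unsigned long *regs,
				unsigned long old)
{
	switch (r.where) {
	case Memory:	/* saved on the stack at cfa + offset * data_align */
		return *(const unsigned long *)(cfa + r.value * data_align);
	case Register:	/* lives in another register of this frame */
		return regs[r.value];
	case Value:	/* the computed address itself is the value */
		return cfa + r.value * data_align;
	default:	/* Nowhere: the register was not clobbered */
		return old;
	}
}
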
(www.synopsys.com) + */ + +#include +#include +#include +#include + +OUTPUT_ARCH(arc) +ENTRY(res_service) + +#ifdef CONFIG_CPU_BIG_ENDIAN +jiffies = jiffies_64 + 4; +#else +jiffies = jiffies_64; +#endif + +SECTIONS +{ + /* + * ICCM starts at 0x8000_0000. So if kernel is relocated to some other + * address, make sure peripheral at 0x8z doesn't clash with ICCM + * Essentially vector is also in ICCM. + */ + + . = CONFIG_LINUX_LINK_BASE; + + _int_vec_base_lds = .; + .vector : { + *(.vector) + . = ALIGN(PAGE_SIZE); + } + +#ifdef CONFIG_ARC_HAS_ICCM + .text.arcfp : { + *(.text.arcfp) + . = ALIGN(CONFIG_ARC_ICCM_SZ * 1024); + } +#endif + + /* + * The reason for having a seperate subsection .init.ramfs is to + * prevent objump from including it in kernel dumps + * + * Reason for having .init.ramfs above .init is to make sure that the + * binary blob is tucked away to one side, reducing the displacement + * between .init.text and .text, avoiding any possible relocation + * errors because of calls from .init.text to .text + * Yes such calls do exist. e.g. + * decompress_inflate.c:gunzip( ) -> zlib_inflate_workspace( ) + */ + + __init_begin = .; + + .init.ramfs : { INIT_RAM_FS } + + . = ALIGN(PAGE_SIZE); + + HEAD_TEXT_SECTION + INIT_TEXT_SECTION(L1_CACHE_BYTES) + + /* INIT_DATA_SECTION open-coded: special INIT_RAM_FS handling */ + .init.data : { + INIT_DATA + INIT_SETUP(L1_CACHE_BYTES) + INIT_CALLS + CON_INITCALL + } + + .init.arch.info : { + __arch_info_begin = .; + *(.arch.info.init) + __arch_info_end = .; + } + + PERCPU_SECTION(L1_CACHE_BYTES) + + . = ALIGN(PAGE_SIZE); + __init_end = .; + + .text : { + _text = .; + _stext = .; + TEXT_TEXT + SCHED_TEXT + CPUIDLE_TEXT + LOCK_TEXT + KPROBES_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT + *(.fixup) + *(.gnu.warning) + } + EXCEPTION_TABLE(L1_CACHE_BYTES) + _etext = .; + + _sdata = .; + RO_DATA(PAGE_SIZE) + + /* + * 1. this is .data essentially + * 2. THREAD_SIZE for init.task, must be kernel-stk sz aligned + */ + RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + + _edata = .; + + BSS_SECTION(4, 4, 4) + +#ifdef CONFIG_ARC_DW2_UNWIND + . = ALIGN(PAGE_SIZE); + .eh_frame : { + __start_unwind = .; + *(.eh_frame) + __end_unwind = .; + } +#else + /DISCARD/ : { *(.eh_frame) } +#endif + + . = ALIGN(PAGE_SIZE); + _end = . ; + + STABS_DEBUG + ELF_DETAILS + DISCARDS + + .arcextmap 0 : { + *(.gnu.linkonce.arcextmap.*) + *(.arcextmap.*) + } + +#ifndef CONFIG_DEBUG_INFO + /DISCARD/ : { *(.debug_frame) } + /DISCARD/ : { *(.debug_aranges) } + /DISCARD/ : { *(.debug_pubnames) } + /DISCARD/ : { *(.debug_info) } + /DISCARD/ : { *(.debug_abbrev) } + /DISCARD/ : { *(.debug_line) } + /DISCARD/ : { *(.debug_str) } + /DISCARD/ : { *(.debug_loc) } + /DISCARD/ : { *(.debug_macinfo) } + /DISCARD/ : { *(.debug_ranges) } +#endif + +#ifdef CONFIG_ARC_HAS_DCCM + . = CONFIG_ARC_DCCM_BASE; + __arc_dccm_base = .; + .data.arcfp : { + *(.data.arcfp) + } + . = ALIGN(CONFIG_ARC_DCCM_SZ * 1024); +#endif +} diff --git a/arch/arc/lib/Makefile b/arch/arc/lib/Makefile new file mode 100644 index 0000000000..30158ae69f --- /dev/null +++ b/arch/arc/lib/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) +# + +lib-y := strchr-700.o strcpy-700.o strlen.o memcmp.o + +lib-$(CONFIG_ISA_ARCOMPACT) += memcpy-700.o memset.o strcmp.o +lib-$(CONFIG_ISA_ARCV2) += memset-archs.o strcmp-archs.o + +ifdef CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS +lib-$(CONFIG_ISA_ARCV2) +=memcpy-archs-unaligned.o +else +lib-$(CONFIG_ISA_ARCV2) +=memcpy-archs.o +endif diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S new file mode 100644 index 0000000000..d6dc5e9bc4 --- /dev/null +++ b/arch/arc/lib/memcmp.S @@ -0,0 +1,149 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#include + +#ifdef __LITTLE_ENDIAN__ +#define WORD2 r2 +#define SHIFT r3 +#else /* BIG ENDIAN */ +#define WORD2 r3 +#define SHIFT r2 +#endif + +ENTRY_CFI(memcmp) + or r12,r0,r1 + asl_s r12,r12,30 + sub r3,r2,1 + brls r2,r12,.Lbytewise + ld r4,[r0,0] + ld r5,[r1,0] + lsr.f lp_count,r3,3 +#ifdef CONFIG_ISA_ARCV2 + /* In ARCv2 a branch can't be the last instruction in a zero overhead + * loop. + * So we move the branch to the start of the loop, duplicate it + * after the end, and set up r12 so that the branch isn't taken + * initially. + */ + mov_s r12,WORD2 + lpne .Loop_end + brne WORD2,r12,.Lodd + ld WORD2,[r0,4] +#else + lpne .Loop_end + ld_s WORD2,[r0,4] +#endif + ld_s r12,[r1,4] + brne r4,r5,.Leven + ld.a r4,[r0,8] + ld.a r5,[r1,8] +#ifdef CONFIG_ISA_ARCV2 +.Loop_end: + brne WORD2,r12,.Lodd +#else + brne WORD2,r12,.Lodd +.Loop_end: +#endif + asl_s SHIFT,SHIFT,3 + bhs_s .Last_cmp + brne r4,r5,.Leven + ld r4,[r0,4] + ld r5,[r1,4] +#ifdef __LITTLE_ENDIAN__ + nop_s + ; one more load latency cycle +.Last_cmp: + xor r0,r4,r5 + bset r0,r0,SHIFT + sub_s r1,r0,1 + bic_s r1,r1,r0 + norm r1,r1 + b.d .Leven_cmp + and r1,r1,24 +.Leven: + xor r0,r4,r5 + sub_s r1,r0,1 + bic_s r1,r1,r0 + norm r1,r1 + ; slow track insn + and r1,r1,24 +.Leven_cmp: + asl r2,r4,r1 + asl r12,r5,r1 + lsr_s r2,r2,1 + lsr_s r12,r12,1 + j_s.d [blink] + sub r0,r2,r12 + .balign 4 +.Lodd: + xor r0,WORD2,r12 + sub_s r1,r0,1 + bic_s r1,r1,r0 + norm r1,r1 + ; slow track insn + and r1,r1,24 + asl_s r2,r2,r1 + asl_s r12,r12,r1 + lsr_s r2,r2,1 + lsr_s r12,r12,1 + j_s.d [blink] + sub r0,r2,r12 +#else /* BIG ENDIAN */ +.Last_cmp: + neg_s SHIFT,SHIFT + lsr r4,r4,SHIFT + lsr r5,r5,SHIFT + ; slow track insn +.Leven: + sub.f r0,r4,r5 + mov.ne r0,1 + j_s.d [blink] + bset.cs r0,r0,31 +.Lodd: + cmp_s WORD2,r12 + mov_s r0,1 + j_s.d [blink] + bset.cs r0,r0,31 +#endif /* ENDIAN */ + .balign 4 +.Lbytewise: + breq r2,0,.Lnil + ldb r4,[r0,0] + ldb r5,[r1,0] + lsr.f lp_count,r3 +#ifdef CONFIG_ISA_ARCV2 + mov r12,r3 + lpne .Lbyte_end + brne r3,r12,.Lbyte_odd +#else + lpne .Lbyte_end +#endif + ldb_s r3,[r0,1] + ldb r12,[r1,1] + brne r4,r5,.Lbyte_even + ldb.a r4,[r0,2] + ldb.a r5,[r1,2] +#ifdef CONFIG_ISA_ARCV2 +.Lbyte_end: + brne r3,r12,.Lbyte_odd +#else + brne r3,r12,.Lbyte_odd +.Lbyte_end: +#endif + bcc .Lbyte_even + brne r4,r5,.Lbyte_even + ldb_s r3,[r0,1] + ldb_s r12,[r1,1] +.Lbyte_odd: + j_s.d [blink] + sub r0,r3,r12 +.Lbyte_even: + j_s.d [blink] + sub r0,r4,r5 +.Lnil: + j_s.d [blink] + mov r0,0 +END_CFI(memcmp) diff --git a/arch/arc/lib/memcpy-700.S b/arch/arc/lib/memcpy-700.S new file mode 100644 index 0000000000..f2e239e219 --- /dev/null +++ b/arch/arc/lib/memcpy-700.S @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + */ + +#include + +ENTRY_CFI(memcpy) + or r3,r0,r1 + asl_s r3,r3,30 + mov_s r5,r0 + brls.d r2,r3,.Lcopy_bytewise + sub.f r3,r2,1 + ld_s r12,[r1,0] + asr.f lp_count,r3,3 + bbit0.d r3,2,.Lnox4 + bmsk_s r2,r2,1 + st.ab r12,[r5,4] + ld.a r12,[r1,4] +.Lnox4: + lppnz .Lendloop + ld_s r3,[r1,4] + st.ab r12,[r5,4] + ld.a r12,[r1,8] + st.ab r3,[r5,4] +.Lendloop: + breq r2,0,.Last_store + ld r3,[r5,0] +#ifdef __LITTLE_ENDIAN__ + add3 r2,-1,r2 + ; uses long immediate + xor_s r12,r12,r3 + bmsk r12,r12,r2 + xor_s r12,r12,r3 +#else /* BIG ENDIAN */ + sub3 r2,31,r2 + ; uses long immediate + xor_s r3,r3,r12 + bmsk r3,r3,r2 + xor_s r12,r12,r3 +#endif /* ENDIAN */ +.Last_store: + j_s.d [blink] + st r12,[r5,0] + + .balign 4 +.Lcopy_bytewise: + jcs [blink] + ldb_s r12,[r1,0] + lsr.f lp_count,r3 + bhs_s .Lnox1 + stb.ab r12,[r5,1] + ldb.a r12,[r1,1] +.Lnox1: + lppnz .Lendbloop + ldb_s r3,[r1,1] + stb.ab r12,[r5,1] + ldb.a r12,[r1,2] + stb.ab r3,[r5,1] +.Lendbloop: + j_s.d [blink] + stb r12,[r5,0] +END_CFI(memcpy) diff --git a/arch/arc/lib/memcpy-archs-unaligned.S b/arch/arc/lib/memcpy-archs-unaligned.S new file mode 100644 index 0000000000..28993a73fd --- /dev/null +++ b/arch/arc/lib/memcpy-archs-unaligned.S @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * ARCv2 memcpy implementation optimized for unaligned memory access using. + * + * Copyright (C) 2019 Synopsys + * Author: Eugeniy Paltsev + */ + +#include + +#ifdef CONFIG_ARC_HAS_LL64 +# define LOADX(DST,RX) ldd.ab DST, [RX, 8] +# define STOREX(SRC,RX) std.ab SRC, [RX, 8] +# define ZOLSHFT 5 +# define ZOLAND 0x1F +#else +# define LOADX(DST,RX) ld.ab DST, [RX, 4] +# define STOREX(SRC,RX) st.ab SRC, [RX, 4] +# define ZOLSHFT 4 +# define ZOLAND 0xF +#endif + +ENTRY_CFI(memcpy) + mov r3, r0 ; don;t clobber ret val + + lsr.f lp_count, r2, ZOLSHFT + lpnz @.Lcopy32_64bytes + ;; LOOP START + LOADX (r6, r1) + LOADX (r8, r1) + LOADX (r10, r1) + LOADX (r4, r1) + STOREX (r6, r3) + STOREX (r8, r3) + STOREX (r10, r3) + STOREX (r4, r3) +.Lcopy32_64bytes: + + and.f lp_count, r2, ZOLAND ;Last remaining 31 bytes + lpnz @.Lcopyremainingbytes + ;; LOOP START + ldb.ab r5, [r1, 1] + stb.ab r5, [r3, 1] +.Lcopyremainingbytes: + + j [blink] +END_CFI(memcpy) diff --git a/arch/arc/lib/memcpy-archs.S b/arch/arc/lib/memcpy-archs.S new file mode 100644 index 0000000000..0051a84f60 --- /dev/null +++ b/arch/arc/lib/memcpy-archs.S @@ -0,0 +1,219 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2014-15 Synopsys, Inc. 
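
The ZOLSHFT/ZOLAND constants above simply split the copy length into whole zero-overhead-loop iterations (32 bytes per iteration with LL64 double loads, 16 without) plus a byte-wise tail. A minimal sketch of that arithmetic, assuming the LL64 case:

#include <stdio.h>

int main(void)
{
	unsigned long len   = 100;
	unsigned long iters = len >> 5;		/* ZOLSHFT == 5: 32-byte chunks */
	unsigned long tail  = len & 0x1f;	/* ZOLAND: leftover bytes */

	printf("%lu chunks of 32 bytes + %lu tail bytes\n", iters, tail);
	return 0;
}
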
(www.synopsys.com) + */ + +#include + +#ifdef __LITTLE_ENDIAN__ +# define SHIFT_1(RX,RY,IMM) asl RX, RY, IMM ; << +# define SHIFT_2(RX,RY,IMM) lsr RX, RY, IMM ; >> +# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM +# define MERGE_2(RX,RY,IMM) +# define EXTRACT_1(RX,RY,IMM) and RX, RY, 0xFFFF +# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, IMM +#else +# define SHIFT_1(RX,RY,IMM) lsr RX, RY, IMM ; >> +# define SHIFT_2(RX,RY,IMM) asl RX, RY, IMM ; << +# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM ; << +# define MERGE_2(RX,RY,IMM) asl RX, RY, IMM ; << +# define EXTRACT_1(RX,RY,IMM) lsr RX, RY, IMM +# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, 0x08 +#endif + +#ifdef CONFIG_ARC_HAS_LL64 +# define LOADX(DST,RX) ldd.ab DST, [RX, 8] +# define STOREX(SRC,RX) std.ab SRC, [RX, 8] +# define ZOLSHFT 5 +# define ZOLAND 0x1F +#else +# define LOADX(DST,RX) ld.ab DST, [RX, 4] +# define STOREX(SRC,RX) st.ab SRC, [RX, 4] +# define ZOLSHFT 4 +# define ZOLAND 0xF +#endif + +ENTRY_CFI(memcpy) + mov.f 0, r2 +;;; if size is zero + jz.d [blink] + mov r3, r0 ; don;t clobber ret val + +;;; if size <= 8 + cmp r2, 8 + bls.d @.Lsmallchunk + mov.f lp_count, r2 + + and.f r4, r0, 0x03 + rsub lp_count, r4, 4 + lpnz @.Laligndestination + ;; LOOP BEGIN + ldb.ab r5, [r1,1] + sub r2, r2, 1 + stb.ab r5, [r3,1] +.Laligndestination: + +;;; Check the alignment of the source + and.f r4, r1, 0x03 + bnz.d @.Lsourceunaligned + +;;; CASE 0: Both source and destination are 32bit aligned +;;; Convert len to Dwords, unfold x4 + lsr.f lp_count, r2, ZOLSHFT + lpnz @.Lcopy32_64bytes + ;; LOOP START + LOADX (r6, r1) + LOADX (r8, r1) + LOADX (r10, r1) + LOADX (r4, r1) + STOREX (r6, r3) + STOREX (r8, r3) + STOREX (r10, r3) + STOREX (r4, r3) +.Lcopy32_64bytes: + + and.f lp_count, r2, ZOLAND ;Last remaining 31 bytes +.Lsmallchunk: + lpnz @.Lcopyremainingbytes + ;; LOOP START + ldb.ab r5, [r1,1] + stb.ab r5, [r3,1] +.Lcopyremainingbytes: + + j [blink] +;;; END CASE 0 + +.Lsourceunaligned: + cmp r4, 2 + beq.d @.LunalignedOffby2 + sub r2, r2, 1 + + bhi.d @.LunalignedOffby3 + ldb.ab r5, [r1, 1] + +;;; CASE 1: The source is unaligned, off by 1 + ;; Hence I need to read 1 byte for a 16bit alignment + ;; and 2bytes to reach 32bit alignment + ldh.ab r6, [r1, 2] + sub r2, r2, 2 + ;; Convert to words, unfold x2 + lsr.f lp_count, r2, 3 + MERGE_1 (r6, r6, 8) + MERGE_2 (r5, r5, 24) + or r5, r5, r6 + + ;; Both src and dst are aligned + lpnz @.Lcopy8bytes_1 + ;; LOOP START + ld.ab r6, [r1, 4] + ld.ab r8, [r1,4] + + SHIFT_1 (r7, r6, 24) + or r7, r7, r5 + SHIFT_2 (r5, r6, 8) + + SHIFT_1 (r9, r8, 24) + or r9, r9, r5 + SHIFT_2 (r5, r8, 8) + + st.ab r7, [r3, 4] + st.ab r9, [r3, 4] +.Lcopy8bytes_1: + + ;; Write back the remaining 16bits + EXTRACT_1 (r6, r5, 16) + sth.ab r6, [r3, 2] + ;; Write back the remaining 8bits + EXTRACT_2 (r5, r5, 16) + stb.ab r5, [r3, 1] + + and.f lp_count, r2, 0x07 ;Last 8bytes + lpnz @.Lcopybytewise_1 + ;; LOOP START + ldb.ab r6, [r1,1] + stb.ab r6, [r3,1] +.Lcopybytewise_1: + j [blink] + +.LunalignedOffby2: +;;; CASE 2: The source is unaligned, off by 2 + ldh.ab r5, [r1, 2] + sub r2, r2, 1 + + ;; Both src and dst are aligned + ;; Convert to words, unfold x2 + lsr.f lp_count, r2, 3 +#ifdef __BIG_ENDIAN__ + asl.nz r5, r5, 16 +#endif + lpnz @.Lcopy8bytes_2 + ;; LOOP START + ld.ab r6, [r1, 4] + ld.ab r8, [r1,4] + + SHIFT_1 (r7, r6, 16) + or r7, r7, r5 + SHIFT_2 (r5, r6, 16) + + SHIFT_1 (r9, r8, 16) + or r9, r9, r5 + SHIFT_2 (r5, r8, 16) + + st.ab r7, [r3, 4] + st.ab r9, [r3, 4] +.Lcopy8bytes_2: + +#ifdef __BIG_ENDIAN__ + lsr.nz r5, r5, 16 +#endif + sth.ab r5, 
[r3, 2] + + and.f lp_count, r2, 0x07 ;Last 8bytes + lpnz @.Lcopybytewise_2 + ;; LOOP START + ldb.ab r6, [r1,1] + stb.ab r6, [r3,1] +.Lcopybytewise_2: + j [blink] + +.LunalignedOffby3: +;;; CASE 3: The source is unaligned, off by 3 +;;; Hence, I need to read 1byte for achieve the 32bit alignment + + ;; Both src and dst are aligned + ;; Convert to words, unfold x2 + lsr.f lp_count, r2, 3 +#ifdef __BIG_ENDIAN__ + asl.ne r5, r5, 24 +#endif + lpnz @.Lcopy8bytes_3 + ;; LOOP START + ld.ab r6, [r1, 4] + ld.ab r8, [r1,4] + + SHIFT_1 (r7, r6, 8) + or r7, r7, r5 + SHIFT_2 (r5, r6, 24) + + SHIFT_1 (r9, r8, 8) + or r9, r9, r5 + SHIFT_2 (r5, r8, 24) + + st.ab r7, [r3, 4] + st.ab r9, [r3, 4] +.Lcopy8bytes_3: + +#ifdef __BIG_ENDIAN__ + lsr.nz r5, r5, 24 +#endif + stb.ab r5, [r3, 1] + + and.f lp_count, r2, 0x07 ;Last 8bytes + lpnz @.Lcopybytewise_3 + ;; LOOP START + ldb.ab r6, [r1,1] + stb.ab r6, [r3,1] +.Lcopybytewise_3: + j [blink] + +END_CFI(memcpy) diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S new file mode 100644 index 0000000000..d2e09fece5 --- /dev/null +++ b/arch/arc/lib/memset-archs.S @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + */ + +#include +#include + +/* + * The memset implementation below is optimized to use prefetchw and prealloc + * instruction in case of CPU with 64B L1 data cache line (L1_CACHE_SHIFT == 6) + * If you want to implement optimized memset for other possible L1 data cache + * line lengths (32B and 128B) you should rewrite code carefully checking + * we don't call any prefetchw/prealloc instruction for L1 cache lines which + * don't belongs to memset area. + */ + +#if L1_CACHE_SHIFT == 6 + +.macro PREALLOC_INSTR reg, off + prealloc [\reg, \off] +.endm + +.macro PREFETCHW_INSTR reg, off + prefetchw [\reg, \off] +.endm + +#else + +.macro PREALLOC_INSTR reg, off +.endm + +.macro PREFETCHW_INSTR reg, off +.endm + +#endif + +ENTRY_CFI(memset) + PREFETCHW_INSTR r0, 0 ; Prefetch the first write location + mov.f 0, r2 +;;; if size is zero + jz.d [blink] + mov r3, r0 ; don't clobber ret val + +;;; if length < 8 + brls.d.nt r2, 8, .Lsmallchunk + mov.f lp_count,r2 + + and.f r4, r0, 0x03 + rsub lp_count, r4, 4 + lpnz @.Laligndestination + ;; LOOP BEGIN + stb.ab r1, [r3,1] + sub r2, r2, 1 +.Laligndestination: + +;;; Destination is aligned + and r1, r1, 0xFF + asl r4, r1, 8 + or r4, r4, r1 + asl r5, r4, 16 + or r5, r5, r4 + mov r4, r5 + + sub3 lp_count, r2, 8 + cmp r2, 64 + bmsk.hi r2, r2, 5 + mov.ls lp_count, 0 + add3.hi r2, r2, 8 + +;;; Convert len to Dwords, unfold x8 + lsr.f lp_count, lp_count, 6 + + lpnz @.Lset64bytes + ;; LOOP START + PREALLOC_INSTR r3, 64 ; alloc next line w/o fetching + +#ifdef CONFIG_ARC_HAS_LL64 + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] +#else + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] +#endif +.Lset64bytes: + + lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes + lpnz .Lset32bytes + ;; LOOP START +#ifdef CONFIG_ARC_HAS_LL64 + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] +#else + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab 
r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] +#endif +.Lset32bytes: + + and.f lp_count, r2, 0x1F ;Last remaining 31 bytes +.Lsmallchunk: + lpnz .Lcopy3bytes + ;; LOOP START + stb.ab r1, [r3, 1] +.Lcopy3bytes: + + j [blink] + +END_CFI(memset) + +ENTRY_CFI(memzero) + ; adjust bzero args to memset args + mov r2, r1 + b.d memset ;tail call so need to tinker with blink + mov r1, 0 +END_CFI(memzero) diff --git a/arch/arc/lib/memset.S b/arch/arc/lib/memset.S new file mode 100644 index 0000000000..9f35960da1 --- /dev/null +++ b/arch/arc/lib/memset.S @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#include + +#define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */ + +ENTRY_CFI(memset) + mov_s r4,r0 + or r12,r0,r2 + bmsk.f r12,r12,1 + extb_s r1,r1 + asl r3,r1,8 + beq.d .Laligned + or_s r1,r1,r3 + brls r2,SMALL,.Ltiny + add r3,r2,r0 + stb r1,[r3,-1] + bclr_s r3,r3,0 + stw r1,[r3,-2] + bmsk.f r12,r0,1 + add_s r2,r2,r12 + sub.ne r2,r2,4 + stb.ab r1,[r4,1] + and r4,r4,-2 + stw.ab r1,[r4,2] + and r4,r4,-4 +.Laligned: ; This code address should be aligned for speed. + asl r3,r1,16 + lsr.f lp_count,r2,2 + or_s r1,r1,r3 + lpne .Loop_end + st.ab r1,[r4,4] +.Loop_end: + j_s [blink] + + .balign 4 +.Ltiny: + mov.f lp_count,r2 + lpne .Ltiny_end + stb.ab r1,[r4,1] +.Ltiny_end: + j_s [blink] +END_CFI(memset) + +; memzero: @r0 = mem, @r1 = size_t +; memset: @r0 = mem, @r1 = char, @r2 = size_t + +ENTRY_CFI(memzero) + ; adjust bzero args to memset args + mov r2, r1 + mov r1, 0 + b memset ;tail call so need to tinker with blink +END_CFI(memzero) diff --git a/arch/arc/lib/strchr-700.S b/arch/arc/lib/strchr-700.S new file mode 100644 index 0000000000..d52e2833f9 --- /dev/null +++ b/arch/arc/lib/strchr-700.S @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +/* ARC700 has a relatively long pipeline and branch prediction, so we want + to avoid branches that are hard to predict. On the other hand, the + presence of the norm instruction makes it easier to operate on whole + words branch-free. */ + +#include + +ENTRY_CFI(strchr) + extb_s r1,r1 + asl r5,r1,8 + bmsk r2,r0,1 + or r5,r5,r1 + mov_s r3,0x01010101 + breq.d r2,r0,.Laligned + asl r4,r5,16 + sub_s r0,r0,r2 + asl r7,r2,3 + ld_s r2,[r0] +#ifdef __LITTLE_ENDIAN__ + asl r7,r3,r7 +#else + lsr r7,r3,r7 +#endif + or r5,r5,r4 + ror r4,r3 + sub r12,r2,r7 + bic_s r12,r12,r2 + and r12,r12,r4 + brne.d r12,0,.Lfound0_ua + xor r6,r2,r5 + ld.a r2,[r0,4] + sub r12,r6,r7 + bic r12,r12,r6 +#ifdef __LITTLE_ENDIAN__ + and r7,r12,r4 + breq r7,0,.Loop ; For speed, we want this branch to be unaligned. + b .Lfound_char ; Likewise this one. +#else + and r12,r12,r4 + breq r12,0,.Loop ; For speed, we want this branch to be unaligned. + lsr_s r12,r12,7 + bic r2,r7,r6 + b.d .Lfound_char_b + and_s r2,r2,r12 +#endif +; /* We require this code address to be unaligned for speed... */ +.Laligned: + ld_s r2,[r0] + or r5,r5,r4 + ror r4,r3 +; /* ... so that this code address is aligned, for itself and ... */ +.Loop: + sub r12,r2,r3 + bic_s r12,r12,r2 + and r12,r12,r4 + brne.d r12,0,.Lfound0 + xor r6,r2,r5 + ld.a r2,[r0,4] + sub r12,r6,r3 + bic r12,r12,r6 + and r7,r12,r4 + breq r7,0,.Loop /* ... so that this branch is unaligned. */ + ; Found searched-for character. r0 has already advanced to next word. 
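
strchr above (and the strcmp/strlen routines that follow) rely on the classic word-at-a-time zero-byte test built from the 0x01010101 / 0x80808080 constants. A small standalone C illustration of that predicate:

#include <stdint.h>
#include <stdio.h>

/*
 * Non-zero exactly when some byte of x is 0x00: subtracting 0x01 from
 * each byte sets a byte's top bit when that byte was zero (borrows can
 * also mark higher bytes, but only when a lower byte really was zero),
 * and the & ~x term discards bytes whose top bit was already set.
 */
static int has_zero_byte(uint32_t x)
{
	return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
}

int main(void)
{
	printf("%d\n", has_zero_byte(0x41424344));	/* 0: no NUL byte */
	printf("%d\n", has_zero_byte(0x41004344));	/* 1: contains 0x00 */
	return 0;
}
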
+#ifdef __LITTLE_ENDIAN__ +/* We only need the information about the first matching byte + (i.e. the least significant matching byte) to be exact, + hence there is no problem with carry effects. */ +.Lfound_char: + sub r3,r7,1 + bic r3,r3,r7 + norm r2,r3 + sub_s r0,r0,1 + asr_s r2,r2,3 + j.d [blink] + sub_s r0,r0,r2 + + .balign 4 +.Lfound0_ua: + mov r3,r7 +.Lfound0: + sub r3,r6,r3 + bic r3,r3,r6 + and r2,r3,r4 + or_s r12,r12,r2 + sub_s r3,r12,1 + bic_s r3,r3,r12 + norm r3,r3 + add_s r0,r0,3 + asr_s r12,r3,3 + asl.f 0,r2,r3 + sub_s r0,r0,r12 + j_s.d [blink] + mov.pl r0,0 +#else /* BIG ENDIAN */ +.Lfound_char: + lsr r7,r7,7 + + bic r2,r7,r6 +.Lfound_char_b: + norm r2,r2 + sub_s r0,r0,4 + asr_s r2,r2,3 + j.d [blink] + add_s r0,r0,r2 + +.Lfound0_ua: + mov_s r3,r7 +.Lfound0: + asl_s r2,r2,7 + or r7,r6,r4 + bic_s r12,r12,r2 + sub r2,r7,r3 + or r2,r2,r6 + bic r12,r2,r12 + bic.f r3,r4,r12 + norm r3,r3 + + add.pl r3,r3,1 + asr_s r12,r3,3 + asl.f 0,r2,r3 + add_s r0,r0,r12 + j_s.d [blink] + mov.mi r0,0 +#endif /* ENDIAN */ +END_CFI(strchr) diff --git a/arch/arc/lib/strcmp-archs.S b/arch/arc/lib/strcmp-archs.S new file mode 100644 index 0000000000..7cffb37174 --- /dev/null +++ b/arch/arc/lib/strcmp-archs.S @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + */ + +#include + +ENTRY_CFI(strcmp) + or r2, r0, r1 + bmsk_s r2, r2, 1 + brne r2, 0, @.Lcharloop + +;;; s1 and s2 are word aligned + ld.ab r2, [r0, 4] + + mov_s r12, 0x01010101 + ror r11, r12 + .align 4 +.LwordLoop: + ld.ab r3, [r1, 4] + ;; Detect NULL char in str1 + sub r4, r2, r12 + ld.ab r5, [r0, 4] + bic r4, r4, r2 + and r4, r4, r11 + brne.d.nt r4, 0, .LfoundNULL + ;; Check if the read locations are the same + cmp r2, r3 + beq.d .LwordLoop + mov.eq r2, r5 + + ;; A match is found, spot it out +#ifdef __LITTLE_ENDIAN__ + swape r3, r3 + mov_s r0, 1 + swape r2, r2 +#else + mov_s r0, 1 +#endif + cmp_s r2, r3 + j_s.d [blink] + bset.lo r0, r0, 31 + + .align 4 +.LfoundNULL: +#ifdef __BIG_ENDIAN__ + swape r4, r4 + swape r2, r2 + swape r3, r3 +#endif + ;; Find null byte + ffs r0, r4 + bmsk r2, r2, r0 + bmsk r3, r3, r0 + swape r2, r2 + swape r3, r3 + ;; make the return value + sub.f r0, r2, r3 + mov.hi r0, 1 + j_s.d [blink] + bset.lo r0, r0, 31 + + .align 4 +.Lcharloop: + ldb.ab r2, [r0, 1] + ldb.ab r3, [r1, 1] + nop + breq r2, 0, .Lcmpend + breq r2, r3, .Lcharloop + + .align 4 +.Lcmpend: + j_s.d [blink] + sub r0, r2, r3 +END_CFI(strcmp) diff --git a/arch/arc/lib/strcmp.S b/arch/arc/lib/strcmp.S new file mode 100644 index 0000000000..b20c98fb3b --- /dev/null +++ b/arch/arc/lib/strcmp.S @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +/* This is optimized primarily for the ARC700. + It would be possible to speed up the loops by one cycle / word + respective one cycle / byte by forcing double source 1 alignment, unrolling + by a factor of two, and speculatively loading the second word / byte of + source 1; however, that would increase the overhead for loop setup / finish, + and strcmp might often terminate early. 
*/ + +#include + +ENTRY_CFI(strcmp) + or r2,r0,r1 + bmsk_s r2,r2,1 + brne r2,0,.Lcharloop + mov_s r12,0x01010101 + ror r5,r12 +.Lwordloop: + ld.ab r2,[r0,4] + ld.ab r3,[r1,4] + nop_s + sub r4,r2,r12 + bic r4,r4,r2 + and r4,r4,r5 + brne r4,0,.Lfound0 + breq r2,r3,.Lwordloop +#ifdef __LITTLE_ENDIAN__ + xor r0,r2,r3 ; mask for difference + sub_s r1,r0,1 + bic_s r0,r0,r1 ; mask for least significant difference bit + sub r1,r5,r0 + xor r0,r5,r1 ; mask for least significant difference byte + and_s r2,r2,r0 + and_s r3,r3,r0 +#endif /* LITTLE ENDIAN */ + cmp_s r2,r3 + mov_s r0,1 + j_s.d [blink] + bset.lo r0,r0,31 + + .balign 4 +#ifdef __LITTLE_ENDIAN__ +.Lfound0: + xor r0,r2,r3 ; mask for difference + or r0,r0,r4 ; or in zero indicator + sub_s r1,r0,1 + bic_s r0,r0,r1 ; mask for least significant difference bit + sub r1,r5,r0 + xor r0,r5,r1 ; mask for least significant difference byte + and_s r2,r2,r0 + and_s r3,r3,r0 + sub.f r0,r2,r3 + mov.hi r0,1 + j_s.d [blink] + bset.lo r0,r0,31 +#else /* BIG ENDIAN */ + /* The zero-detection above can mis-detect 0x01 bytes as zeroes + because of carry-propagateion from a lower significant zero byte. + We can compensate for this by checking that bit0 is zero. + This compensation is not necessary in the step where we + get a low estimate for r2, because in any affected bytes + we already have 0x00 or 0x01, which will remain unchanged + when bit 7 is cleared. */ + .balign 4 +.Lfound0: + lsr r0,r4,8 + lsr_s r1,r2 + bic_s r2,r2,r0 ; get low estimate for r2 and get ... + bic_s r0,r0,r1 ; + or_s r3,r3,r0 ; ... high estimate r3 so that r2 > r3 will ... + cmp_s r3,r2 ; ... be independent of trailing garbage + or_s r2,r2,r0 ; likewise for r3 > r2 + bic_s r3,r3,r0 + rlc r0,0 ; r0 := r2 > r3 ? 1 : 0 + cmp_s r2,r3 + j_s.d [blink] + bset.lo r0,r0,31 +#endif /* ENDIAN */ + + .balign 4 +.Lcharloop: + ldb.ab r2,[r0,1] + ldb.ab r3,[r1,1] + nop_s + breq r2,0,.Lcmpend + breq r2,r3,.Lcharloop +.Lcmpend: + j_s.d [blink] + sub r0,r2,r3 +END_CFI(strcmp) diff --git a/arch/arc/lib/strcpy-700.S b/arch/arc/lib/strcpy-700.S new file mode 100644 index 0000000000..6e2294d13e --- /dev/null +++ b/arch/arc/lib/strcpy-700.S @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +/* If dst and src are 4 byte aligned, copy 8 bytes at a time. + If the src is 4, but not 8 byte aligned, we first read 4 bytes to get + it 8 byte aligned. Thus, we can do a little read-ahead, without + dereferencing a cache line that we should not touch. + Note that short and long instructions have been scheduled to avoid + branch stalls. + The beq_s to r3z could be made unaligned & long to avoid a stall + there, but the it is not likely to be taken often, and it + would also be likey to cost an unaligned mispredict at the next call. 
*/ + +#include + +ENTRY_CFI(strcpy) + or r2,r0,r1 + bmsk_s r2,r2,1 + brne.d r2,0,charloop + mov_s r10,r0 + ld_s r3,[r1,0] + mov r8,0x01010101 + bbit0.d r1,2,loop_start + ror r12,r8 + sub r2,r3,r8 + bic_s r2,r2,r3 + tst_s r2,r12 + bne r3z + mov_s r4,r3 + .balign 4 +loop: + ld.a r3,[r1,4] + st.ab r4,[r10,4] +loop_start: + ld.a r4,[r1,4] + sub r2,r3,r8 + bic_s r2,r2,r3 + tst_s r2,r12 + bne_s r3z + st.ab r3,[r10,4] + sub r2,r4,r8 + bic r2,r2,r4 + tst r2,r12 + beq loop + mov_s r3,r4 +#ifdef __LITTLE_ENDIAN__ +r3z: bmsk.f r1,r3,7 + lsr_s r3,r3,8 +#else +r3z: lsr.f r1,r3,24 + asl_s r3,r3,8 +#endif + bne.d r3z + stb.ab r1,[r10,1] + j_s [blink] + + .balign 4 +charloop: + ldb.ab r3,[r1,1] + + + brne.d r3,0,charloop + stb.ab r3,[r10,1] + j [blink] +END_CFI(strcpy) diff --git a/arch/arc/lib/strlen.S b/arch/arc/lib/strlen.S new file mode 100644 index 0000000000..dae428ceb8 --- /dev/null +++ b/arch/arc/lib/strlen.S @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#include + +ENTRY_CFI(strlen) + or r3,r0,7 + ld r2,[r3,-7] + ld.a r6,[r3,-3] + mov r4,0x01010101 + ; uses long immediate +#ifdef __LITTLE_ENDIAN__ + asl_s r1,r0,3 + btst_s r0,2 + asl r7,r4,r1 + ror r5,r4 + sub r1,r2,r7 + bic_s r1,r1,r2 + mov.eq r7,r4 + sub r12,r6,r7 + bic r12,r12,r6 + or.eq r12,r12,r1 + and r12,r12,r5 + brne r12,0,.Learly_end +#else /* BIG ENDIAN */ + ror r5,r4 + btst_s r0,2 + mov_s r1,31 + sub3 r7,r1,r0 + sub r1,r2,r4 + bic_s r1,r1,r2 + bmsk r1,r1,r7 + sub r12,r6,r4 + bic r12,r12,r6 + bmsk.ne r12,r12,r7 + or.eq r12,r12,r1 + and r12,r12,r5 + brne r12,0,.Learly_end +#endif /* ENDIAN */ + +.Loop: + ld_s r2,[r3,4] + ld.a r6,[r3,8] + ; stall for load result + sub r1,r2,r4 + bic_s r1,r1,r2 + sub r12,r6,r4 + bic r12,r12,r6 + or r12,r12,r1 + and r12,r12,r5 + breq r12,0,.Loop +.Lend: + and.f r1,r1,r5 + sub.ne r3,r3,4 + mov.eq r1,r12 +#ifdef __LITTLE_ENDIAN__ + sub_s r2,r1,1 + bic_s r2,r2,r1 + norm r1,r2 + sub_s r0,r0,3 + lsr_s r1,r1,3 + sub r0,r3,r0 + j_s.d [blink] + sub r0,r0,r1 +#else /* BIG ENDIAN */ + lsr_s r1,r1,7 + mov.eq r2,r6 + bic_s r1,r1,r2 + norm r1,r1 + sub r0,r3,r0 + lsr_s r1,r1,3 + j_s.d [blink] + add r0,r0,r1 +#endif /* ENDIAN */ +.Learly_end: + b.d .Lend + sub_s.ne r1,r1,r1 +END_CFI(strlen) diff --git a/arch/arc/mm/Makefile b/arch/arc/mm/Makefile new file mode 100644 index 0000000000..633a773369 --- /dev/null +++ b/arch/arc/mm/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) +# + +obj-y := extable.o ioremap.o dma.o fault.o init.o +obj-y += tlb.o tlbex.o cache.o mmap.o +obj-$(CONFIG_HIGHMEM) += highmem.o diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c new file mode 100644 index 0000000000..8aa1231865 --- /dev/null +++ b/arch/arc/mm/cache.c @@ -0,0 +1,1248 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ARC Cache Management + * + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_ISA_ARCV2 +#define USE_RGN_FLSH 1 +#endif + +static int l2_line_sz; +static int ioc_exists; +int slc_enable = 1, ioc_enable = 1; +unsigned long perip_base = ARC_UNCACHED_ADDR_SPACE; /* legacy value for boot */ +unsigned long perip_end = 0xFFFFFFFF; /* legacy value */ + +void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz, const int op, const int full_page); + +void (*__dma_cache_wback_inv)(phys_addr_t start, unsigned long sz); +void (*__dma_cache_inv)(phys_addr_t start, unsigned long sz); +void (*__dma_cache_wback)(phys_addr_t start, unsigned long sz); + +char *arc_cache_mumbojumbo(int c, char *buf, int len) +{ + int n = 0; + struct cpuinfo_arc_cache *p; + +#define PR_CACHE(p, cfg, str) \ + if (!(p)->line_len) \ + n += scnprintf(buf + n, len - n, str"\t\t: N/A\n"); \ + else \ + n += scnprintf(buf + n, len - n, \ + str"\t\t: %uK, %dway/set, %uB Line, %s%s%s\n", \ + (p)->sz_k, (p)->assoc, (p)->line_len, \ + (p)->vipt ? "VIPT" : "PIPT", \ + (p)->alias ? " aliasing" : "", \ + IS_USED_CFG(cfg)); + + PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache"); + PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache"); + + p = &cpuinfo_arc700[c].slc; + if (p->line_len) + n += scnprintf(buf + n, len - n, + "SLC\t\t: %uK, %uB Line%s\n", + p->sz_k, p->line_len, IS_USED_RUN(slc_enable)); + + n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n", + perip_base, + IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency (per-device) ")); + + return buf; +} + +/* + * Read the Cache Build Confuration Registers, Decode them and save into + * the cpuinfo structure for later use. + * No Validation done here, simply read/convert the BCRs + */ +static void read_decode_cache_bcr_arcv2(int cpu) +{ + struct cpuinfo_arc_cache *p_slc = &cpuinfo_arc700[cpu].slc; + struct bcr_generic sbcr; + + struct bcr_slc_cfg { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:24, way:2, lsz:2, sz:4; +#else + unsigned int sz:4, lsz:2, way:2, pad:24; +#endif + } slc_cfg; + + struct bcr_clust_cfg { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8; +#else + unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7; +#endif + } cbcr; + + struct bcr_volatile { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int start:4, limit:4, pad:22, order:1, disable:1; +#else + unsigned int disable:1, order:1, pad:22, limit:4, start:4; +#endif + } vol; + + + READ_BCR(ARC_REG_SLC_BCR, sbcr); + if (sbcr.ver) { + READ_BCR(ARC_REG_SLC_CFG, slc_cfg); + p_slc->sz_k = 128 << slc_cfg.sz; + l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64; + } + + READ_BCR(ARC_REG_CLUSTER_BCR, cbcr); + if (cbcr.c) { + ioc_exists = 1; + + /* + * As for today we don't support both IOC and ZONE_HIGHMEM enabled + * simultaneously. This happens because as of today IOC aperture covers + * only ZONE_NORMAL (low mem) and any dma transactions outside this + * region won't be HW coherent. + * If we want to use both IOC and ZONE_HIGHMEM we can use + * bounce_buffer to handle dma transactions to HIGHMEM. + * Also it is possible to modify dma_direct cache ops or increase IOC + * aperture size if we are planning to use HIGHMEM without PAE. 
+ */ + if (IS_ENABLED(CONFIG_HIGHMEM) || is_pae40_enabled()) + ioc_enable = 0; + } else { + ioc_enable = 0; + } + + /* HS 2.0 didn't have AUX_VOL */ + if (cpuinfo_arc700[cpu].core.family > 0x51) { + READ_BCR(AUX_VOL, vol); + perip_base = vol.start << 28; + /* HS 3.0 has limit and strict-ordering fields */ + if (cpuinfo_arc700[cpu].core.family > 0x52) + perip_end = (vol.limit << 28) - 1; + } +} + +void read_decode_cache_bcr(void) +{ + struct cpuinfo_arc_cache *p_ic, *p_dc; + unsigned int cpu = smp_processor_id(); + struct bcr_cache { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:12, line_len:4, sz:4, config:4, ver:8; +#else + unsigned int ver:8, config:4, sz:4, line_len:4, pad:12; +#endif + } ibcr, dbcr; + + p_ic = &cpuinfo_arc700[cpu].icache; + READ_BCR(ARC_REG_IC_BCR, ibcr); + + if (!ibcr.ver) + goto dc_chk; + + if (ibcr.ver <= 3) { + BUG_ON(ibcr.config != 3); + p_ic->assoc = 2; /* Fixed to 2w set assoc */ + } else if (ibcr.ver >= 4) { + p_ic->assoc = 1 << ibcr.config; /* 1,2,4,8 */ + } + + p_ic->line_len = 8 << ibcr.line_len; + p_ic->sz_k = 1 << (ibcr.sz - 1); + p_ic->vipt = 1; + p_ic->alias = p_ic->sz_k/p_ic->assoc/TO_KB(PAGE_SIZE) > 1; + +dc_chk: + p_dc = &cpuinfo_arc700[cpu].dcache; + READ_BCR(ARC_REG_DC_BCR, dbcr); + + if (!dbcr.ver) + goto slc_chk; + + if (dbcr.ver <= 3) { + BUG_ON(dbcr.config != 2); + p_dc->assoc = 4; /* Fixed to 4w set assoc */ + p_dc->vipt = 1; + p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1; + } else if (dbcr.ver >= 4) { + p_dc->assoc = 1 << dbcr.config; /* 1,2,4,8 */ + p_dc->vipt = 0; + p_dc->alias = 0; /* PIPT so can't VIPT alias */ + } + + p_dc->line_len = 16 << dbcr.line_len; + p_dc->sz_k = 1 << (dbcr.sz - 1); + +slc_chk: + if (is_isa_arcv2()) + read_decode_cache_bcr_arcv2(cpu); +} + +/* + * Line Operation on {I,D}-Cache + */ + +#define OP_INV 0x1 +#define OP_FLUSH 0x2 +#define OP_FLUSH_N_INV 0x3 +#define OP_INV_IC 0x4 + +/* + * Cache Flush programming model + * + * ARC700 MMUv3 I$ and D$ are both VIPT and can potentially alias. + * Programming model requires both paddr and vaddr irrespecive of aliasing + * considerations: + * - vaddr in {I,D}C_IV?L + * - paddr in {I,D}C_PTAG + * + * In HS38x (MMUv4), D$ is PIPT, I$ is VIPT and can still alias. + * Programming model is different for aliasing vs. non-aliasing I$ + * - D$ / Non-aliasing I$: only paddr in {I,D}C_IV?L + * - Aliasing I$: same as ARC700 above (so MMUv3 routine used for MMUv4 I$) + * + * - If PAE40 is enabled, independent of aliasing considerations, the higher + * bits needs to be written into PTAG_HI + */ + +static inline +void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz, const int op, const int full_page) +{ + unsigned int aux_cmd, aux_tag; + int num_lines; + + if (op == OP_INV_IC) { + aux_cmd = ARC_REG_IC_IVIL; + aux_tag = ARC_REG_IC_PTAG; + } else { + aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; + aux_tag = ARC_REG_DC_PTAG; + } + + /* Ensure we properly floor/ceil the non-line aligned/sized requests + * and have @paddr - aligned to cache line and integral @num_lines. + * This however can be avoided for page sized since: + * -@paddr will be cache-line aligned already (being page aligned) + * -@sz will be integral multiple of line size (being page sized). 
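
The floor/ceil comment above amounts to two mask-and-round operations: align the start down to a line boundary, grow the size by the leading gap, then round up to whole lines. A tiny standalone sketch of that arithmetic, assuming a 64-byte line purely for illustration:

#include <stdio.h>

#define LINE_SZ		64UL
#define LINE_MASK	(~(LINE_SZ - 1))

int main(void)
{
	unsigned long paddr = 0x80001234, sz = 100;

	sz += paddr & ~LINE_MASK;	/* add leading gap inside first line */
	paddr &= LINE_MASK;		/* floor start to a line boundary */

	/* DIV_ROUND_UP(sz, LINE_SZ): number of lines the region touches */
	printf("start 0x%lx, lines %lu\n",
	       paddr, (sz + LINE_SZ - 1) / LINE_SZ);
	return 0;
}
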
+ */ + if (!full_page) { + sz += paddr & ~CACHE_LINE_MASK; + paddr &= CACHE_LINE_MASK; + vaddr &= CACHE_LINE_MASK; + } + num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); + + /* + * MMUv3, cache ops require paddr in PTAG reg + * if V-P const for loop, PTAG can be written once outside loop + */ + if (full_page) + write_aux_reg(aux_tag, paddr); + + /* + * This is technically for MMU v4, using the MMU v3 programming model + * Special work for HS38 aliasing I-cache configuration with PAE40 + * - upper 8 bits of paddr need to be written into PTAG_HI + * - (and needs to be written before the lower 32 bits) + * Note that PTAG_HI is hoisted outside the line loop + */ + if (is_pae40_enabled() && op == OP_INV_IC) + write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32); + + while (num_lines-- > 0) { + if (!full_page) { + write_aux_reg(aux_tag, paddr); + paddr += L1_CACHE_BYTES; + } + + write_aux_reg(aux_cmd, vaddr); + vaddr += L1_CACHE_BYTES; + } +} + +#ifndef USE_RGN_FLSH + +/* + */ +static inline +void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz, const int op, const int full_page) +{ + unsigned int aux_cmd; + int num_lines; + + if (op == OP_INV_IC) { + aux_cmd = ARC_REG_IC_IVIL; + } else { + /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ + aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; + } + + /* Ensure we properly floor/ceil the non-line aligned/sized requests + * and have @paddr - aligned to cache line and integral @num_lines. + * This however can be avoided for page sized since: + * -@paddr will be cache-line aligned already (being page aligned) + * -@sz will be integral multiple of line size (being page sized). + */ + if (!full_page) { + sz += paddr & ~CACHE_LINE_MASK; + paddr &= CACHE_LINE_MASK; + } + + num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); + + /* + * For HS38 PAE40 configuration + * - upper 8 bits of paddr need to be written into PTAG_HI + * - (and needs to be written before the lower 32 bits) + */ + if (is_pae40_enabled()) { + if (op == OP_INV_IC) + /* + * Non aliasing I-cache in HS38, + * aliasing I-cache handled in __cache_line_loop_v3() + */ + write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32); + else + write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32); + } + + while (num_lines-- > 0) { + write_aux_reg(aux_cmd, paddr); + paddr += L1_CACHE_BYTES; + } +} + +#else + +/* + * optimized flush operation which takes a region as opposed to iterating per line + */ +static inline +void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz, const int op, const int full_page) +{ + unsigned int s, e; + + /* Only for Non aliasing I-cache in HS38 */ + if (op == OP_INV_IC) { + s = ARC_REG_IC_IVIR; + e = ARC_REG_IC_ENDR; + } else { + s = ARC_REG_DC_STARTR; + e = ARC_REG_DC_ENDR; + } + + if (!full_page) { + /* for any leading gap between @paddr and start of cache line */ + sz += paddr & ~CACHE_LINE_MASK; + paddr &= CACHE_LINE_MASK; + + /* + * account for any trailing gap to end of cache line + * this is equivalent to DIV_ROUND_UP() in line ops above + */ + sz += L1_CACHE_BYTES - 1; + } + + if (is_pae40_enabled()) { + /* TBD: check if crossing 4TB boundary */ + if (op == OP_INV_IC) + write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32); + else + write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32); + } + + /* ENDR needs to be set ahead of START */ + write_aux_reg(e, paddr + sz); /* ENDR is exclusive */ + write_aux_reg(s, paddr); + + /* caller waits on DC_CTRL.FS */ +} + +#endif + +#ifdef CONFIG_ARC_MMU_V3 +#define 
__cache_line_loop __cache_line_loop_v3 +#else +#define __cache_line_loop __cache_line_loop_v4 +#endif + +#ifdef CONFIG_ARC_HAS_DCACHE + +/*************************************************************** + * Machine specific helpers for Entire D-Cache or Per Line ops + */ + +#ifndef USE_RGN_FLSH +/* + * this version avoids extra read/write of DC_CTRL for flush or invalid ops + * in the non region flush regime (such as for ARCompact) + */ +static inline void __before_dc_op(const int op) +{ + if (op == OP_FLUSH_N_INV) { + /* Dcache provides 2 cmd: FLUSH or INV + * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE + * flush-n-inv is achieved by INV cmd but with IM=1 + * So toggle INV sub-mode depending on op request and default + */ + const unsigned int ctl = ARC_REG_DC_CTRL; + write_aux_reg(ctl, read_aux_reg(ctl) | DC_CTRL_INV_MODE_FLUSH); + } +} + +#else + +static inline void __before_dc_op(const int op) +{ + const unsigned int ctl = ARC_REG_DC_CTRL; + unsigned int val = read_aux_reg(ctl); + + if (op == OP_FLUSH_N_INV) { + val |= DC_CTRL_INV_MODE_FLUSH; + } + + if (op != OP_INV_IC) { + /* + * Flush / Invalidate is provided by DC_CTRL.RNG_OP 0 or 1 + * combined Flush-n-invalidate uses DC_CTRL.IM = 1 set above + */ + val &= ~DC_CTRL_RGN_OP_MSK; + if (op & OP_INV) + val |= DC_CTRL_RGN_OP_INV; + } + write_aux_reg(ctl, val); +} + +#endif + + +static inline void __after_dc_op(const int op) +{ + if (op & OP_FLUSH) { + const unsigned int ctl = ARC_REG_DC_CTRL; + unsigned int reg; + + /* flush / flush-n-inv both wait */ + while ((reg = read_aux_reg(ctl)) & DC_CTRL_FLUSH_STATUS) + ; + + /* Switch back to default Invalidate mode */ + if (op == OP_FLUSH_N_INV) + write_aux_reg(ctl, reg & ~DC_CTRL_INV_MODE_FLUSH); + } +} + +/* + * Operation on Entire D-Cache + * @op = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV} + * Note that constant propagation ensures all the checks are gone + * in generated code + */ +static inline void __dc_entire_op(const int op) +{ + int aux; + + __before_dc_op(op); + + if (op & OP_INV) /* Inv or flush-n-inv use same cmd reg */ + aux = ARC_REG_DC_IVDC; + else + aux = ARC_REG_DC_FLSH; + + write_aux_reg(aux, 0x1); + + __after_dc_op(op); +} + +static inline void __dc_disable(void) +{ + const int r = ARC_REG_DC_CTRL; + + __dc_entire_op(OP_FLUSH_N_INV); + write_aux_reg(r, read_aux_reg(r) | DC_CTRL_DIS); +} + +static void __dc_enable(void) +{ + const int r = ARC_REG_DC_CTRL; + + write_aux_reg(r, read_aux_reg(r) & ~DC_CTRL_DIS); +} + +/* For kernel mappings cache operation: index is same as paddr */ +#define __dc_line_op_k(p, sz, op) __dc_line_op(p, p, sz, op) + +/* + * D-Cache Line ops: Per Line INV (discard or wback+discard) or FLUSH (wback) + */ +static inline void __dc_line_op(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz, const int op) +{ + const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE; + unsigned long flags; + + local_irq_save(flags); + + __before_dc_op(op); + + __cache_line_loop(paddr, vaddr, sz, op, full_page); + + __after_dc_op(op); + + local_irq_restore(flags); +} + +#else + +#define __dc_entire_op(op) +#define __dc_disable() +#define __dc_enable() +#define __dc_line_op(paddr, vaddr, sz, op) +#define __dc_line_op_k(paddr, sz, op) + +#endif /* CONFIG_ARC_HAS_DCACHE */ + +#ifdef CONFIG_ARC_HAS_ICACHE + +static inline void __ic_entire_inv(void) +{ + write_aux_reg(ARC_REG_IC_IVIC, 1); + read_aux_reg(ARC_REG_IC_CTRL); /* blocks */ +} + +static inline void +__ic_line_inv_vaddr_local(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz) +{ + const int 
full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE; + unsigned long flags; + + local_irq_save(flags); + (*_cache_line_loop_ic_fn)(paddr, vaddr, sz, OP_INV_IC, full_page); + local_irq_restore(flags); +} + +#ifndef CONFIG_SMP + +#define __ic_line_inv_vaddr(p, v, s) __ic_line_inv_vaddr_local(p, v, s) + +#else + +struct ic_inv_args { + phys_addr_t paddr, vaddr; + int sz; +}; + +static void __ic_line_inv_vaddr_helper(void *info) +{ + struct ic_inv_args *ic_inv = info; + + __ic_line_inv_vaddr_local(ic_inv->paddr, ic_inv->vaddr, ic_inv->sz); +} + +static void __ic_line_inv_vaddr(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz) +{ + struct ic_inv_args ic_inv = { + .paddr = paddr, + .vaddr = vaddr, + .sz = sz + }; + + on_each_cpu(__ic_line_inv_vaddr_helper, &ic_inv, 1); +} + +#endif /* CONFIG_SMP */ + +#else /* !CONFIG_ARC_HAS_ICACHE */ + +#define __ic_entire_inv() +#define __ic_line_inv_vaddr(pstart, vstart, sz) + +#endif /* CONFIG_ARC_HAS_ICACHE */ + +noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op) +{ +#ifdef CONFIG_ISA_ARCV2 + /* + * SLC is shared between all cores and concurrent aux operations from + * multiple cores need to be serialized using a spinlock + * A concurrent operation can be silently ignored and/or the old/new + * operation can remain incomplete forever (lockup in SLC_CTRL_BUSY loop + * below) + */ + static DEFINE_SPINLOCK(lock); + unsigned long flags; + unsigned int ctrl; + phys_addr_t end; + + spin_lock_irqsave(&lock, flags); + + /* + * The Region Flush operation is specified by CTRL.RGN_OP[11..9] + * - b'000 (default) is Flush, + * - b'001 is Invalidate if CTRL.IM == 0 + * - b'001 is Flush-n-Invalidate if CTRL.IM == 1 + */ + ctrl = read_aux_reg(ARC_REG_SLC_CTRL); + + /* Don't rely on default value of IM bit */ + if (!(op & OP_FLUSH)) /* i.e. 
OP_INV */ + ctrl &= ~SLC_CTRL_IM; /* clear IM: Disable flush before Inv */ + else + ctrl |= SLC_CTRL_IM; + + if (op & OP_INV) + ctrl |= SLC_CTRL_RGN_OP_INV; /* Inv or flush-n-inv */ + else + ctrl &= ~SLC_CTRL_RGN_OP_INV; + + write_aux_reg(ARC_REG_SLC_CTRL, ctrl); + + /* + * Lower bits are ignored, no need to clip + * END needs to be setup before START (latter triggers the operation) + * END can't be same as START, so add (l2_line_sz - 1) to sz + */ + end = paddr + sz + l2_line_sz - 1; + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_SLC_RGN_END1, upper_32_bits(end)); + + write_aux_reg(ARC_REG_SLC_RGN_END, lower_32_bits(end)); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_SLC_RGN_START1, upper_32_bits(paddr)); + + write_aux_reg(ARC_REG_SLC_RGN_START, lower_32_bits(paddr)); + + /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */ + read_aux_reg(ARC_REG_SLC_CTRL); + + while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY); + + spin_unlock_irqrestore(&lock, flags); +#endif +} + +noinline void slc_op_line(phys_addr_t paddr, unsigned long sz, const int op) +{ +#ifdef CONFIG_ISA_ARCV2 + /* + * SLC is shared between all cores and concurrent aux operations from + * multiple cores need to be serialized using a spinlock + * A concurrent operation can be silently ignored and/or the old/new + * operation can remain incomplete forever (lockup in SLC_CTRL_BUSY loop + * below) + */ + static DEFINE_SPINLOCK(lock); + + const unsigned long SLC_LINE_MASK = ~(l2_line_sz - 1); + unsigned int ctrl, cmd; + unsigned long flags; + int num_lines; + + spin_lock_irqsave(&lock, flags); + + ctrl = read_aux_reg(ARC_REG_SLC_CTRL); + + /* Don't rely on default value of IM bit */ + if (!(op & OP_FLUSH)) /* i.e. OP_INV */ + ctrl &= ~SLC_CTRL_IM; /* clear IM: Disable flush before Inv */ + else + ctrl |= SLC_CTRL_IM; + + write_aux_reg(ARC_REG_SLC_CTRL, ctrl); + + cmd = op & OP_INV ? ARC_AUX_SLC_IVDL : ARC_AUX_SLC_FLDL; + + sz += paddr & ~SLC_LINE_MASK; + paddr &= SLC_LINE_MASK; + + num_lines = DIV_ROUND_UP(sz, l2_line_sz); + + while (num_lines-- > 0) { + write_aux_reg(cmd, paddr); + paddr += l2_line_sz; + } + + /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */ + read_aux_reg(ARC_REG_SLC_CTRL); + + while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY); + + spin_unlock_irqrestore(&lock, flags); +#endif +} + +#define slc_op(paddr, sz, op) slc_op_rgn(paddr, sz, op) + +noinline static void slc_entire_op(const int op) +{ + unsigned int ctrl, r = ARC_REG_SLC_CTRL; + + ctrl = read_aux_reg(r); + + if (!(op & OP_FLUSH)) /* i.e. 
OP_INV */ + ctrl &= ~SLC_CTRL_IM; /* clear IM: Disable flush before Inv */ + else + ctrl |= SLC_CTRL_IM; + + write_aux_reg(r, ctrl); + + if (op & OP_INV) /* Inv or flush-n-inv use same cmd reg */ + write_aux_reg(ARC_REG_SLC_INVALIDATE, 0x1); + else + write_aux_reg(ARC_REG_SLC_FLUSH, 0x1); + + /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */ + read_aux_reg(r); + + /* Important to wait for flush to complete */ + while (read_aux_reg(r) & SLC_CTRL_BUSY); +} + +static inline void arc_slc_disable(void) +{ + const int r = ARC_REG_SLC_CTRL; + + slc_entire_op(OP_FLUSH_N_INV); + write_aux_reg(r, read_aux_reg(r) | SLC_CTRL_DIS); +} + +static inline void arc_slc_enable(void) +{ + const int r = ARC_REG_SLC_CTRL; + + write_aux_reg(r, read_aux_reg(r) & ~SLC_CTRL_DIS); +} + +/*********************************************************** + * Exported APIs + */ + +/* + * Handle cache congruency of kernel and userspace mappings of page when kernel + * writes-to/reads-from + * + * The idea is to defer flushing of kernel mapping after a WRITE, possible if: + * -dcache is NOT aliasing, hence any U/K-mappings of page are congruent + * -U-mapping doesn't exist yet for page (finalised in update_mmu_cache) + * -In SMP, if hardware caches are coherent + * + * There's a corollary case, where kernel READs from a userspace mapped page. + * If the U-mapping is not congruent to to K-mapping, former needs flushing. + */ +void flush_dcache_page(struct page *page) +{ + struct address_space *mapping; + + if (!cache_is_vipt_aliasing()) { + clear_bit(PG_dc_clean, &page->flags); + return; + } + + /* don't handle anon pages here */ + mapping = page_mapping_file(page); + if (!mapping) + return; + + /* + * pagecache page, file not yet mapped to userspace + * Make a note that K-mapping is dirty + */ + if (!mapping_mapped(mapping)) { + clear_bit(PG_dc_clean, &page->flags); + } else if (page_mapcount(page)) { + + /* kernel reading from page with U-mapping */ + phys_addr_t paddr = (unsigned long)page_address(page); + unsigned long vaddr = page->index << PAGE_SHIFT; + + if (addr_not_cache_congruent(paddr, vaddr)) + __flush_dcache_page(paddr, vaddr); + } +} +EXPORT_SYMBOL(flush_dcache_page); + +/* + * DMA ops for systems with L1 cache only + * Make memory coherent with L1 cache by flushing/invalidating L1 lines + */ +static void __dma_cache_wback_inv_l1(phys_addr_t start, unsigned long sz) +{ + __dc_line_op_k(start, sz, OP_FLUSH_N_INV); +} + +static void __dma_cache_inv_l1(phys_addr_t start, unsigned long sz) +{ + __dc_line_op_k(start, sz, OP_INV); +} + +static void __dma_cache_wback_l1(phys_addr_t start, unsigned long sz) +{ + __dc_line_op_k(start, sz, OP_FLUSH); +} + +/* + * DMA ops for systems with both L1 and L2 caches, but without IOC + * Both L1 and L2 lines need to be explicitly flushed/invalidated + */ +static void __dma_cache_wback_inv_slc(phys_addr_t start, unsigned long sz) +{ + __dc_line_op_k(start, sz, OP_FLUSH_N_INV); + slc_op(start, sz, OP_FLUSH_N_INV); +} + +static void __dma_cache_inv_slc(phys_addr_t start, unsigned long sz) +{ + __dc_line_op_k(start, sz, OP_INV); + slc_op(start, sz, OP_INV); +} + +static void __dma_cache_wback_slc(phys_addr_t start, unsigned long sz) +{ + __dc_line_op_k(start, sz, OP_FLUSH); + slc_op(start, sz, OP_FLUSH); +} + +/* + * Exported DMA API + */ +void dma_cache_wback_inv(phys_addr_t start, unsigned long sz) +{ + __dma_cache_wback_inv(start, sz); +} +EXPORT_SYMBOL(dma_cache_wback_inv); + +void dma_cache_inv(phys_addr_t start, unsigned long sz) +{ + __dma_cache_inv(start, 
sz); +} +EXPORT_SYMBOL(dma_cache_inv); + +void dma_cache_wback(phys_addr_t start, unsigned long sz) +{ + __dma_cache_wback(start, sz); +} +EXPORT_SYMBOL(dma_cache_wback); + +/* + * This is API for making I/D Caches consistent when modifying + * kernel code (loadable modules, kprobes, kgdb...) + * This is called on insmod, with kernel virtual address for CODE of + * the module. ARC cache maintenance ops require PHY address thus we + * need to convert vmalloc addr to PHY addr + */ +void flush_icache_range(unsigned long kstart, unsigned long kend) +{ + unsigned int tot_sz; + + WARN(kstart < TASK_SIZE, "%s() can't handle user vaddr", __func__); + + /* Shortcut for bigger flush ranges. + * Here we don't care if this was kernel virtual or phy addr + */ + tot_sz = kend - kstart; + if (tot_sz > PAGE_SIZE) { + flush_cache_all(); + return; + } + + /* Case: Kernel Phy addr (0x8000_0000 onwards) */ + if (likely(kstart > PAGE_OFFSET)) { + /* + * The 2nd arg despite being paddr will be used to index icache + * This is OK since no alternate virtual mappings will exist + * given the callers for this case: kprobe/kgdb in built-in + * kernel code only. + */ + __sync_icache_dcache(kstart, kstart, kend - kstart); + return; + } + + /* + * Case: Kernel Vaddr (0x7000_0000 to 0x7fff_ffff) + * (1) ARC Cache Maintenance ops only take Phy addr, hence special + * handling of kernel vaddr. + * + * (2) Despite @tot_sz being < PAGE_SIZE (bigger cases handled already), + * it still needs to handle a 2 page scenario, where the range + * straddles across 2 virtual pages and hence need for loop + */ + while (tot_sz > 0) { + unsigned int off, sz; + unsigned long phy, pfn; + + off = kstart % PAGE_SIZE; + pfn = vmalloc_to_pfn((void *)kstart); + phy = (pfn << PAGE_SHIFT) + off; + sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off); + __sync_icache_dcache(phy, kstart, sz); + kstart += sz; + tot_sz -= sz; + } +} +EXPORT_SYMBOL(flush_icache_range); + +/* + * General purpose helper to make I and D cache lines consistent. + * @paddr is phy addr of region + * @vaddr is typically user vaddr (breakpoint) or kernel vaddr (vmalloc) + * However in one instance, when called by kprobe (for a breakpt in + * builtin kernel code) @vaddr will be paddr only, meaning CDU operation will + * use a paddr to index the cache (despite VIPT). This is fine since since a + * builtin kernel page will not have any virtual mappings. + * kprobe on loadable module will be kernel vaddr. 
+ */ +void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr, int len) +{ + __dc_line_op(paddr, vaddr, len, OP_FLUSH_N_INV); + __ic_line_inv_vaddr(paddr, vaddr, len); +} + +/* wrapper to compile time eliminate alignment checks in flush loop */ +void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr) +{ + __ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE); +} + +/* + * wrapper to clearout kernel or userspace mappings of a page + * For kernel mappings @vaddr == @paddr + */ +void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr) +{ + __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV); +} + +noinline void flush_cache_all(void) +{ + unsigned long flags; + + local_irq_save(flags); + + __ic_entire_inv(); + __dc_entire_op(OP_FLUSH_N_INV); + + local_irq_restore(flags); + +} + +#ifdef CONFIG_ARC_CACHE_VIPT_ALIASING + +void flush_cache_mm(struct mm_struct *mm) +{ + flush_cache_all(); +} + +void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr, + unsigned long pfn) +{ + phys_addr_t paddr = pfn << PAGE_SHIFT; + + u_vaddr &= PAGE_MASK; + + __flush_dcache_page(paddr, u_vaddr); + + if (vma->vm_flags & VM_EXEC) + __inv_icache_page(paddr, u_vaddr); +} + +void flush_cache_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + flush_cache_all(); +} + +void flush_anon_page(struct vm_area_struct *vma, struct page *page, + unsigned long u_vaddr) +{ + /* TBD: do we really need to clear the kernel mapping */ + __flush_dcache_page((phys_addr_t)page_address(page), u_vaddr); + __flush_dcache_page((phys_addr_t)page_address(page), + (phys_addr_t)page_address(page)); + +} + +#endif + +void copy_user_highpage(struct page *to, struct page *from, + unsigned long u_vaddr, struct vm_area_struct *vma) +{ + void *kfrom = kmap_atomic(from); + void *kto = kmap_atomic(to); + int clean_src_k_mappings = 0; + + /* + * If SRC page was already mapped in userspace AND it's U-mapping is + * not congruent with K-mapping, sync former to physical page so that + * K-mapping in memcpy below, sees the right data + * + * Note that while @u_vaddr refers to DST page's userspace vaddr, it is + * equally valid for SRC page as well + * + * For !VIPT cache, all of this gets compiled out as + * addr_not_cache_congruent() is 0 + */ + if (page_mapcount(from) && addr_not_cache_congruent(kfrom, u_vaddr)) { + __flush_dcache_page((unsigned long)kfrom, u_vaddr); + clean_src_k_mappings = 1; + } + + copy_page(kto, kfrom); + + /* + * Mark DST page K-mapping as dirty for a later finalization by + * update_mmu_cache(). Although the finalization could have been done + * here as well (given that both vaddr/paddr are available). + * But update_mmu_cache() already has code to do that for other + * non copied user pages (e.g. read faults which wire in pagecache page + * directly). 
+ */ + clear_bit(PG_dc_clean, &to->flags); + + /* + * if SRC was already usermapped and non-congruent to kernel mapping + * sync the kernel mapping back to physical page + */ + if (clean_src_k_mappings) { + __flush_dcache_page((unsigned long)kfrom, (unsigned long)kfrom); + set_bit(PG_dc_clean, &from->flags); + } else { + clear_bit(PG_dc_clean, &from->flags); + } + + kunmap_atomic(kto); + kunmap_atomic(kfrom); +} + +void clear_user_page(void *to, unsigned long u_vaddr, struct page *page) +{ + clear_page(to); + clear_bit(PG_dc_clean, &page->flags); +} +EXPORT_SYMBOL(clear_user_page); + +/********************************************************************** + * Explicit Cache flush request from user space via syscall + * Needed for JITs which generate code on the fly + */ +SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags) +{ + /* TBD: optimize this */ + flush_cache_all(); + return 0; +} + +/* + * IO-Coherency (IOC) setup rules: + * + * 1. Needs to be at system level, so only once by Master core + * Non-Masters need not be accessing caches at that time + * - They are either HALT_ON_RESET and kick started much later or + * - if run on reset, need to ensure that arc_platform_smp_wait_to_boot() + * doesn't perturb caches or coherency unit + * + * 2. caches (L1 and SLC) need to be purged (flush+inv) before setting up IOC, + * otherwise any straggler data might behave strangely post IOC enabling + * + * 3. All Caches need to be disabled when setting up IOC to elide any in-flight + * Coherency transactions + */ +noinline void __init arc_ioc_setup(void) +{ + unsigned int ioc_base, mem_sz; + + /* + * If IOC was already enabled (due to bootloader) it technically needs to + * be reconfigured with aperture base,size corresponding to Linux memory map + * which will certainly be different than uboot's. But disabling and + * reenabling IOC when DMA might be potentially active is tricky business. + * To avoid random memory issues later, just panic here and ask user to + * upgrade bootloader to one which doesn't enable IOC + */ + if (read_aux_reg(ARC_REG_IO_COH_ENABLE) & ARC_IO_COH_ENABLE_BIT) + panic("IOC already enabled, please upgrade bootloader!\n"); + + if (!ioc_enable) + return; + + /* Flush + invalidate + disable L1 dcache */ + __dc_disable(); + + /* Flush + invalidate SLC */ + if (read_aux_reg(ARC_REG_SLC_BCR)) + slc_entire_op(OP_FLUSH_N_INV); + + /* + * currently IOC Aperture covers entire DDR + * TBD: fix for PGU + 1GB of low mem + * TBD: fix for PAE + */ + mem_sz = arc_get_mem_sz(); + + if (!is_power_of_2(mem_sz) || mem_sz < 4096) + panic("IOC Aperture size must be power of 2 larger than 4KB"); + + /* + * IOC Aperture size decoded as 2 ^ (SIZE + 2) KB, + * so setting 0x11 implies 512MB, 0x12 implies 1GB... + */ + write_aux_reg(ARC_REG_IO_COH_AP0_SIZE, order_base_2(mem_sz >> 10) - 2); + + /* for now assume kernel base is start of IOC aperture */ + ioc_base = CONFIG_LINUX_RAM_BASE; + + if (ioc_base % mem_sz != 0) + panic("IOC Aperture start must be aligned to the size of the aperture"); + + write_aux_reg(ARC_REG_IO_COH_AP0_BASE, ioc_base >> 12); + write_aux_reg(ARC_REG_IO_COH_PARTIAL, ARC_IO_COH_PARTIAL_BIT); + write_aux_reg(ARC_REG_IO_COH_ENABLE, ARC_IO_COH_ENABLE_BIT); + + /* Re-enable L1 dcache */ + __dc_enable(); +} + +/* + * Cache related boot time checks/setups only needed on master CPU: + * - Geometry checks (kernel build and hardware agree: e.g. L1_CACHE_BYTES) + * Assume SMP only, so all cores will have same cache config. 
A check on + * one core suffices for all + * - IOC setup / dma callbacks only need to be done once + */ +void __init arc_cache_init_master(void) +{ + unsigned int __maybe_unused cpu = smp_processor_id(); + + if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) { + struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache; + + if (!ic->line_len) + panic("cache support enabled but non-existent cache\n"); + + if (ic->line_len != L1_CACHE_BYTES) + panic("ICache line [%d] != kernel Config [%d]", + ic->line_len, L1_CACHE_BYTES); + + /* + * In MMU v4 (HS38x) the aliasing icache config uses IVIL/PTAG + * pair to provide vaddr/paddr respectively, just as in MMU v3 + */ + if (is_isa_arcv2() && ic->alias) + _cache_line_loop_ic_fn = __cache_line_loop_v3; + else + _cache_line_loop_ic_fn = __cache_line_loop; + } + + if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) { + struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache; + + if (!dc->line_len) + panic("cache support enabled but non-existent cache\n"); + + if (dc->line_len != L1_CACHE_BYTES) + panic("DCache line [%d] != kernel Config [%d]", + dc->line_len, L1_CACHE_BYTES); + + /* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */ + if (is_isa_arcompact()) { + int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); + int num_colors = dc->sz_k/dc->assoc/TO_KB(PAGE_SIZE); + + if (dc->alias) { + if (!handled) + panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + if (CACHE_COLORS_NUM != num_colors) + panic("CACHE_COLORS_NUM not optimized for config\n"); + } else if (!dc->alias && handled) { + panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + } + } + } + + /* + * Check that SMP_CACHE_BYTES (and hence ARCH_DMA_MINALIGN) is larger + * or equal to any cache line length. + */ + BUILD_BUG_ON_MSG(L1_CACHE_BYTES > SMP_CACHE_BYTES, + "SMP_CACHE_BYTES must be >= any cache line length"); + if (is_isa_arcv2() && (l2_line_sz > SMP_CACHE_BYTES)) + panic("L2 Cache line [%d] > kernel Config [%d]\n", + l2_line_sz, SMP_CACHE_BYTES); + + /* Note that SLC disable not formally supported till HS 3.0 */ + if (is_isa_arcv2() && l2_line_sz && !slc_enable) + arc_slc_disable(); + + if (is_isa_arcv2() && ioc_exists) + arc_ioc_setup(); + + if (is_isa_arcv2() && l2_line_sz && slc_enable) { + __dma_cache_wback_inv = __dma_cache_wback_inv_slc; + __dma_cache_inv = __dma_cache_inv_slc; + __dma_cache_wback = __dma_cache_wback_slc; + } else { + __dma_cache_wback_inv = __dma_cache_wback_inv_l1; + __dma_cache_inv = __dma_cache_inv_l1; + __dma_cache_wback = __dma_cache_wback_l1; + } + /* + * In case of IOC (say IOC+SLC case), pointers above could still be set + * but end up not being relevant as the first function in chain is not + * called at all for devices using coherent DMA. + * arch_sync_dma_for_cpu() -> dma_cache_*() -> __dma_cache_*() + */ +} + +void __ref arc_cache_init(void) +{ + unsigned int __maybe_unused cpu = smp_processor_id(); + char str[256]; + + pr_info("%s", arc_cache_mumbojumbo(0, str, sizeof(str))); + + if (!cpu) + arc_cache_init_master(); + + /* + * In PAE regime, TLB and cache maintenance ops take wider addresses + * And even if PAE is not enabled in kernel, the upper 32-bits still need + * to be zeroed to keep the ops sane. 
+ * As an optimization for more common !PAE enabled case, zero them out + * once at init, rather than checking/setting to 0 for every runtime op + */ + if (is_isa_arcv2() && pae40_exist_but_not_enab()) { + + if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) + write_aux_reg(ARC_REG_IC_PTAG_HI, 0); + + if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) + write_aux_reg(ARC_REG_DC_PTAG_HI, 0); + + if (l2_line_sz) { + write_aux_reg(ARC_REG_SLC_RGN_END1, 0); + write_aux_reg(ARC_REG_SLC_RGN_START1, 0); + } + } +} diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c new file mode 100644 index 0000000000..2a7fbbb83b --- /dev/null +++ b/arch/arc/mm/dma.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#include +#include +#include + +/* + * ARCH specific callbacks for generic noncoherent DMA ops + * - hardware IOC not available (or "dma-coherent" not set for device in DT) + * - But still handle both coherent and non-coherent requests from caller + * + * For DMA coherent hardware (IOC) generic code suffices + */ + +void arch_dma_prep_coherent(struct page *page, size_t size) +{ + /* + * Evict any existing L1 and/or L2 lines for the backing page + * in case it was used earlier as a normal "cached" page. + * Yeah this bit us - STAR 9000898266 + * + * Although core does call flush_cache_vmap(), it gets kvaddr hence + * can't be used to efficiently flush L1 and/or L2 which need paddr + * Currently flush_cache_vmap nukes the L1 cache completely which + * will be optimized as a separate commit + */ + dma_cache_wback_inv(page_to_phys(page), size); +} + +/* + * Cache operations depending on function and direction argument, inspired by + * https://lore.kernel.org/lkml/20180518175004.GF17671@n2100.armlinux.org.uk + * "dma_sync_*_for_cpu and direction=TO_DEVICE (was Re: [PATCH 02/20] + * dma-mapping: provide a generic dma-noncoherent implementation)" + * + * | map == for_device | unmap == for_cpu + * |---------------------------------------------------------------- + * TO_DEV | writeback writeback | none none + * FROM_DEV | invalidate invalidate | invalidate* invalidate* + * BIDIR | writeback+inv writeback+inv | invalidate invalidate + * + * [*] needed for CPU speculative prefetches + * + * NOTE: we don't check the validity of direction argument as it is done in + * upper layer functions (in include/linux/dma-mapping.h) + */ + +void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) +{ + switch (dir) { + case DMA_TO_DEVICE: + dma_cache_wback(paddr, size); + break; + + case DMA_FROM_DEVICE: + dma_cache_inv(paddr, size); + break; + + case DMA_BIDIRECTIONAL: + dma_cache_wback_inv(paddr, size); + break; + + default: + break; + } +} + +void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) +{ + switch (dir) { + case DMA_TO_DEVICE: + break; + + /* FROM_DEVICE invalidate needed if speculative CPU prefetch only */ + case DMA_FROM_DEVICE: + case DMA_BIDIRECTIONAL: + dma_cache_inv(paddr, size); + break; + + default: + break; + } +} + +/* + * Plug in direct dma map ops. + */ +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + const struct iommu_ops *iommu, bool coherent) +{ + /* + * IOC hardware snoops all DMA traffic keeping the caches consistent + * with memory - eliding need for any explicit cache maintenance of + * DMA buffers. 
+ */ + if (is_isa_arcv2() && ioc_enable && coherent) + dev->dma_coherent = true; + + dev_info(dev, "use %scoherent DMA ops\n", + dev->dma_coherent ? "" : "non"); +} diff --git a/arch/arc/mm/extable.c b/arch/arc/mm/extable.c new file mode 100644 index 0000000000..4e14c4244e --- /dev/null +++ b/arch/arc/mm/extable.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * Borrowed heavily from MIPS + */ + +#include +#include +#include + +int fixup_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *fixup; + + fixup = search_exception_tables(instruction_pointer(regs)); + if (fixup) { + regs->ret = fixup->fixup; + + return 1; + } + + return 0; +} + +#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE + +unsigned long arc_clear_user_noinline(void __user *to, + unsigned long n) +{ + return __arc_clear_user(to, n); +} +EXPORT_SYMBOL(arc_clear_user_noinline); + +#endif diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c new file mode 100644 index 0000000000..dad27e4d69 --- /dev/null +++ b/arch/arc/mm/fault.c @@ -0,0 +1,193 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Page Fault Handling for ARC (TLB Miss / ProtV) + * + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * kernel virtual address is required to implement vmalloc/pkmap/fixmap + * Refer to asm/processor.h for System Memory Map + * + * It simply copies the PMD entry (pointer to 2nd level page table or hugepage) + * from swapper pgdir to task pgdir. The 2nd level table/page is thus shared + */ +noinline static int handle_kernel_vaddr_fault(unsigned long address) +{ + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. + */ + pgd_t *pgd, *pgd_k; + p4d_t *p4d, *p4d_k; + pud_t *pud, *pud_k; + pmd_t *pmd, *pmd_k; + + pgd = pgd_offset(current->active_mm, address); + pgd_k = pgd_offset_k(address); + + if (pgd_none (*pgd_k)) + goto bad_area; + if (!pgd_present(*pgd)) + set_pgd(pgd, *pgd_k); + + p4d = p4d_offset(pgd, address); + p4d_k = p4d_offset(pgd_k, address); + if (p4d_none(*p4d_k)) + goto bad_area; + if (!p4d_present(*p4d)) + set_p4d(p4d, *p4d_k); + + pud = pud_offset(p4d, address); + pud_k = pud_offset(p4d_k, address); + if (pud_none(*pud_k)) + goto bad_area; + if (!pud_present(*pud)) + set_pud(pud, *pud_k); + + pmd = pmd_offset(pud, address); + pmd_k = pmd_offset(pud_k, address); + if (pmd_none(*pmd_k)) + goto bad_area; + if (!pmd_present(*pmd)) + set_pmd(pmd, *pmd_k); + + /* XXX: create the TLB entry here */ + return 0; + +bad_area: + return 1; +} + +void do_page_fault(unsigned long address, struct pt_regs *regs) +{ + struct vm_area_struct *vma = NULL; + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + int sig, si_code = SEGV_MAPERR; + unsigned int write = 0, exec = 0, mask; + vm_fault_t fault = VM_FAULT_SIGSEGV; /* handle_mm_fault() output */ + unsigned int flags; /* handle_mm_fault() input */ + + /* + * NOTE! We MUST NOT take any locks for this case. We may + * be in an interrupt or a critical region, and should + * only copy the information from the master page table, + * nothing more. + */ + if (address >= VMALLOC_START && !user_mode(regs)) { + if (unlikely(handle_kernel_vaddr_fault(address))) + goto no_context; + else + return; + } + + /* + * If we're in an interrupt or have no user + * context, we must not take the fault.. 
+ */ + if (faulthandler_disabled() || !mm) + goto no_context; + + if (regs->ecr_cause & ECR_C_PROTV_STORE) /* ST/EX */ + write = 1; + else if ((regs->ecr_vec == ECR_V_PROTV) && + (regs->ecr_cause == ECR_C_PROTV_INST_FETCH)) + exec = 1; + + flags = FAULT_FLAG_DEFAULT; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + if (write) + flags |= FAULT_FLAG_WRITE; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); +retry: + mmap_read_lock(mm); + + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + if (unlikely(address < vma->vm_start)) { + if (!(vma->vm_flags & VM_GROWSDOWN) || expand_stack(vma, address)) + goto bad_area; + } + + /* + * vm_area is good, now check permissions for this memory access + */ + mask = VM_READ; + if (write) + mask = VM_WRITE; + if (exec) + mask = VM_EXEC; + + if (!(vma->vm_flags & mask)) { + si_code = SEGV_ACCERR; + goto bad_area; + } + + fault = handle_mm_fault(vma, address, flags, regs); + + /* Quick path to respond to signals */ + if (fault_signal_pending(fault, regs)) { + if (!user_mode(regs)) + goto no_context; + return; + } + + /* + * Fault retry nuances, mmap_lock already relinquished by core mm + */ + if (unlikely(fault & VM_FAULT_RETRY)) { + flags |= FAULT_FLAG_TRIED; + goto retry; + } + +bad_area: + mmap_read_unlock(mm); + + /* + * Major/minor page fault accounting + * (in case of retry we only land here once) + */ + if (likely(!(fault & VM_FAULT_ERROR))) + /* Normal return path: fault Handled Gracefully */ + return; + + if (!user_mode(regs)) + goto no_context; + + if (fault & VM_FAULT_OOM) { + pagefault_out_of_memory(); + return; + } + + if (fault & VM_FAULT_SIGBUS) { + sig = SIGBUS; + si_code = BUS_ADRERR; + } + else { + sig = SIGSEGV; + } + + tsk->thread.fault_address = address; + force_sig_fault(sig, si_code, (void __user *)address); + return; + +no_context: + if (fixup_exception(regs)) + return; + + die("Oops", regs, address); +} diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c new file mode 100644 index 0000000000..c79912a6b1 --- /dev/null +++ b/arch/arc/mm/highmem.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com) + */ + +#include +#include +#include +#include +#include +#include +#include + +/* + * HIGHMEM API: + * + * kmap() API provides sleep semantics hence referred to as "permanent maps" + * It allows mapping LAST_PKMAP pages, using @last_pkmap_nr as the cursor + * for book-keeping + * + * kmap_atomic() can't sleep (calls pagefault_disable()), thus it provides + * shortlived ala "temporary mappings" which historically were implemented as + * fixmaps (compile time addr etc). Their book-keeping is done per cpu. + * + * Both these facts combined (preemption disabled and per-cpu allocation) + * means the total number of concurrent fixmaps will be limited to max + * such allocations in a single control path. Thus KM_TYPE_NR (another + * historic relic) is a small'ish number which caps max percpu fixmaps + * + * ARC HIGHMEM Details + * + * - the kernel vaddr space from 0x7z to 0x8z (currently used by vmalloc/module) + * is now shared between vmalloc and kmap (non overlapping though) + * + * - Both fixmap/pkmap use a dedicated page table each, hooked up to swapper PGD + * This means each only has 1 PGDIR_SIZE worth of kvaddr mappings, which means + * 2M of kvaddr space for typical config (8K page and 11:8:13 traversal split) + * + * - The fixed KMAP slots for kmap_local/atomic() require KM_MAX_IDX slots per + * CPU. 
So the number of CPUs sharing a single PTE page is limited. + * + * - pkmap being preemptible, in theory could do with more than 256 concurrent + * mappings. However, generic pkmap code: map_new_virtual(), doesn't traverse + * the PGD and only works with a single page table @pkmap_page_table, hence + * sets the limit + */ + +extern pte_t * pkmap_page_table; + +static noinline pte_t * __init alloc_kmap_pgtable(unsigned long kvaddr) +{ + pmd_t *pmd_k = pmd_off_k(kvaddr); + pte_t *pte_k; + + pte_k = (pte_t *)memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + if (!pte_k) + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", + __func__, PAGE_SIZE, PAGE_SIZE); + + pmd_populate_kernel(&init_mm, pmd_k, pte_k); + return pte_k; +} + +void __init kmap_init(void) +{ + /* Due to recursive include hell, we can't do this in processor.h */ + BUILD_BUG_ON(PAGE_OFFSET < (VMALLOC_END + FIXMAP_SIZE + PKMAP_SIZE)); + BUILD_BUG_ON(LAST_PKMAP > PTRS_PER_PTE); + BUILD_BUG_ON(FIX_KMAP_SLOTS > PTRS_PER_PTE); + + pkmap_page_table = alloc_kmap_pgtable(PKMAP_BASE); + alloc_kmap_pgtable(FIXMAP_BASE); +} diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c new file mode 100644 index 0000000000..ce4e939a7f --- /dev/null +++ b/arch/arc/mm/init.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#include +#include +#include +#ifdef CONFIG_BLK_DEV_INITRD +#include +#endif +#include +#include +#include +#include +#include +#include +#include + +pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE); +char empty_zero_page[PAGE_SIZE] __aligned(PAGE_SIZE); +EXPORT_SYMBOL(empty_zero_page); + +static const unsigned long low_mem_start = CONFIG_LINUX_RAM_BASE; +static unsigned long low_mem_sz; + +#ifdef CONFIG_HIGHMEM +static unsigned long min_high_pfn, max_high_pfn; +static phys_addr_t high_mem_start; +static phys_addr_t high_mem_sz; +unsigned long arch_pfn_offset; +EXPORT_SYMBOL(arch_pfn_offset); +#endif + +long __init arc_get_mem_sz(void) +{ + return low_mem_sz; +} + +/* User can over-ride above with "mem=nnn[KkMm]" in cmdline */ +static int __init setup_mem_sz(char *str) +{ + low_mem_sz = memparse(str, NULL) & PAGE_MASK; + + /* early console might not be setup yet - it will show up later */ + pr_info("\"mem=%s\": mem sz set to %ldM\n", str, TO_MB(low_mem_sz)); + + return 0; +} +early_param("mem", setup_mem_sz); + +void __init early_init_dt_add_memory_arch(u64 base, u64 size) +{ + int in_use = 0; + + if (!low_mem_sz) { + if (base != low_mem_start) + panic("CONFIG_LINUX_RAM_BASE != DT memory { }"); + + low_mem_sz = size; + in_use = 1; + memblock_add_node(base, size, 0, MEMBLOCK_NONE); + } else { +#ifdef CONFIG_HIGHMEM + high_mem_start = base; + high_mem_sz = size; + in_use = 1; + memblock_add_node(base, size, 1, MEMBLOCK_NONE); + memblock_reserve(base, size); +#endif + } + + pr_info("Memory @ %llx [%lldM] %s\n", + base, TO_MB(size), !in_use ? "Not used":""); +} + +bool arch_has_descending_max_zone_pfns(void) +{ + return !IS_ENABLED(CONFIG_ARC_HAS_PAE40); +} + +/* + * First memory setup routine called from setup_arch() + * 1. setup swapper's mm @init_mm + * 2. Count the pages we have and setup bootmem allocator + * 3. 
zone setup + */ +void __init setup_arch_memory(void) +{ + unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 }; + + setup_initial_init_mm(_text, _etext, _edata, _end); + + /* first page of system - kernel .vector starts here */ + min_low_pfn = virt_to_pfn(CONFIG_LINUX_RAM_BASE); + + /* Last usable page of low mem */ + max_low_pfn = max_pfn = PFN_DOWN(low_mem_start + low_mem_sz); + + /*------------- bootmem allocator setup -----------------------*/ + + /* + * seed the bootmem allocator after any DT memory node parsing or + * "mem=xxx" cmdline overrides have potentially updated @arc_mem_sz + * + * Only low mem is added, otherwise we have crashes when allocating + * mem_map[] itself. NO_BOOTMEM allocates mem_map[] at the end of + * avail memory, ending in highmem with a > 32-bit address. However + * it then tries to memset it with a truncaed 32-bit handle, causing + * the crash + */ + + memblock_reserve(CONFIG_LINUX_LINK_BASE, + __pa(_end) - CONFIG_LINUX_LINK_BASE); + +#ifdef CONFIG_BLK_DEV_INITRD + if (phys_initrd_size) { + memblock_reserve(phys_initrd_start, phys_initrd_size); + initrd_start = (unsigned long)__va(phys_initrd_start); + initrd_end = initrd_start + phys_initrd_size; + } +#endif + + early_init_fdt_reserve_self(); + early_init_fdt_scan_reserved_mem(); + + memblock_dump_all(); + + /*----------------- node/zones setup --------------------------*/ + max_zone_pfn[ZONE_NORMAL] = max_low_pfn; + +#ifdef CONFIG_HIGHMEM + /* + * On ARC (w/o PAE) HIGHMEM addresses are actually smaller (0 based) + * than addresses in normal aka low memory (0x8000_0000 based). + * Even with PAE, the huge peripheral space hole would waste a lot of + * mem with single contiguous mem_map[]. + * Thus when HIGHMEM on ARC is enabled the memory map corresponding + * to the hole is freed and ARC specific version of pfn_valid() + * handles the hole in the memory map. + */ + + min_high_pfn = PFN_DOWN(high_mem_start); + max_high_pfn = PFN_DOWN(high_mem_start + high_mem_sz); + + /* + * max_high_pfn should be ok here for both HIGHMEM and HIGHMEM+PAE. + * For HIGHMEM without PAE max_high_pfn should be less than + * min_low_pfn to guarantee that these two regions don't overlap. + * For PAE case highmem is greater than lowmem, so it is natural + * to use max_high_pfn. + * + * In both cases, holes should be handled by pfn_valid(). 
+ */ + max_zone_pfn[ZONE_HIGHMEM] = max_high_pfn; + + high_memory = (void *)(min_high_pfn << PAGE_SHIFT); + + arch_pfn_offset = min(min_low_pfn, min_high_pfn); + kmap_init(); + +#else /* CONFIG_HIGHMEM */ + /* pfn_valid() uses this when FLATMEM=y and HIGHMEM=n */ + max_mapnr = max_low_pfn - min_low_pfn; + +#endif /* CONFIG_HIGHMEM */ + + free_area_init(max_zone_pfn); +} + +static void __init highmem_init(void) +{ +#ifdef CONFIG_HIGHMEM + unsigned long tmp; + + memblock_phys_free(high_mem_start, high_mem_sz); + for (tmp = min_high_pfn; tmp < max_high_pfn; tmp++) + free_highmem_page(pfn_to_page(tmp)); +#endif +} + +/* + * mem_init - initializes memory + * + * Frees up bootmem + * Calculates and displays memory available/used + */ +void __init mem_init(void) +{ + memblock_free_all(); + highmem_init(); + + BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE); + BUILD_BUG_ON((PTRS_PER_PUD * sizeof(pud_t)) > PAGE_SIZE); + BUILD_BUG_ON((PTRS_PER_PMD * sizeof(pmd_t)) > PAGE_SIZE); + BUILD_BUG_ON((PTRS_PER_PTE * sizeof(pte_t)) > PAGE_SIZE); +} + +#ifdef CONFIG_HIGHMEM +int pfn_valid(unsigned long pfn) +{ + return (pfn >= min_high_pfn && pfn <= max_high_pfn) || + (pfn >= min_low_pfn && pfn <= max_low_pfn); +} +EXPORT_SYMBOL(pfn_valid); +#endif diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c new file mode 100644 index 0000000000..0ee75aca6e --- /dev/null +++ b/arch/arc/mm/ioremap.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + */ + +#include +#include +#include +#include +#include +#include +#include + +static inline bool arc_uncached_addr_space(phys_addr_t paddr) +{ + if (is_isa_arcompact()) { + if (paddr >= ARC_UNCACHED_ADDR_SPACE) + return true; + } else if (paddr >= perip_base && paddr <= perip_end) { + return true; + } + + return false; +} + +void __iomem *ioremap(phys_addr_t paddr, unsigned long size) +{ + phys_addr_t end; + + /* Don't allow wraparound or zero size */ + end = paddr + size - 1; + if (!size || (end < paddr)) + return NULL; + + /* + * If the region is h/w uncached, MMU mapping can be elided as optim + * The cast to u32 is fine as this region can only be inside 4GB + */ + if (arc_uncached_addr_space(paddr)) + return (void __iomem *)(u32)paddr; + + return ioremap_prot(paddr, size, + pgprot_val(pgprot_noncached(PAGE_KERNEL))); +} +EXPORT_SYMBOL(ioremap); + +/* + * ioremap with access flags + * Cache semantics wise it is same as ioremap - "forced" uncached. + * However unlike vanilla ioremap which bypasses ARC MMU for addresses in + * ARC hardware uncached region, this one still goes thru the MMU as caller + * might need finer access control (R/W/X) + */ +void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size, + unsigned long flags) +{ + unsigned int off; + unsigned long vaddr; + struct vm_struct *area; + phys_addr_t end; + pgprot_t prot = __pgprot(flags); + + /* Don't allow wraparound, zero size */ + end = paddr + size - 1; + if ((!size) || (end < paddr)) + return NULL; + + /* An early platform driver might end up here */ + if (!slab_is_available()) + return NULL; + + /* force uncached */ + prot = pgprot_noncached(prot); + + /* Mappings have to be page-aligned */ + off = paddr & ~PAGE_MASK; + paddr &= PAGE_MASK_PHYS; + size = PAGE_ALIGN(end + 1) - paddr; + + /* + * Ok, go for it.. 
+ */ + area = get_vm_area(size, VM_IOREMAP); + if (!area) + return NULL; + area->phys_addr = paddr; + vaddr = (unsigned long)area->addr; + if (ioremap_page_range(vaddr, vaddr + size, paddr, prot)) { + vunmap((void __force *)vaddr); + return NULL; + } + return (void __iomem *)(off + (char __iomem *)vaddr); +} +EXPORT_SYMBOL(ioremap_prot); + + +void iounmap(const void __iomem *addr) +{ + /* weird double cast to handle phys_addr_t > 32 bits */ + if (arc_uncached_addr_space((phys_addr_t)(u32)addr)) + return; + + vfree((void *)(PAGE_MASK & (unsigned long __force)addr)); +} +EXPORT_SYMBOL(iounmap); diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c new file mode 100644 index 0000000000..722d26b943 --- /dev/null +++ b/arch/arc/mm/mmap.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ARC700 mmap + * + * (started from arm version - for VIPT alias handling) + * + * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + */ + +#include +#include +#include +#include + +#include + +#define COLOUR_ALIGN(addr, pgoff) \ + ((((addr) + SHMLBA - 1) & ~(SHMLBA - 1)) + \ + (((pgoff) << PAGE_SHIFT) & (SHMLBA - 1))) + +/* + * Ensure that shared mappings are correctly aligned to + * avoid aliasing issues with VIPT caches. + * We need to ensure that + * a specific page of an object is always mapped at a multiple of + * SHMLBA bytes. + */ +unsigned long +arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int do_align = 0; + int aliasing = cache_is_vipt_aliasing(); + struct vm_unmapped_area_info info; + + /* + * We only need to do colour alignment if D cache aliases. + */ + if (aliasing) + do_align = filp || (flags & MAP_SHARED); + + /* + * We enforce the MAP_FIXED case. + */ + if (flags & MAP_FIXED) { + if (aliasing && flags & MAP_SHARED && + (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)) + return -EINVAL; + return addr; + } + + if (len > TASK_SIZE) + return -ENOMEM; + + if (addr) { + if (do_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + + info.flags = 0; + info.length = len; + info.low_limit = mm->mmap_base; + info.high_limit = TASK_SIZE; + info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + return vm_unmapped_area(&info); +} diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c new file mode 100644 index 0000000000..5f71445f26 --- /dev/null +++ b/arch/arc/mm/tlb.c @@ -0,0 +1,780 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * TLB Management (flush/create/diagnostics) for MMUv3 and MMUv4 + * + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + * + */ + +#include +#include +#include + +#include +#include +#include +#include + +/* A copy of the ASID from the PID reg is kept in asid_cache */ +DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE; + +static int __read_mostly pae_exists; + +/* + * Utility Routine to erase a J-TLB entry + * Caller needs to setup Index Reg (manually or via getIndex) + */ +static inline void __tlb_entry_erase(void) +{ + write_aux_reg(ARC_REG_TLBPD1, 0); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_TLBPD1HI, 0); + + write_aux_reg(ARC_REG_TLBPD0, 0); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); +} + +static void utlb_invalidate(void) +{ + write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB); +} + +#ifdef CONFIG_ARC_MMU_V3 + +static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid) +{ + unsigned int idx; + + write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid); + + write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe); + idx = read_aux_reg(ARC_REG_TLBINDEX); + + return idx; +} + +static void tlb_entry_erase(unsigned int vaddr_n_asid) +{ + unsigned int idx; + + /* Locate the TLB entry for this vaddr + ASID */ + idx = tlb_entry_lkup(vaddr_n_asid); + + /* No error means entry found, zero it out */ + if (likely(!(idx & TLB_LKUP_ERR))) { + __tlb_entry_erase(); + } else { + /* Duplicate entry error */ + WARN(idx == TLB_DUP_ERR, "Probe returned Dup PD for %x\n", + vaddr_n_asid); + } +} + +static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1) +{ + unsigned int idx; + + /* + * First verify if entry for this vaddr+ASID already exists + * This also sets up PD0 (vaddr, ASID..) for final commit + */ + idx = tlb_entry_lkup(pd0); + + /* + * If Not already present get a free slot from MMU. + * Otherwise, Probe would have located the entry and set INDEX Reg + * with existing location. This will cause Write CMD to over-write + * existing entry with new PD0 and PD1 + */ + if (likely(idx & TLB_LKUP_ERR)) + write_aux_reg(ARC_REG_TLBCOMMAND, TLBGetIndex); + + /* setup the other half of TLB entry (pfn, rwx..) */ + write_aux_reg(ARC_REG_TLBPD1, pd1); + + /* + * Commit the Entry to MMU + * It doesn't sound safe to use the TLBWriteNI cmd here + * which doesn't flush uTLBs. I'd rather be safe than sorry. 
+ */ + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); +} + +#else /* MMUv4 */ + +static void tlb_entry_erase(unsigned int vaddr_n_asid) +{ + write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid | _PAGE_PRESENT); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry); +} + +static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1) +{ + write_aux_reg(ARC_REG_TLBPD0, pd0); + + if (!is_pae40_enabled()) { + write_aux_reg(ARC_REG_TLBPD1, pd1); + } else { + write_aux_reg(ARC_REG_TLBPD1, pd1 & 0xFFFFFFFF); + write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32); + } + + write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry); +} + +#endif + +/* + * Un-conditionally (without lookup) erase the entire MMU contents + */ + +noinline void local_flush_tlb_all(void) +{ + struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + unsigned long flags; + unsigned int entry; + int num_tlb = mmu->sets * mmu->ways; + + local_irq_save(flags); + + /* Load PD0 and PD1 with template for a Blank Entry */ + write_aux_reg(ARC_REG_TLBPD1, 0); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_TLBPD1HI, 0); + + write_aux_reg(ARC_REG_TLBPD0, 0); + + for (entry = 0; entry < num_tlb; entry++) { + /* write this entry to the TLB */ + write_aux_reg(ARC_REG_TLBINDEX, entry); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWriteNI); + } + + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { + const int stlb_idx = 0x800; + + /* Blank sTLB entry */ + write_aux_reg(ARC_REG_TLBPD0, _PAGE_HW_SZ); + + for (entry = stlb_idx; entry < stlb_idx + 16; entry++) { + write_aux_reg(ARC_REG_TLBINDEX, entry); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWriteNI); + } + } + + utlb_invalidate(); + + local_irq_restore(flags); +} + +/* + * Flush the entire MM for userland. The fastest way is to move to Next ASID + */ +noinline void local_flush_tlb_mm(struct mm_struct *mm) +{ + /* + * Small optimisation courtesy IA64 + * flush_mm called during fork,exit,munmap etc, multiple times as well. + * Only for fork( ) do we need to move parent to a new MMU ctxt, + * all other cases are NOPs, hence this check. + */ + if (atomic_read(&mm->mm_users) == 0) + return; + + /* + * - Move to a new ASID, but only if the mm is still wired in + * (Android Binder ended up calling this for vma->mm != tsk->mm, + * causing h/w - s/w ASID to get out of sync) + * - Also get_new_mmu_context() new implementation allocates a new + * ASID only if it is not allocated already - so unallocate first + */ + destroy_context(mm); + if (current->mm == mm) + get_new_mmu_context(mm); +} + +/* + * Flush a Range of TLB entries for userland. 
+ * @start is inclusive, while @end is exclusive + * Difference between this and Kernel Range Flush is + * -Here the fastest way (if range is too large) is to move to next ASID + * without doing any explicit Shootdown + * -In case of kernel Flush, entry has to be shot down explicitly + */ +void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + const unsigned int cpu = smp_processor_id(); + unsigned long flags; + + /* If range @start to @end is more than 32 TLB entries deep, + * its better to move to a new ASID rather than searching for + * individual entries and then shooting them down + * + * The calc above is rough, doesn't account for unaligned parts, + * since this is heuristics based anyways + */ + if (unlikely((end - start) >= PAGE_SIZE * 32)) { + local_flush_tlb_mm(vma->vm_mm); + return; + } + + /* + * @start moved to page start: this alone suffices for checking + * loop end condition below, w/o need for aligning @end to end + * e.g. 2000 to 4001 will anyhow loop twice + */ + start &= PAGE_MASK; + + local_irq_save(flags); + + if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) { + while (start < end) { + tlb_entry_erase(start | hw_pid(vma->vm_mm, cpu)); + start += PAGE_SIZE; + } + } + + local_irq_restore(flags); +} + +/* Flush the kernel TLB entries - vmalloc/modules (Global from MMU perspective) + * @start, @end interpreted as kvaddr + * Interestingly, shared TLB entries can also be flushed using just + * @start,@end alone (interpreted as user vaddr), although technically SASID + * is also needed. However our smart TLbProbe lookup takes care of that. + */ +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + unsigned long flags; + + /* exactly same as above, except for TLB entry not taking ASID */ + + if (unlikely((end - start) >= PAGE_SIZE * 32)) { + local_flush_tlb_all(); + return; + } + + start &= PAGE_MASK; + + local_irq_save(flags); + while (start < end) { + tlb_entry_erase(start); + start += PAGE_SIZE; + } + + local_irq_restore(flags); +} + +/* + * Delete TLB entry in MMU for a given page (??? 
address) + * NOTE One TLB entry contains translation for single PAGE + */ + +void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) +{ + const unsigned int cpu = smp_processor_id(); + unsigned long flags; + + /* Note that it is critical that interrupts are DISABLED between + * checking the ASID and using it flush the TLB entry + */ + local_irq_save(flags); + + if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) { + tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm, cpu)); + } + + local_irq_restore(flags); +} + +#ifdef CONFIG_SMP + +struct tlb_args { + struct vm_area_struct *ta_vma; + unsigned long ta_start; + unsigned long ta_end; +}; + +static inline void ipi_flush_tlb_page(void *arg) +{ + struct tlb_args *ta = arg; + + local_flush_tlb_page(ta->ta_vma, ta->ta_start); +} + +static inline void ipi_flush_tlb_range(void *arg) +{ + struct tlb_args *ta = arg; + + local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline void ipi_flush_pmd_tlb_range(void *arg) +{ + struct tlb_args *ta = arg; + + local_flush_pmd_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); +} +#endif + +static inline void ipi_flush_tlb_kernel_range(void *arg) +{ + struct tlb_args *ta = (struct tlb_args *)arg; + + local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end); +} + +void flush_tlb_all(void) +{ + on_each_cpu((smp_call_func_t)local_flush_tlb_all, NULL, 1); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + on_each_cpu_mask(mm_cpumask(mm), (smp_call_func_t)local_flush_tlb_mm, + mm, 1); +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) +{ + struct tlb_args ta = { + .ta_vma = vma, + .ta_start = uaddr + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page, &ta, 1); +} + +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct tlb_args ta = { + .ta_vma = vma, + .ta_start = start, + .ta_end = end + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &ta, 1); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct tlb_args ta = { + .ta_vma = vma, + .ta_start = start, + .ta_end = end + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_pmd_tlb_range, &ta, 1); +} +#endif + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + struct tlb_args ta = { + .ta_start = start, + .ta_end = end + }; + + on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1); +} +#endif + +/* + * Routine to create a TLB entry + */ +void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep) +{ + unsigned long flags; + unsigned int asid_or_sasid, rwx; + unsigned long pd0; + phys_addr_t pd1; + + /* + * create_tlb() assumes that current->mm == vma->mm, since + * -it ASID for TLB entry is fetched from MMU ASID reg (valid for curr) + * -completes the lazy write to SASID reg (again valid for curr tsk) + * + * Removing the assumption involves + * -Using vma->mm->context{ASID,SASID}, as opposed to MMU reg. + * -More importantly it makes this handler inconsistent with fast-path + * TLB Refill handler which always deals with "current" + * + * Lets see the use cases when current->mm != vma->mm and we land here + * 1. execve->copy_strings()->__get_user_pages->handle_mm_fault + * Here VM wants to pre-install a TLB entry for user stack while + * current->mm still points to pre-execve mm (hence the condition). 
+ * However the stack vaddr is soon relocated (randomization) and + * move_page_tables() tries to undo that TLB entry. + * Thus not creating TLB entry is not any worse. + * + * 2. ptrace(POKETEXT) causes a CoW - debugger(current) inserting a + * breakpoint in debugged task. Not creating a TLB now is not + * performance critical. + * + * Both the cases above are not good enough for code churn. + */ + if (current->active_mm != vma->vm_mm) + return; + + local_irq_save(flags); + + vaddr &= PAGE_MASK; + + /* update this PTE credentials */ + pte_val(*ptep) |= (_PAGE_PRESENT | _PAGE_ACCESSED); + + /* Create HW TLB(PD0,PD1) from PTE */ + + /* ASID for this task */ + asid_or_sasid = read_aux_reg(ARC_REG_PID) & 0xff; + + pd0 = vaddr | asid_or_sasid | (pte_val(*ptep) & PTE_BITS_IN_PD0); + + /* + * ARC MMU provides fully orthogonal access bits for K/U mode, + * however Linux only saves 1 set to save PTE real-estate + * Here we convert 3 PTE bits into 6 MMU bits: + * -Kernel only entries have Kr Kw Kx 0 0 0 + * -User entries have mirrored K and U bits + */ + rwx = pte_val(*ptep) & PTE_BITS_RWX; + + if (pte_val(*ptep) & _PAGE_GLOBAL) + rwx <<= 3; /* r w x => Kr Kw Kx 0 0 0 */ + else + rwx |= (rwx << 3); /* r w x => Kr Kw Kx Ur Uw Ux */ + + pd1 = rwx | (pte_val(*ptep) & PTE_BITS_NON_RWX_IN_PD1); + + tlb_entry_insert(pd0, pd1); + + local_irq_restore(flags); +} + +/* + * Called at the end of pagefault, for a userspace mapped page + * -pre-install the corresponding TLB entry into MMU + * -Finalize the delayed D-cache flush of kernel mapping of page due to + * flush_dcache_page(), copy_user_page() + * + * Note that flush (when done) involves both WBACK - so physical page is + * in sync as well as INV - so any non-congruent aliases don't remain + */ +void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, + pte_t *ptep) +{ + unsigned long vaddr = vaddr_unaligned & PAGE_MASK; + phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK_PHYS; + struct page *page = pfn_to_page(pte_pfn(*ptep)); + + create_tlb(vma, vaddr, ptep); + + if (page == ZERO_PAGE(0)) { + return; + } + + /* + * Exec page : Independent of aliasing/page-color considerations, + * since icache doesn't snoop dcache on ARC, any dirty + * K-mapping of a code page needs to be wback+inv so that + * icache fetch by userspace sees code correctly. + * !EXEC page: If K-mapping is NOT congruent to U-mapping, flush it + * so userspace sees the right data. + * (Avoids the flush for Non-exec + congruent mapping case) + */ + if ((vma->vm_flags & VM_EXEC) || + addr_not_cache_congruent(paddr, vaddr)) { + + int dirty = !test_and_set_bit(PG_dc_clean, &page->flags); + if (dirty) { + /* wback + inv dcache lines (K-mapping) */ + __flush_dcache_page(paddr, paddr); + + /* invalidate any existing icache lines (U-mapping) */ + if (vma->vm_flags & VM_EXEC) + __inv_icache_page(paddr, vaddr); + } + } +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +/* + * MMUv4 in HS38x cores supports Super Pages which are basis for Linux THP + * support. + * + * Normal and Super pages can co-exist (ofcourse not overlap) in TLB with a + * new bit "SZ" in TLB page descriptor to distinguish between them. + * Super Page size is configurable in hardware (4K to 16M), but fixed once + * RTL builds. 
+ * + * The exact THP size a Linux configuration will support is a function of: + * - MMU page size (typical 8K, RTL fixed) + * - software page walker address split between PGD:PTE:PFN (typical + * 11:8:13, but can be changed with 1 line) + * So for above default, THP size supported is 8K * (2^8) = 2M + * + * Default Page Walker is 2 levels, PGD:PTE:PFN, which in THP regime + * reduces to 1 level (as PTE is folded into PGD and canonically referred + * to as PMD). + * Thus THP PMD accessors are implemented in terms of PTE (just like sparc) + */ + +void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd) +{ + pte_t pte = __pte(pmd_val(*pmd)); + update_mmu_cache(vma, addr, &pte); +} + +void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + unsigned int cpu; + unsigned long flags; + + local_irq_save(flags); + + cpu = smp_processor_id(); + + if (likely(asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID)) { + unsigned int asid = hw_pid(vma->vm_mm, cpu); + + /* No need to loop here: this will always be for 1 Huge Page */ + tlb_entry_erase(start | _PAGE_HW_SZ | asid); + } + + local_irq_restore(flags); +} + +#endif + +/* Read the Cache Build Configuration Registers, Decode them and save into + * the cpuinfo structure for later use. + * No Validation is done here, simply read/convert the BCRs + */ +void read_decode_mmu_bcr(void) +{ + struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + unsigned int tmp; + struct bcr_mmu_3 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4, + u_itlb:4, u_dtlb:4; +#else + unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, sasid:1, res:3, sets:4, + ways:4, ver:8; +#endif + } *mmu3; + + struct bcr_mmu_4 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1, + n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3; +#else + /* DTLB ITLB JES JE JA */ + unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2, + pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8; +#endif + } *mmu4; + + tmp = read_aux_reg(ARC_REG_MMU_BCR); + mmu->ver = (tmp >> 24); + + if (is_isa_arcompact() && mmu->ver == 3) { + mmu3 = (struct bcr_mmu_3 *)&tmp; + mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1); + mmu->sets = 1 << mmu3->sets; + mmu->ways = 1 << mmu3->ways; + mmu->u_dtlb = mmu3->u_dtlb; + mmu->u_itlb = mmu3->u_itlb; + mmu->sasid = mmu3->sasid; + } else { + mmu4 = (struct bcr_mmu_4 *)&tmp; + mmu->pg_sz_k = 1 << (mmu4->sz0 - 1); + mmu->s_pg_sz_m = 1 << (mmu4->sz1 - 11); + mmu->sets = 64 << mmu4->n_entry; + mmu->ways = mmu4->n_ways * 2; + mmu->u_dtlb = mmu4->u_dtlb * 4; + mmu->u_itlb = mmu4->u_itlb * 4; + mmu->sasid = mmu4->sasid; + pae_exists = mmu->pae = mmu4->pae; + } +} + +char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) +{ + int n = 0; + struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu; + char super_pg[64] = ""; + + if (p_mmu->s_pg_sz_m) + scnprintf(super_pg, 64, "%dM Super Page %s", + p_mmu->s_pg_sz_m, + IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE)); + + n += scnprintf(buf + n, len - n, + "MMU [v%x]\t: %dk PAGE, %s, swalk %d lvl, JTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n", + p_mmu->ver, p_mmu->pg_sz_k, super_pg, CONFIG_PGTABLE_LEVELS, + p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways, + p_mmu->u_dtlb, p_mmu->u_itlb, + IS_AVAIL2(p_mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40)); + + return buf; +} + +int pae40_exist_but_not_enab(void) +{ + return pae_exists && !is_pae40_enabled(); +} + +void arc_mmu_init(void) +{ + struct 
cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + char str[256]; + int compat = 0; + + pr_info("%s", arc_mmu_mumbojumbo(0, str, sizeof(str))); + + /* + * Can't be done in processor.h due to header include dependencies + */ + BUILD_BUG_ON(!IS_ALIGNED((CONFIG_ARC_KVADDR_SIZE << 20), PMD_SIZE)); + + /* + * stack top size sanity check, + * Can't be done in processor.h due to header include dependencies + */ + BUILD_BUG_ON(!IS_ALIGNED(STACK_TOP, PMD_SIZE)); + + /* + * Ensure that MMU features assumed by kernel exist in hardware. + * - For older ARC700 cpus, only v3 supported + * - For HS cpus, v4 was baseline and v5 is backwards compatible + * (will run older software). + */ + if (is_isa_arcompact() && mmu->ver == 3) + compat = 1; + else if (is_isa_arcv2() && mmu->ver >= 4) + compat = 1; + + if (!compat) + panic("MMU ver %d doesn't match kernel built for\n", mmu->ver); + + if (mmu->pg_sz_k != TO_KB(PAGE_SIZE)) + panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE)); + + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && + mmu->s_pg_sz_m != TO_MB(HPAGE_PMD_SIZE)) + panic("MMU Super pg size != Linux HPAGE_PMD_SIZE (%luM)\n", + (unsigned long)TO_MB(HPAGE_PMD_SIZE)); + + if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae) + panic("Hardware doesn't support PAE40\n"); + + /* Enable the MMU with ASID 0 */ + mmu_setup_asid(NULL, 0); + + /* cache the pgd pointer in MMU SCRATCH reg (ARCv2 only) */ + mmu_setup_pgd(NULL, swapper_pg_dir); + + if (pae40_exist_but_not_enab()) + write_aux_reg(ARC_REG_TLBPD1HI, 0); +} + +/* + * TLB Programmer's Model uses Linear Indexes: 0 to {255, 511} for 128 x {2,4} + * The mapping is Column-first. + * --------------------- ----------- + * |way0|way1|way2|way3| |way0|way1| + * --------------------- ----------- + * [set0] | 0 | 1 | 2 | 3 | | 0 | 1 | + * [set1] | 4 | 5 | 6 | 7 | | 2 | 3 | + * ~ ~ ~ ~ + * [set127] | 508| 509| 510| 511| | 254| 255| + * --------------------- ----------- + * For normal operations we don't(must not) care how above works since + * MMU cmd getIndex(vaddr) abstracts that out. + * However for walking WAYS of a SET, we need to know this + */ +#define SET_WAY_TO_IDX(mmu, set, way) ((set) * mmu->ways + (way)) + +/* Handling of Duplicate PD (TLB entry) in MMU. + * -Could be due to buggy customer tapeouts or obscure kernel bugs + * -MMU complaints not at the time of duplicate PD installation, but at the + * time of lookup matching multiple ways. + * -Ideally these should never happen - but if they do - workaround by deleting + * the duplicate one. 
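Before the duplicate-PD handling below, the linear indexing used by SET_WAY_TO_IDX() can be sanity-checked with a few lines of C. The 128-set, 4-way geometry is one of the two configurations in the diagram above, taken here only as an example.

```c
#include <stdio.h>

/* Same formula as SET_WAY_TO_IDX(), with a fixed example geometry */
#define EX_SETS 128
#define EX_WAYS 4
#define set_way_to_idx(set, way)  ((set) * EX_WAYS + (way))

int main(void)
{
	int set, way;

	/* walk all ways of the first two sets, as the overlap handler does per set */
	for (set = 0; set < 2; set++)
		for (way = 0; way < EX_WAYS; way++)
			printf("set %3d way %d -> index %3d\n",
			       set, way, set_way_to_idx(set, way));

	/* last entry of the last set lands on sets * ways - 1 = 511 */
	printf("set %3d way %d -> index %3d\n",
	       EX_SETS - 1, EX_WAYS - 1, set_way_to_idx(EX_SETS - 1, EX_WAYS - 1));
	return 0;
}
```

do_tlb_overlap_fault() below walks exactly these indexes, one set at a time, to read every way of a set back out of the MMU before comparing them.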
+ * -Knob to be verbose about it (TODO: hook them up to debugfs)
+ */
+volatile int dup_pd_silent; /* Be silent about it or complain (default) */
+
+void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
+			  struct pt_regs *regs)
+{
+	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+	unsigned long flags;
+	int set, n_ways = mmu->ways;
+
+	n_ways = min(n_ways, 4);
+	BUG_ON(mmu->ways > 4);
+
+	local_irq_save(flags);
+
+	/* loop through all sets of TLB */
+	for (set = 0; set < mmu->sets; set++) {
+
+		int is_valid, way;
+		unsigned int pd0[4];
+
+		/* read out all the ways of current set */
+		for (way = 0, is_valid = 0; way < n_ways; way++) {
+			write_aux_reg(ARC_REG_TLBINDEX,
+				      SET_WAY_TO_IDX(mmu, set, way));
+			write_aux_reg(ARC_REG_TLBCOMMAND, TLBRead);
+			pd0[way] = read_aux_reg(ARC_REG_TLBPD0);
+			is_valid |= pd0[way] & _PAGE_PRESENT;
+			pd0[way] &= PAGE_MASK;
+		}
+
+		/* If all the WAYS in SET are empty, skip to next SET */
+		if (!is_valid)
+			continue;
+
+		/* Scan the set for duplicate ways: needs a nested loop */
+		for (way = 0; way < n_ways - 1; way++) {
+
+			int n;
+
+			if (!pd0[way])
+				continue;
+
+			for (n = way + 1; n < n_ways; n++) {
+				if (pd0[way] != pd0[n])
+					continue;
+
+				if (!dup_pd_silent)
+					pr_info("Dup TLB PD0 %08x @ set %d ways %d,%d\n",
+						pd0[way], set, way, n);
+
+				/*
+				 * clear entry @way and not @n.
+				 * This is critical to our optimised loop
+				 */
+				pd0[way] = 0;
+				write_aux_reg(ARC_REG_TLBINDEX,
+					      SET_WAY_TO_IDX(mmu, set, way));
+				__tlb_entry_erase();
+			}
+		}
+	}
+
+	local_irq_restore(flags);
+}
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
new file mode 100644
index 0000000000..e054780a8f
--- /dev/null
+++ b/arch/arc/mm/tlbex.S
@@ -0,0 +1,378 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * TLB Exception Handling for ARC
+ *
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * Vineetg: April 2011 :
+ *  -MMU v1: moved out legacy code into a separate file
+ *  -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore,
+ *      helps avoid a shift when preparing PD0 from PTE
+ *
+ * Vineetg: July 2009
+ *  -For MMU V2, we need not do heuristics at the time of committing a D-TLB
+ *   entry, so that it doesn't knock out its I-TLB entry
+ *  -Some more fine tuning:
+ *    bmsk instead of add, asl.cc instead of branch, delay slot utilise etc
+ *
+ * Vineetg: July 2009
+ *  -Practically rewrote the I/D TLB Miss handlers
+ *   Now 40 and 135 instructions apiece as compared to 131 and 449 resp.
+ *   Hence Leaner by 1.5 K
+ *   Used Conditional arithmetic to replace excessive branching
+ *   Also used short instructions wherever possible
+ *
+ * Vineetg: Aug 13th 2008
+ *  -Passing ECR (Exception Cause REG) to do_page_fault( ) for printing
+ *   more information in case of a Fatality
+ *
+ * Vineetg: March 25th Bug #92690
+ *  -Added Debug Code to check if sw-ASID == hw-ASID
+
+ * Rahul Trivedi, Amit Bhor: Codito Technologies 2004
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef CONFIG_ISA_ARCOMPACT
+;-----------------------------------------------------------------
+; ARC700 Exception Handling doesn't auto-switch stack and it only provides
+; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0"
+;
+; For Non-SMP, the scratch AUX reg is repurposed to cache task PGD, so a
+; "global" is used to free up the FIRST core reg to be able to code the rest of
+; exception prologue (IRQ auto-disabled on Exceptions, so it's IRQ-safe).
+; Since the Fast Path TLB Miss handler is coded with 4 regs, the remaining 3 +; need to be saved as well by extending the "global" to be 4 words. Hence +; ".size ex_saved_reg1, 16" +; [All of this dance is to avoid stack switching for each TLB Miss, since we +; only need to save only a handful of regs, as opposed to complete reg file] +; +; For ARC700 SMP, the "global" obviously can't be used for free up the FIRST +; core reg as it will not be SMP safe. +; Thus scratch AUX reg is used (and no longer used to cache task PGD). +; To save the rest of 3 regs - per cpu, the global is made "per-cpu". +; Epilogue thus has to locate the "per-cpu" storage for regs. +; To avoid cache line bouncing the per-cpu global is aligned/sized per +; L1_CACHE_SHIFT, despite fundamentally needing to be 12 bytes only. Hence +; ".size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)" + +; As simple as that.... +;-------------------------------------------------------------------------- + +; scratch memory to save [r0-r3] used to code TLB refill Handler +ARCFP_DATA ex_saved_reg1 + .align 1 << L1_CACHE_SHIFT + .type ex_saved_reg1, @object +#ifdef CONFIG_SMP + .size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT) +ex_saved_reg1: + .zero (CONFIG_NR_CPUS << L1_CACHE_SHIFT) +#else + .size ex_saved_reg1, 16 +ex_saved_reg1: + .zero 16 +#endif + +.macro TLBMISS_FREEUP_REGS +#ifdef CONFIG_SMP + sr r0, [ARC_REG_SCRATCH_DATA0] ; freeup r0 to code with + GET_CPU_ID r0 ; get to per cpu scratch mem, + asl r0, r0, L1_CACHE_SHIFT ; cache line wide per cpu + add r0, @ex_saved_reg1, r0 +#else + st r0, [@ex_saved_reg1] + mov_s r0, @ex_saved_reg1 +#endif + st_s r1, [r0, 4] + st_s r2, [r0, 8] + st_s r3, [r0, 12] +.endm + +.macro TLBMISS_RESTORE_REGS +#ifdef CONFIG_SMP + GET_CPU_ID r0 ; get to per cpu scratch mem + asl r0, r0, L1_CACHE_SHIFT ; each is cache line wide + add r0, @ex_saved_reg1, r0 + ld_s r3, [r0,12] + ld_s r2, [r0, 8] + ld_s r1, [r0, 4] + lr r0, [ARC_REG_SCRATCH_DATA0] +#else + mov_s r0, @ex_saved_reg1 + ld_s r3, [r0,12] + ld_s r2, [r0, 8] + ld_s r1, [r0, 4] + ld_s r0, [r0] +#endif +.endm + +#else /* ARCv2 */ + +.macro TLBMISS_FREEUP_REGS +#ifdef CONFIG_ARC_HAS_LL64 + std r0, [sp, -16] + std r2, [sp, -8] +#else + PUSH r0 + PUSH r1 + PUSH r2 + PUSH r3 +#endif +.endm + +.macro TLBMISS_RESTORE_REGS +#ifdef CONFIG_ARC_HAS_LL64 + ldd r0, [sp, -16] + ldd r2, [sp, -8] +#else + POP r3 + POP r2 + POP r1 + POP r0 +#endif +.endm + +#endif + +;============================================================================ +;TLB Miss handling Code +;============================================================================ + +#ifndef PMD_SHIFT +#define PMD_SHIFT PUD_SHIFT +#endif + +#ifndef PUD_SHIFT +#define PUD_SHIFT PGDIR_SHIFT +#endif + +;----------------------------------------------------------------------------- +; This macro does the page-table lookup for the faulting address. 
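Before that lookup macro is spelled out, one detail of the save/restore scheme above deserves a concrete model: on SMP each CPU gets its own cache-line-sized slot in ex_saved_reg1, located purely by shifting the CPU id, which is why the area is sized CONFIG_NR_CPUS << L1_CACHE_SHIFT. The C sketch below mimics that addressing; the cache-line shift and CPU count are assumed example values.

```c
#include <stdint.h>
#include <stdio.h>

/* Assumed values for illustration only */
#define EX_L1_CACHE_SHIFT 6                       /* 64-byte cache line */
#define EX_NR_CPUS        4

/* One cache-line sized slot per CPU, so slots never share a line */
static uint8_t ex_saved_reg1[EX_NR_CPUS << EX_L1_CACHE_SHIFT];

/* Same arithmetic as TLBMISS_FREEUP_REGS: base + (cpu_id << L1_CACHE_SHIFT) */
static uint32_t *scratch_slot(unsigned int cpu_id)
{
	return (uint32_t *)&ex_saved_reg1[cpu_id << EX_L1_CACHE_SHIFT];
}

int main(void)
{
	unsigned int cpu;

	for (cpu = 0; cpu < EX_NR_CPUS; cpu++)
		printf("cpu%u scratch slot offset = %u bytes\n", cpu,
		       (unsigned int)((uint8_t *)scratch_slot(cpu) - ex_saved_reg1));
	return 0;
}
```

Each slot starts a full cache line apart, so saving r1-r3 on one CPU never bounces the line holding another CPU's scratch words, matching the "avoid cache line bouncing" note above.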
+; OUT: r0 = PTE faulted on, r1 = ptr to PTE, r2 = Faulting V-address +.macro LOAD_FAULT_PTE + + lr r2, [efa] + +#ifdef CONFIG_ISA_ARCV2 + lr r1, [ARC_REG_SCRATCH_DATA0] ; current pgd +#else + GET_CURR_TASK_ON_CPU r1 + ld r1, [r1, TASK_ACT_MM] + ld r1, [r1, MM_PGD] +#endif + + lsr r0, r2, PGDIR_SHIFT ; Bits for indexing into PGD + ld.as r3, [r1, r0] ; PGD entry corresp to faulting addr + tst r3, r3 + bz do_slow_path_pf ; if no Page Table, do page fault + +#if CONFIG_PGTABLE_LEVELS > 3 + lsr r0, r2, PUD_SHIFT ; Bits for indexing into PUD + and r0, r0, (PTRS_PER_PUD - 1) + ld.as r1, [r3, r0] ; PMD entry + tst r1, r1 + bz do_slow_path_pf + mov r3, r1 +#endif + +#if CONFIG_PGTABLE_LEVELS > 2 + lsr r0, r2, PMD_SHIFT ; Bits for indexing into PMD + and r0, r0, (PTRS_PER_PMD - 1) + ld.as r1, [r3, r0] ; PMD entry + tst r1, r1 + bz do_slow_path_pf + mov r3, r1 +#endif + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + and.f 0, r3, _PAGE_HW_SZ ; Is this Huge PMD (thp) + add2.nz r1, r1, r0 + bnz.d 2f ; YES: PGD == PMD has THP PTE: stop pgd walk + mov.nz r0, r3 + +#endif + and r1, r3, PAGE_MASK + + ; Get the PTE entry: The idea is + ; (1) x = addr >> PAGE_SHIFT -> masks page-off bits from @fault-addr + ; (2) y = x & (PTRS_PER_PTE - 1) -> to get index + ; (3) z = (pgtbl + y * 4) + +#ifdef CONFIG_ARC_HAS_PAE40 +#define PTE_SIZE_LOG 3 /* 8 == 2 ^ 3 */ +#else +#define PTE_SIZE_LOG 2 /* 4 == 2 ^ 2 */ +#endif + + ; multiply in step (3) above avoided by shifting lesser in step (1) + lsr r0, r2, ( PAGE_SHIFT - PTE_SIZE_LOG ) + and r0, r0, ( (PTRS_PER_PTE - 1) << PTE_SIZE_LOG ) + ld.aw r0, [r1, r0] ; r0: PTE (lower word only for PAE40) + ; r1: PTE ptr + +2: + +.endm + +;----------------------------------------------------------------- +; Convert Linux PTE entry into TLB entry +; A one-word PTE entry is programmed as two-word TLB Entry [PD0:PD1] in mmu +; (for PAE40, two-words PTE, while three-word TLB Entry [PD0:PD1:PD1HI]) +; IN: r0 = PTE, r1 = ptr to PTE + +.macro CONV_PTE_TO_TLB + and r3, r0, PTE_BITS_RWX ; r w x + asl r2, r3, 3 ; Kr Kw Kx 0 0 0 (GLOBAL, kernel only) + and.f 0, r0, _PAGE_GLOBAL + or.z r2, r2, r3 ; Kr Kw Kx Ur Uw Ux (!GLOBAL, user page) + + and r3, r0, PTE_BITS_NON_RWX_IN_PD1 ; Extract PFN+cache bits from PTE + or r3, r3, r2 + + sr r3, [ARC_REG_TLBPD1] ; paddr[31..13] | Kr Kw Kx Ur Uw Ux | C +#ifdef CONFIG_ARC_HAS_PAE40 + ld r3, [r1, 4] ; paddr[39..32] + sr r3, [ARC_REG_TLBPD1HI] +#endif + + and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb + + lr r3,[ARC_REG_TLBPD0] ; MMU prepares PD0 with vaddr and asid + + or r3, r3, r2 ; S | vaddr | {sasid|asid} + sr r3,[ARC_REG_TLBPD0] ; rewrite PD0 +.endm + +;----------------------------------------------------------------- +; Commit the TLB entry into MMU + +.macro COMMIT_ENTRY_TO_MMU +#ifdef CONFIG_ARC_MMU_V3 + + /* Get free TLB slot: Set = computed from vaddr, way = random */ + sr TLBGetIndex, [ARC_REG_TLBCOMMAND] + + /* Commit the Write */ + sr TLBWriteNI, [ARC_REG_TLBCOMMAND] + +#else + sr TLBInsertEntry, [ARC_REG_TLBCOMMAND] +#endif + +88: +.endm + + +ARCFP_CODE ;Fast Path Code, candidate for ICCM + +;----------------------------------------------------------------------------- +; I-TLB Miss Exception Handler +;----------------------------------------------------------------------------- + +ENTRY(EV_TLBMissI) + + TLBMISS_FREEUP_REGS + + ;---------------------------------------------------------------- + ; Get the PTE corresponding to V-addr accessed, r2 is setup with EFA + LOAD_FAULT_PTE + + 
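At this point r0 holds the PTE and r1 its pointer. The index arithmetic LOAD_FAULT_PTE used to get there, folding the multiply of step (3) into a smaller shift in step (1), can be checked independently; the page size, PTE count and PTE size below are assumed example values matching the non-PAE40 comments above.

```c
#include <assert.h>
#include <stdint.h>

/* Assumed example geometry: 8K pages, 256 PTEs per table, 4-byte PTEs */
#define EX_PAGE_SHIFT    13
#define EX_PTRS_PER_PTE  256
#define EX_PTE_SIZE_LOG  2

/* Straightforward form: index the table, then scale by PTE size */
static uintptr_t pte_off_naive(uintptr_t addr)
{
	uintptr_t idx = (addr >> EX_PAGE_SHIFT) & (EX_PTRS_PER_PTE - 1);
	return idx << EX_PTE_SIZE_LOG;
}

/* Folded form used by LOAD_FAULT_PTE: shift less, mask a pre-scaled field */
static uintptr_t pte_off_folded(uintptr_t addr)
{
	return (addr >> (EX_PAGE_SHIFT - EX_PTE_SIZE_LOG)) &
	       ((EX_PTRS_PER_PTE - 1) << EX_PTE_SIZE_LOG);
}

int main(void)
{
	uintptr_t a;

	for (a = 0; a < (1u << 26); a += 0x1357)   /* sample a range of addresses */
		assert(pte_off_naive(a) == pte_off_folded(a));
	return 0;
}
```

Both forms select the PTE-index bits of the faulting address and scale them by the PTE size, so the folded version used by the handler saves the multiply without changing the result.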
;---------------------------------------------------------------- + ; VERIFY_PTE: Check if PTE permissions approp for executing code + cmp_s r2, VMALLOC_START + mov_s r2, (_PAGE_PRESENT | _PAGE_EXECUTE) + or.hs r2, r2, _PAGE_GLOBAL + + and r3, r0, r2 ; Mask out NON Flag bits from PTE + xor.f r3, r3, r2 ; check ( ( pte & flags_test ) == flags_test ) + bnz do_slow_path_pf + + ; Let Linux VM know that the page was accessed + or r0, r0, _PAGE_ACCESSED ; set Accessed Bit + st_s r0, [r1] ; Write back PTE + + CONV_PTE_TO_TLB + COMMIT_ENTRY_TO_MMU + TLBMISS_RESTORE_REGS +EV_TLBMissI_fast_ret: ; additional label for VDK OS-kit instrumentation + rtie + +END(EV_TLBMissI) + +;----------------------------------------------------------------------------- +; D-TLB Miss Exception Handler +;----------------------------------------------------------------------------- + +ENTRY(EV_TLBMissD) + + TLBMISS_FREEUP_REGS + + ;---------------------------------------------------------------- + ; Get the PTE corresponding to V-addr accessed + ; If PTE exists, it will setup, r0 = PTE, r1 = Ptr to PTE, r2 = EFA + LOAD_FAULT_PTE + + ;---------------------------------------------------------------- + ; VERIFY_PTE: Chk if PTE permissions approp for data access (R/W/R+W) + + cmp_s r2, VMALLOC_START + mov_s r2, _PAGE_PRESENT ; common bit for K/U PTE + or.hs r2, r2, _PAGE_GLOBAL ; kernel PTE only + + ; Linux PTE [RWX] bits are semantically overloaded: + ; -If PAGE_GLOBAL set, they refer to kernel-only flags (vmalloc) + ; -Otherwise they are user-mode permissions, and those are exactly + ; same for kernel mode as well (e.g. copy_(to|from)_user) + + lr r3, [ecr] + btst_s r3, ECR_C_BIT_DTLB_LD_MISS ; Read Access + or.nz r2, r2, _PAGE_READ ; chk for Read flag in PTE + btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; Write Access + or.nz r2, r2, _PAGE_WRITE ; chk for Write flag in PTE + ; Above laddering takes care of XCHG access (both R and W) + + ; By now, r2 setup with all the Flags we need to check in PTE + and r3, r0, r2 ; Mask out NON Flag bits from PTE + brne.d r3, r2, do_slow_path_pf ; is ((pte & flags_test) == flags_test) + + ;---------------------------------------------------------------- + ; UPDATE_PTE: Let Linux VM know that page was accessed/dirty + or r0, r0, _PAGE_ACCESSED ; Accessed bit always + or.nz r0, r0, _PAGE_DIRTY ; if Write, set Dirty bit as well + st_s r0, [r1] ; Write back PTE + + CONV_PTE_TO_TLB + + COMMIT_ENTRY_TO_MMU + TLBMISS_RESTORE_REGS +EV_TLBMissD_fast_ret: ; additional label for VDK OS-kit instrumentation + rtie + +;-------- Common routine to call Linux Page Fault Handler ----------- +do_slow_path_pf: + +#ifdef CONFIG_ISA_ARCV2 + ; Set Z flag if exception in U mode. Hardware micro-ops do this on any + ; taken interrupt/exception, and thus is already the case at the entry + ; above, but ensuing code would have already clobbered. + ; EXCEPTION_PROLOGUE called in slow path, relies on correct Z flag set + + lr r2, [erstatus] + and r2, r2, STATUS_U_MASK + bxor.f 0, r2, STATUS_U_BIT +#endif + + ; Restore the 4-scratch regs saved by fast path miss handler + TLBMISS_RESTORE_REGS + + ; Slow path TLB Miss handled as a regular ARC Exception + ; (stack switching / save the complete reg-file). 
+ b call_do_page_fault +END(EV_TLBMissD) diff --git a/arch/arc/oprofile/Makefile b/arch/arc/oprofile/Makefile new file mode 100644 index 0000000000..698367bb41 --- /dev/null +++ b/arch/arc/oprofile/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_OPROFILE) += oprofile.o + +DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ + oprof.o cpu_buffer.o buffer_sync.o \ + event_buffer.o oprofile_files.o \ + oprofilefs.o oprofile_stats.o \ + timer_int.o ) + +oprofile-y := $(DRIVER_OBJS) common.o diff --git a/arch/arc/oprofile/common.c b/arch/arc/oprofile/common.c new file mode 100644 index 0000000000..86bf589953 --- /dev/null +++ b/arch/arc/oprofile/common.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * Based on orig code from @author John Levon + */ + +#include +#include + +int __init oprofile_arch_init(struct oprofile_operations *ops) +{ + /* + * A failure here, forces oprofile core to switch to Timer based PC + * sampling, which will happen if say perf is not enabled/available + */ + return oprofile_perf_init(ops); +} + +void oprofile_arch_exit(void) +{ + oprofile_perf_exit(); +} diff --git a/arch/arc/plat-axs10x/Kconfig b/arch/arc/plat-axs10x/Kconfig new file mode 100644 index 0000000000..b9652c69d1 --- /dev/null +++ b/arch/arc/plat-axs10x/Kconfig @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com) +# + +menuconfig ARC_PLAT_AXS10X + bool "Synopsys ARC AXS10x Software Development Platforms" + select DW_APB_ICTL + select GPIO_DWAPB + select OF_GPIO + select HAVE_PCI + select GENERIC_IRQ_CHIP + select GPIOLIB + select AXS101 if ISA_ARCOMPACT + select AXS103 if ISA_ARCV2 + help + Support for the ARC AXS10x Software Development Platforms. + + The AXS10x Platforms consist of a mainboard with peripherals, + on which several daughter cards can be placed. The daughter cards + typically contain a CPU and memory. + +if ARC_PLAT_AXS10X + +config AXS101 + depends on ISA_ARCOMPACT + bool "AXS101 with AXC001 CPU Card (ARC 770D/EM6/AS221)" + help + This adds support for the 770D/EM6/AS221 CPU Card. Only the ARC + 770D is supported in Linux. + + The AXS101 Platform consists of an AXS10x mainboard with + this daughtercard. Please use the axs101.dts device tree + with this configuration. + +config AXS103 + bool "AXS103 with AXC003 CPU Card (ARC HS38x)" + depends on ISA_ARCV2 + help + This adds support for the HS38x CPU Card. + + The AXS103 Platform consists of an AXS10x mainboard with + this daughtercard. Please use the axs103.dts device tree + with this configuration. + +endif diff --git a/arch/arc/plat-axs10x/Makefile b/arch/arc/plat-axs10x/Makefile new file mode 100644 index 0000000000..cebe5716ee --- /dev/null +++ b/arch/arc/plat-axs10x/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com) +# + +obj-$(CONFIG_ARC_PLAT_AXS10X) += axs10x.o diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c new file mode 100644 index 0000000000..b821df7b00 --- /dev/null +++ b/arch/arc/plat-axs10x/axs10x.c @@ -0,0 +1,385 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AXS101/AXS103 Software Development Platform + * + * Copyright (C) 2013-15 Synopsys, Inc. 
(www.synopsys.com) + */ + +#include +#include +#include + +#include +#include +#include +#include + +#define AXS_MB_CGU 0xE0010000 +#define AXS_MB_CREG 0xE0011000 + +#define CREG_MB_IRQ_MUX (AXS_MB_CREG + 0x214) +#define CREG_MB_SW_RESET (AXS_MB_CREG + 0x220) +#define CREG_MB_VER (AXS_MB_CREG + 0x230) +#define CREG_MB_CONFIG (AXS_MB_CREG + 0x234) + +#define AXC001_CREG 0xF0001000 +#define AXC001_GPIO_INTC 0xF0003000 + +static void __init axs10x_enable_gpio_intc_wire(void) +{ + /* + * Peripherals on CPU Card and Mother Board are wired to cpu intc via + * intermediate DW APB GPIO blocks (mainly for debouncing) + * + * --------------------- + * | snps,arc700-intc | + * --------------------- + * | #7 | #15 + * ------------------- ------------------- + * | snps,dw-apb-gpio | | snps,dw-apb-gpio | + * ------------------- ------------------- + * | #12 | + * | [ Debug UART on cpu card ] + * | + * ------------------------ + * | snps,dw-apb-intc (MB)| + * ------------------------ + * | | | | + * [eth] [uart] [... other perip on Main Board] + * + * Current implementation of "irq-dw-apb-ictl" driver doesn't work well + * with stacked INTCs. In particular problem happens if its master INTC + * not yet instantiated. See discussion here - + * https://lore.kernel.org/lkml/54F6FE2C.7020309@synopsys.com + * + * So setup the first gpio block as a passive pass thru and hide it from + * DT hardware topology - connect MB intc directly to cpu intc + * The GPIO "wire" needs to be init nevertheless (here) + * + * One side adv is that peripheral interrupt handling avoids one nested + * intc ISR hop + */ +#define GPIO_INTEN (AXC001_GPIO_INTC + 0x30) +#define GPIO_INTMASK (AXC001_GPIO_INTC + 0x34) +#define GPIO_INTTYPE_LEVEL (AXC001_GPIO_INTC + 0x38) +#define GPIO_INT_POLARITY (AXC001_GPIO_INTC + 0x3c) +#define MB_TO_GPIO_IRQ 12 + + iowrite32(~(1 << MB_TO_GPIO_IRQ), (void __iomem *) GPIO_INTMASK); + iowrite32(0, (void __iomem *) GPIO_INTTYPE_LEVEL); + iowrite32(~0, (void __iomem *) GPIO_INT_POLARITY); + iowrite32(1 << MB_TO_GPIO_IRQ, (void __iomem *) GPIO_INTEN); +} + +static void __init axs10x_print_board_ver(unsigned int creg, const char *str) +{ + union ver { + struct { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:11, y:12, m:4, d:5; +#else + unsigned int d:5, m:4, y:12, pad:11; +#endif + }; + unsigned int val; + } board; + + board.val = ioread32((void __iomem *)creg); + pr_info("AXS: %s FPGA Date: %u-%u-%u\n", str, board.d, board.m, + board.y); +} + +static void __init axs10x_early_init(void) +{ + int mb_rev; + char mb[32]; + + /* Determine motherboard version */ + if (ioread32((void __iomem *) CREG_MB_CONFIG) & (1 << 28)) + mb_rev = 3; /* HT-3 (rev3.0) */ + else + mb_rev = 2; /* HT-2 (rev2.0) */ + + axs10x_enable_gpio_intc_wire(); + + scnprintf(mb, 32, "MainBoard v%d", mb_rev); + axs10x_print_board_ver(CREG_MB_VER, mb); +} + +#ifdef CONFIG_AXS101 + +#define CREG_CPU_ADDR_770 (AXC001_CREG + 0x20) +#define CREG_CPU_ADDR_TUNN (AXC001_CREG + 0x60) +#define CREG_CPU_ADDR_770_UPD (AXC001_CREG + 0x34) +#define CREG_CPU_ADDR_TUNN_UPD (AXC001_CREG + 0x74) + +#define CREG_CPU_ARC770_IRQ_MUX (AXC001_CREG + 0x114) +#define CREG_CPU_GPIO_UART_MUX (AXC001_CREG + 0x120) + +/* + * Set up System Memory Map for ARC cpu / peripherals controllers + * + * Each AXI master has a 4GB memory map specified as 16 apertures of 256MB, each + * of which maps to a corresponding 256MB aperture in Target slave memory map. + * + * e.g. 
ARC cpu AXI Master's aperture 8 (0x8000_0000) is mapped to aperture 0 + * (0x0000_0000) of DDR Port 0 (slave #1) + * + * Access from cpu to MB controllers such as GMAC is setup using AXI Tunnel: + * which has master/slaves on both ends. + * e.g. aperture 14 (0xE000_0000) of ARC cpu is mapped to aperture 14 + * (0xE000_0000) of CPU Card AXI Tunnel slave (slave #3) which is mapped to + * MB AXI Tunnel Master, which also has a mem map setup + * + * In the reverse direction, MB AXI Masters (e.g. GMAC) mem map is setup + * to map to MB AXI Tunnel slave which connects to CPU Card AXI Tunnel Master + */ +struct aperture { + unsigned int slave_sel:4, slave_off:4, pad:24; +}; + +/* CPU Card target slaves */ +#define AXC001_SLV_NONE 0 +#define AXC001_SLV_DDR_PORT0 1 +#define AXC001_SLV_SRAM 2 +#define AXC001_SLV_AXI_TUNNEL 3 +#define AXC001_SLV_AXI2APB 6 +#define AXC001_SLV_DDR_PORT1 7 + +/* MB AXI Target slaves */ +#define AXS_MB_SLV_NONE 0 +#define AXS_MB_SLV_AXI_TUNNEL_CPU 1 +#define AXS_MB_SLV_AXI_TUNNEL_HAPS 2 +#define AXS_MB_SLV_SRAM 3 +#define AXS_MB_SLV_CONTROL 4 + +/* MB AXI masters */ +#define AXS_MB_MST_TUNNEL_CPU 0 +#define AXS_MB_MST_USB_OHCI 10 + +/* + * memmap for ARC core on CPU Card + */ +static const struct aperture axc001_memmap[16] = { + {AXC001_SLV_AXI_TUNNEL, 0x0}, + {AXC001_SLV_AXI_TUNNEL, 0x1}, + {AXC001_SLV_SRAM, 0x0}, /* 0x2000_0000: Local SRAM */ + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_DDR_PORT0, 0x0}, /* 0x8000_0000: DDR 0..256M */ + {AXC001_SLV_DDR_PORT0, 0x1}, /* 0x9000_0000: DDR 256..512M */ + {AXC001_SLV_DDR_PORT0, 0x2}, + {AXC001_SLV_DDR_PORT0, 0x3}, + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_AXI_TUNNEL, 0xD}, + {AXC001_SLV_AXI_TUNNEL, 0xE}, /* MB: CREG, CGU... */ + {AXC001_SLV_AXI2APB, 0x0}, /* CPU Card local CREG, CGU... */ +}; + +/* + * memmap for CPU Card AXI Tunnel Master (for access by MB controllers) + * GMAC (MB) -> MB AXI Tunnel slave -> CPU Card AXI Tunnel Master -> DDR + */ +static const struct aperture axc001_axi_tunnel_memmap[16] = { + {AXC001_SLV_AXI_TUNNEL, 0x0}, + {AXC001_SLV_AXI_TUNNEL, 0x1}, + {AXC001_SLV_SRAM, 0x0}, + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_DDR_PORT1, 0x0}, + {AXC001_SLV_DDR_PORT1, 0x1}, + {AXC001_SLV_DDR_PORT1, 0x2}, + {AXC001_SLV_DDR_PORT1, 0x3}, + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_AXI_TUNNEL, 0xD}, + {AXC001_SLV_AXI_TUNNEL, 0xE}, + {AXC001_SLV_AXI2APB, 0x0}, +}; + +/* + * memmap for MB AXI Masters + * Same mem map for all perip controllers as well as MB AXI Tunnel Master + */ +static const struct aperture axs_mb_memmap[16] = { + {AXS_MB_SLV_SRAM, 0x0}, + {AXS_MB_SLV_SRAM, 0x0}, + {AXS_MB_SLV_NONE, 0x0}, + {AXS_MB_SLV_NONE, 0x0}, + {AXS_MB_SLV_NONE, 0x0}, + {AXS_MB_SLV_NONE, 0x0}, + {AXS_MB_SLV_NONE, 0x0}, + {AXS_MB_SLV_NONE, 0x0}, + {AXS_MB_SLV_AXI_TUNNEL_CPU, 0x8}, /* DDR on CPU Card */ + {AXS_MB_SLV_AXI_TUNNEL_CPU, 0x9}, /* DDR on CPU Card */ + {AXS_MB_SLV_AXI_TUNNEL_CPU, 0xA}, + {AXS_MB_SLV_AXI_TUNNEL_CPU, 0xB}, + {AXS_MB_SLV_NONE, 0x0}, + {AXS_MB_SLV_AXI_TUNNEL_HAPS, 0xD}, + {AXS_MB_SLV_CONTROL, 0x0}, /* MB Local CREG, CGU... 
*/ + {AXS_MB_SLV_AXI_TUNNEL_CPU, 0xF}, +}; + +static noinline void __init +axs101_set_memmap(void __iomem *base, const struct aperture map[16]) +{ + unsigned int slave_select, slave_offset; + int i; + + slave_select = slave_offset = 0; + for (i = 0; i < 8; i++) { + slave_select |= map[i].slave_sel << (i << 2); + slave_offset |= map[i].slave_off << (i << 2); + } + + iowrite32(slave_select, base + 0x0); /* SLV0 */ + iowrite32(slave_offset, base + 0x8); /* OFFSET0 */ + + slave_select = slave_offset = 0; + for (i = 0; i < 8; i++) { + slave_select |= map[i+8].slave_sel << (i << 2); + slave_offset |= map[i+8].slave_off << (i << 2); + } + + iowrite32(slave_select, base + 0x4); /* SLV1 */ + iowrite32(slave_offset, base + 0xC); /* OFFSET1 */ +} + +static void __init axs101_early_init(void) +{ + int i; + + /* ARC 770D memory view */ + axs101_set_memmap((void __iomem *) CREG_CPU_ADDR_770, axc001_memmap); + iowrite32(1, (void __iomem *) CREG_CPU_ADDR_770_UPD); + + /* AXI tunnel memory map (incoming traffic from MB into CPU Card */ + axs101_set_memmap((void __iomem *) CREG_CPU_ADDR_TUNN, + axc001_axi_tunnel_memmap); + iowrite32(1, (void __iomem *) CREG_CPU_ADDR_TUNN_UPD); + + /* MB peripherals memory map */ + for (i = AXS_MB_MST_TUNNEL_CPU; i <= AXS_MB_MST_USB_OHCI; i++) + axs101_set_memmap((void __iomem *) AXS_MB_CREG + (i << 4), + axs_mb_memmap); + + iowrite32(0x3ff, (void __iomem *) AXS_MB_CREG + 0x100); /* Update */ + + /* GPIO pins 18 and 19 are used as UART rx and tx, respectively. */ + iowrite32(0x01, (void __iomem *) CREG_CPU_GPIO_UART_MUX); + + /* Set up the MB interrupt system: mux interrupts to GPIO7) */ + iowrite32(0x01, (void __iomem *) CREG_MB_IRQ_MUX); + + /* reset ethernet and ULPI interfaces */ + iowrite32(0x18, (void __iomem *) CREG_MB_SW_RESET); + + /* map GPIO 14:10 to ARC 9:5 (IRQ mux change for MB v2 onwards) */ + iowrite32(0x52, (void __iomem *) CREG_CPU_ARC770_IRQ_MUX); + + axs10x_early_init(); +} + +#endif /* CONFIG_AXS101 */ + +#ifdef CONFIG_AXS103 + +#define AXC003_CREG 0xF0001000 +#define AXC003_MST_AXI_TUNNEL 0 +#define AXC003_MST_HS38 1 + +#define CREG_CPU_AXI_M0_IRQ_MUX (AXC003_CREG + 0x440) +#define CREG_CPU_GPIO_UART_MUX (AXC003_CREG + 0x480) +#define CREG_CPU_TUN_IO_CTRL (AXC003_CREG + 0x494) + + +static void __init axs103_early_init(void) +{ +#ifdef CONFIG_ARC_MCIP + /* + * AXS103 configurations for SMP/QUAD configurations share device tree + * which defaults to 100 MHz. 
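Before the AXS103 clock fix-up continues, the aperture packing performed by axs101_set_memmap() above is compact enough to model on its own: sixteen 4-bit slave selectors are folded, one nibble per aperture, into two 32-bit CREG words. The sketch below packs the axc001_memmap selectors listed earlier; only the printing scaffolding is new.

```c
#include <stdint.h>
#include <stdio.h>

/* Pack eight 4-bit slave selectors into one 32-bit CREG word, nibble i = map[i] */
static uint32_t pack_nibbles(const uint8_t sel[8])
{
	uint32_t word = 0;
	int i;

	for (i = 0; i < 8; i++)
		word |= (uint32_t)(sel[i] & 0xF) << (i << 2);
	return word;
}

int main(void)
{
	/* First half of the axc001_memmap slave selectors shown above */
	const uint8_t lo[8] = { 3, 3, 2, 0, 0, 0, 0, 0 };
	/* Second half: four DDR apertures from 0x8000_0000, then tunnel/APB */
	const uint8_t hi[8] = { 1, 1, 1, 1, 0, 3, 3, 6 };

	printf("SLV0 = %#010x\n", pack_nibbles(lo));   /* written at base + 0x0 */
	printf("SLV1 = %#010x\n", pack_nibbles(hi));   /* written at base + 0x4 */
	return 0;
}
```

The offset registers are filled the same way from the slave_off nibbles, and a write of 1 to the corresponding *_UPD register follows, as in axs101_early_init() above.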
However recent failures of Quad config + * revealed P&R timing violations so clamp it down to safe 50 MHz + * Instead of duplicating defconfig/DT for SMP/QUAD, add a small hack + * of fudging the freq in DT + */ +#define AXS103_QUAD_CORE_CPU_FREQ_HZ 50000000 + + unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F; + if (num_cores > 2) { + u32 freq; + int off = fdt_path_offset(initial_boot_params, "/cpu_card/core_clk"); + const struct fdt_property *prop; + + prop = fdt_get_property(initial_boot_params, off, + "assigned-clock-rates", NULL); + freq = be32_to_cpu(*(u32 *)(prop->data)); + + /* Patching .dtb in-place with new core clock value */ + if (freq != AXS103_QUAD_CORE_CPU_FREQ_HZ) { + freq = cpu_to_be32(AXS103_QUAD_CORE_CPU_FREQ_HZ); + fdt_setprop_inplace(initial_boot_params, off, + "assigned-clock-rates", &freq, sizeof(freq)); + } + } +#endif + + /* Memory maps already config in pre-bootloader */ + + /* set GPIO mux to UART */ + iowrite32(0x01, (void __iomem *) CREG_CPU_GPIO_UART_MUX); + + iowrite32((0x00100000U | 0x000C0000U | 0x00003322U), + (void __iomem *) CREG_CPU_TUN_IO_CTRL); + + /* Set up the AXS_MB interrupt system.*/ + iowrite32(12, (void __iomem *) (CREG_CPU_AXI_M0_IRQ_MUX + + (AXC003_MST_HS38 << 2))); + + /* connect ICTL - Main Board with GPIO line */ + iowrite32(0x01, (void __iomem *) CREG_MB_IRQ_MUX); + + axs10x_print_board_ver(AXC003_CREG + 4088, "AXC003 CPU Card"); + + axs10x_early_init(); +} +#endif + +#ifdef CONFIG_AXS101 + +static const char *axs101_compat[] __initconst = { + "snps,axs101", + NULL, +}; + +MACHINE_START(AXS101, "axs101") + .dt_compat = axs101_compat, + .init_early = axs101_early_init, +MACHINE_END + +#endif /* CONFIG_AXS101 */ + +#ifdef CONFIG_AXS103 + +static const char *axs103_compat[] __initconst = { + "snps,axs103", + NULL, +}; + +MACHINE_START(AXS103, "axs103") + .dt_compat = axs103_compat, + .init_early = axs103_early_init, +MACHINE_END + +/* + * For the VDK OS-kit, to get the offset to pid and command fields + */ +char coware_swa_pid_offset[TASK_PID]; +char coware_swa_comm_offset[TASK_COMM]; + +#endif /* CONFIG_AXS103 */ diff --git a/arch/arc/plat-hsdk/Kconfig b/arch/arc/plat-hsdk/Kconfig new file mode 100644 index 0000000000..a2d10c29fb --- /dev/null +++ b/arch/arc/plat-hsdk/Kconfig @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2017 Synopsys, Inc. (www.synopsys.com) +# + +menuconfig ARC_SOC_HSDK + bool "ARC HS Development Kit SOC" + depends on ISA_ARCV2 + select ARC_HAS_ACCL_REGS + select ARC_IRQ_NO_AUTOSAVE + select ARC_FPU_SAVE_RESTORE + select CLK_HSDK + select RESET_CONTROLLER + select RESET_HSDK + select HAVE_PCI diff --git a/arch/arc/plat-hsdk/Makefile b/arch/arc/plat-hsdk/Makefile new file mode 100644 index 0000000000..bb2921e824 --- /dev/null +++ b/arch/arc/plat-hsdk/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2017 Synopsys, Inc. (www.synopsys.com) +# + +obj-y := platform.o diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c new file mode 100644 index 0000000000..c4a875b223 --- /dev/null +++ b/arch/arc/plat-hsdk/platform.c @@ -0,0 +1,326 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ARC HSDK Platform support code + * + * Copyright (C) 2017 Synopsys, Inc. 
(www.synopsys.com) + */ + +#include +#include +#include +#include +#include +#include +#include + +int arc_hsdk_axi_dmac_coherent __section(".data") = 0; + +#define ARC_CCM_UNUSED_ADDR 0x60000000 + + +#define ARC_PERIPHERAL_BASE 0xf0000000 +#define CREG_BASE (ARC_PERIPHERAL_BASE + 0x1000) + +#define SDIO_BASE (ARC_PERIPHERAL_BASE + 0xA000) +#define SDIO_UHS_REG_EXT (SDIO_BASE + 0x108) +#define SDIO_UHS_REG_EXT_DIV_2 (2 << 30) + +#define HSDK_GPIO_INTC (ARC_PERIPHERAL_BASE + 0x3000) + +static void __init hsdk_enable_gpio_intc_wire(void) +{ + /* + * Peripherals on CPU Card are wired to cpu intc via intermediate + * DW APB GPIO blocks (mainly for debouncing) + * + * --------------------- + * | snps,archs-intc | + * --------------------- + * | + * ---------------------- + * | snps,archs-idu-intc | + * ---------------------- + * | | | | | + * | [eth] [USB] [... other peripherals] + * | + * ------------------- + * | snps,dw-apb-intc | + * ------------------- + * | | | | + * [Bt] [HAPS] [... other peripherals] + * + * Current implementation of "irq-dw-apb-ictl" driver doesn't work well + * with stacked INTCs. In particular problem happens if its master INTC + * not yet instantiated. See discussion here - + * https://lore.kernel.org/lkml/54F6FE2C.7020309@synopsys.com + * + * So setup the first gpio block as a passive pass thru and hide it from + * DT hardware topology - connect intc directly to cpu intc + * The GPIO "wire" needs to be init nevertheless (here) + * + * One side adv is that peripheral interrupt handling avoids one nested + * intc ISR hop + * + * According to HSDK User's Manual [1], "Table 2 Interrupt Mapping" + * we have the following GPIO input lines used as sources of interrupt: + * - GPIO[0] - Bluetooth interrupt of RS9113 module + * - GPIO[2] - HAPS interrupt (on HapsTrak 3 connector) + * - GPIO[3] - Audio codec (MAX9880A) interrupt + * - GPIO[8-23] - Available on Arduino and PMOD_x headers + * For now there's no use of Arduino and PMOD_x headers in Linux + * use-case so we only enable lines 0, 2 and 3. + * + * [1] https://github.com/foss-for-synopsys-dwc-arc-processors/ARC-Development-Systems-Forum/wiki/docs/ARC_HSDK_User_Guide.pdf + */ +#define GPIO_INTEN (HSDK_GPIO_INTC + 0x30) +#define GPIO_INTMASK (HSDK_GPIO_INTC + 0x34) +#define GPIO_INTTYPE_LEVEL (HSDK_GPIO_INTC + 0x38) +#define GPIO_INT_POLARITY (HSDK_GPIO_INTC + 0x3c) +#define GPIO_INT_CONNECTED_MASK 0x0d + + iowrite32(0xffffffff, (void __iomem *) GPIO_INTMASK); + iowrite32(~GPIO_INT_CONNECTED_MASK, (void __iomem *) GPIO_INTMASK); + iowrite32(0x00000000, (void __iomem *) GPIO_INTTYPE_LEVEL); + iowrite32(0xffffffff, (void __iomem *) GPIO_INT_POLARITY); + iowrite32(GPIO_INT_CONNECTED_MASK, (void __iomem *) GPIO_INTEN); +} + +static int __init hsdk_tweak_node_coherency(const char *path, bool coherent) +{ + void *fdt = initial_boot_params; + const void *prop; + int node, ret; + bool dt_coh_set; + + node = fdt_path_offset(fdt, path); + if (node < 0) + goto tweak_fail; + + prop = fdt_getprop(fdt, node, "dma-coherent", &ret); + if (!prop && ret != -FDT_ERR_NOTFOUND) + goto tweak_fail; + + dt_coh_set = ret != -FDT_ERR_NOTFOUND; + ret = 0; + + /* need to remove "dma-coherent" property */ + if (dt_coh_set && !coherent) + ret = fdt_delprop(fdt, node, "dma-coherent"); + + /* need to set "dma-coherent" property */ + if (!dt_coh_set && coherent) + ret = fdt_setprop(fdt, node, "dma-coherent", NULL, 0); + + if (ret < 0) + goto tweak_fail; + + return 0; + +tweak_fail: + pr_err("failed to tweak %s to %scoherent\n", path, coherent ? 
"" : "non"); + return -EFAULT; +} + +enum hsdk_axi_masters { + M_HS_CORE = 0, + M_HS_RTT, + M_AXI_TUN, + M_HDMI_VIDEO, + M_HDMI_AUDIO, + M_USB_HOST, + M_ETHERNET, + M_SDIO, + M_GPU, + M_DMAC_0, + M_DMAC_1, + M_DVFS +}; + +#define UPDATE_VAL 1 + +/* + * This is modified configuration of AXI bridge. Default settings + * are specified in "Table 111 CREG Address Decoder register reset values". + * + * AXI_M_m_SLV{0|1} - Slave Select register for master 'm'. + * Possible slaves are: + * - 0 => no slave selected + * - 1 => DDR controller port #1 + * - 2 => SRAM controller + * - 3 => AXI tunnel + * - 4 => EBI controller + * - 5 => ROM controller + * - 6 => AXI2APB bridge + * - 7 => DDR controller port #2 + * - 8 => DDR controller port #3 + * - 9 => HS38x4 IOC + * - 10 => HS38x4 DMI + * AXI_M_m_OFFSET{0|1} - Addr Offset register for master 'm' + * + * Please read ARC HS Development IC Specification, section 17.2 for more + * information about apertures configuration. + * + * m master AXI_M_m_SLV0 AXI_M_m_SLV1 AXI_M_m_OFFSET0 AXI_M_m_OFFSET1 + * 0 HS (CBU) 0x11111111 0x63111111 0xFEDCBA98 0x0E543210 + * 1 HS (RTT) 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 2 AXI Tunnel 0x88888888 0x88888888 0xFEDCBA98 0x76543210 + * 3 HDMI-VIDEO 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 4 HDMI-ADUIO 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 5 USB-HOST 0x77777777 0x77999999 0xFEDCBA98 0x76DCBA98 + * 6 ETHERNET 0x77777777 0x77999999 0xFEDCBA98 0x76DCBA98 + * 7 SDIO 0x77777777 0x77999999 0xFEDCBA98 0x76DCBA98 + * 8 GPU 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 9 DMAC (port #1) 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 10 DMAC (port #2) 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 11 DVFS 0x00000000 0x60000000 0x00000000 0x00000000 + */ + +#define CREG_AXI_M_SLV0(m) ((void __iomem *)(CREG_BASE + 0x20 * (m))) +#define CREG_AXI_M_SLV1(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x04)) +#define CREG_AXI_M_OFT0(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x08)) +#define CREG_AXI_M_OFT1(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x0C)) +#define CREG_AXI_M_UPDT(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x14)) + +#define CREG_AXI_M_HS_CORE_BOOT ((void __iomem *)(CREG_BASE + 0x010)) + +#define CREG_PAE ((void __iomem *)(CREG_BASE + 0x180)) +#define CREG_PAE_UPDT ((void __iomem *)(CREG_BASE + 0x194)) + +static void __init hsdk_init_memory_bridge_axi_dmac(void) +{ + bool coherent = !!arc_hsdk_axi_dmac_coherent; + u32 axi_m_slv1, axi_m_oft1; + + /* + * Don't tweak memory bridge configuration if we failed to tweak DTB + * as we will end up in a inconsistent state. + */ + if (hsdk_tweak_node_coherency("/soc/dmac@80000", coherent)) + return; + + if (coherent) { + axi_m_slv1 = 0x77999999; + axi_m_oft1 = 0x76DCBA98; + } else { + axi_m_slv1 = 0x77777777; + axi_m_oft1 = 0x76543210; + } + + writel(0x77777777, CREG_AXI_M_SLV0(M_DMAC_0)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_DMAC_0)); + writel(axi_m_slv1, CREG_AXI_M_SLV1(M_DMAC_0)); + writel(axi_m_oft1, CREG_AXI_M_OFT1(M_DMAC_0)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DMAC_0)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_DMAC_1)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_DMAC_1)); + writel(axi_m_slv1, CREG_AXI_M_SLV1(M_DMAC_1)); + writel(axi_m_oft1, CREG_AXI_M_OFT1(M_DMAC_1)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DMAC_1)); +} + +static void __init hsdk_init_memory_bridge(void) +{ + u32 reg; + + /* + * M_HS_CORE has one unique register - BOOT. 
+ * We need to clean boot mirror (BOOT[1:0]) bits in them to avoid first + * aperture to be masked by 'boot mirror'. + */ + reg = readl(CREG_AXI_M_HS_CORE_BOOT) & (~0x3); + writel(reg, CREG_AXI_M_HS_CORE_BOOT); + writel(0x11111111, CREG_AXI_M_SLV0(M_HS_CORE)); + writel(0x63111111, CREG_AXI_M_SLV1(M_HS_CORE)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HS_CORE)); + writel(0x0E543210, CREG_AXI_M_OFT1(M_HS_CORE)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HS_CORE)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_HS_RTT)); + writel(0x77777777, CREG_AXI_M_SLV1(M_HS_RTT)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HS_RTT)); + writel(0x76543210, CREG_AXI_M_OFT1(M_HS_RTT)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HS_RTT)); + + writel(0x88888888, CREG_AXI_M_SLV0(M_AXI_TUN)); + writel(0x88888888, CREG_AXI_M_SLV1(M_AXI_TUN)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_AXI_TUN)); + writel(0x76543210, CREG_AXI_M_OFT1(M_AXI_TUN)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_AXI_TUN)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_HDMI_VIDEO)); + writel(0x77777777, CREG_AXI_M_SLV1(M_HDMI_VIDEO)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HDMI_VIDEO)); + writel(0x76543210, CREG_AXI_M_OFT1(M_HDMI_VIDEO)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HDMI_VIDEO)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_HDMI_AUDIO)); + writel(0x77777777, CREG_AXI_M_SLV1(M_HDMI_AUDIO)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HDMI_AUDIO)); + writel(0x76543210, CREG_AXI_M_OFT1(M_HDMI_AUDIO)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HDMI_AUDIO)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_USB_HOST)); + writel(0x77999999, CREG_AXI_M_SLV1(M_USB_HOST)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_USB_HOST)); + writel(0x76DCBA98, CREG_AXI_M_OFT1(M_USB_HOST)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_USB_HOST)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_ETHERNET)); + writel(0x77999999, CREG_AXI_M_SLV1(M_ETHERNET)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_ETHERNET)); + writel(0x76DCBA98, CREG_AXI_M_OFT1(M_ETHERNET)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_ETHERNET)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_SDIO)); + writel(0x77999999, CREG_AXI_M_SLV1(M_SDIO)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_SDIO)); + writel(0x76DCBA98, CREG_AXI_M_OFT1(M_SDIO)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_SDIO)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_GPU)); + writel(0x77777777, CREG_AXI_M_SLV1(M_GPU)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_GPU)); + writel(0x76543210, CREG_AXI_M_OFT1(M_GPU)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_GPU)); + + writel(0x00000000, CREG_AXI_M_SLV0(M_DVFS)); + writel(0x60000000, CREG_AXI_M_SLV1(M_DVFS)); + writel(0x00000000, CREG_AXI_M_OFT0(M_DVFS)); + writel(0x00000000, CREG_AXI_M_OFT1(M_DVFS)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DVFS)); + + hsdk_init_memory_bridge_axi_dmac(); + + /* + * PAE remapping for DMA clients does not work due to an RTL bug, so + * CREG_PAE register must be programmed to all zeroes, otherwise it + * will cause problems with DMA to/from peripherals even if PAE40 is + * not used. + */ + writel(0x00000000, CREG_PAE); + writel(UPDATE_VAL, CREG_PAE_UPDT); +} + +static void __init hsdk_init_early(void) +{ + hsdk_init_memory_bridge(); + + /* + * Switch SDIO external ciu clock divider from default div-by-8 to + * minimum possible div-by-2. 
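Before that divider write, a brief aside on the register addressing used throughout hsdk_init_memory_bridge() above: every AXI master owns a 0x20-byte block of CREG decoder registers, so the CREG_AXI_M_* macros reduce to one line of arithmetic each. A small sketch of that address math, with the CREG base taken from the defines earlier and M_SDIO from the enum:

```c
#include <stdint.h>
#include <stdio.h>

#define EX_CREG_BASE 0xf0001000u    /* ARC_PERIPHERAL_BASE + 0x1000, as defined above */

/* Per-master decoder registers: one 0x20-byte block per AXI master 'm' */
static uint32_t creg_axi_m_slv0(unsigned int m) { return EX_CREG_BASE + 0x20 * m;        }
static uint32_t creg_axi_m_slv1(unsigned int m) { return EX_CREG_BASE + 0x20 * m + 0x04; }
static uint32_t creg_axi_m_oft0(unsigned int m) { return EX_CREG_BASE + 0x20 * m + 0x08; }
static uint32_t creg_axi_m_oft1(unsigned int m) { return EX_CREG_BASE + 0x20 * m + 0x0C; }
static uint32_t creg_axi_m_updt(unsigned int m) { return EX_CREG_BASE + 0x20 * m + 0x14; }

int main(void)
{
	unsigned int m = 7;    /* M_SDIO in the enum above */

	printf("SDIO SLV0 @ %#x SLV1 @ %#x OFT0 @ %#x OFT1 @ %#x UPDATE @ %#x\n",
	       creg_axi_m_slv0(m), creg_axi_m_slv1(m), creg_axi_m_oft0(m),
	       creg_axi_m_oft1(m), creg_axi_m_updt(m));
	return 0;
}
```

Scaling the enum value by 0x20 this way is what lets the same handful of macros cover all twelve masters in the table above.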
+ */ + iowrite32(SDIO_UHS_REG_EXT_DIV_2, (void __iomem *) SDIO_UHS_REG_EXT); + + hsdk_enable_gpio_intc_wire(); +} + +static const char *hsdk_compat[] __initconst = { + "snps,hsdk", + NULL, +}; + +MACHINE_START(SIMULATION, "hsdk") + .dt_compat = hsdk_compat, + .init_early = hsdk_init_early, +MACHINE_END diff --git a/arch/arc/plat-sim/Makefile b/arch/arc/plat-sim/Makefile new file mode 100644 index 0000000000..ea9389bf8b --- /dev/null +++ b/arch/arc/plat-sim/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2011-2012 Synopsys, Inc. (www.synopsys.com) +# + +obj-y := platform.o diff --git a/arch/arc/plat-sim/platform.c b/arch/arc/plat-sim/platform.c new file mode 100644 index 0000000000..2bde2a6e33 --- /dev/null +++ b/arch/arc/plat-sim/platform.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ARC simulation Platform support code + * + * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com) + */ + +#include +#include + +/*----------------------- Machine Descriptions ------------------------------ + * + * Machine description is simply a set of platform/board specific callbacks + * This is not directly related to DeviceTree based dynamic device creation, + * however as part of early device tree scan, we also select the right + * callback set, by matching the DT compatible name. + */ + +static const char *simulation_compat[] __initconst = { +#ifdef CONFIG_ISA_ARCOMPACT + "snps,nsim", + "snps,nsimosci", +#else + "snps,nsimosci_hs", + "snps,zebu_hs", +#endif + NULL, +}; + +MACHINE_START(SIMULATION, "simulation") + .dt_compat = simulation_compat, +MACHINE_END diff --git a/arch/arc/plat-tb10x/Kconfig b/arch/arc/plat-tb10x/Kconfig new file mode 100644 index 0000000000..158d53aad4 --- /dev/null +++ b/arch/arc/plat-tb10x/Kconfig @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Abilis Systems TB10x platform kernel configuration file +# +# Author: Christian Ruppert +# + + +menuconfig ARC_PLAT_TB10X + bool "Abilis TB10x" + select PINCTRL + select PINCTRL_TB10X + select PINMUX + select GPIOLIB + select GPIO_TB10X + select TB10X_IRQC + help + Support for platforms based on the TB10x home media gateway SOC by + Abilis Systems. TB10x is based on the ARC700 CPU architecture. + Say Y if you are building a kernel for one of the SOCs in this + series (e.g. TB100 or TB101). If in doubt say N. diff --git a/arch/arc/plat-tb10x/Makefile b/arch/arc/plat-tb10x/Makefile new file mode 100644 index 0000000000..8e97d454e7 --- /dev/null +++ b/arch/arc/plat-tb10x/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Abilis Systems TB10x platform Makefile +# +# Author: Christian Ruppert +# + + +KBUILD_CFLAGS += -Iarch/arc/plat-tb10x/include + +obj-y += tb10x.o diff --git a/arch/arc/plat-tb10x/tb10x.c b/arch/arc/plat-tb10x/tb10x.c new file mode 100644 index 0000000000..11d23420f9 --- /dev/null +++ b/arch/arc/plat-tb10x/tb10x.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Abilis Systems TB10x platform initialisation + * + * Copyright (C) Abilis Systems 2012 + * + * Author: Christian Ruppert + */ + +#include +#include + +static const char *tb10x_compat[] __initdata = { + "abilis,arc-tb10x", + NULL, +}; + +MACHINE_START(TB10x, "tb10x") + .dt_compat = tb10x_compat, +MACHINE_END diff --git a/arch/arm/ARMnn/Android.bp b/arch/arm/ARMnn/Android.bp new file mode 100644 index 0000000000..8414eb5160 --- /dev/null +++ b/arch/arm/ARMnn/Android.bp @@ -0,0 +1,8 @@ +// +// Copyright © 2017 ARM Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +// Dummy Android.bp file to keep backward compatibility with previous +// build systems/environments. + diff --git a/arch/arm/ARMnn/Android.mk b/arch/arm/ARMnn/Android.mk new file mode 100644 index 0000000000..b7aec59119 --- /dev/null +++ b/arch/arm/ARMnn/Android.mk @@ -0,0 +1,512 @@ +# +# Copyright © 2017 ARM Ltd. All rights reserved. +# SPDX-License-Identifier: MIT +# + +LOCAL_PATH := $(call my-dir) + +# Configure these paths if you move the source or Khronos headers +OPENCL_HEADER_PATH := $(LOCAL_PATH)/../clframework/include +NN_HEADER_PATH := $(LOCAL_PATH)/../../../../frameworks/ml/nn/runtime/include +ARMNN_HEADER_PATH := $(LOCAL_PATH)/include +ARMNN_PROFILING_INCLUDE_PATH := $(LOCAL_PATH)/profiling +ARMNN_PROFILING_COMMON_INCLUDE_PATH := $(LOCAL_PATH)/profiling/common/include +ARMNN_TIMELINE_DECODER_INCLUDE_PATH := $(LOCAL_PATH)/src/timelineDecoder +ARMNN_THIRD_PARTY_INCLUDE_PATH := $(LOCAL_PATH)/third-party +ARMNN_MAIN_HEADER_PATH := $(LOCAL_PATH)/src +ARMNN_SOURCE_HEADER_PATH := $(LOCAL_PATH)/src/armnn +ARMNN_SOURCE_UTILS_HEADER_PATH := $(LOCAL_PATH)/src/armnnUtils +ARMNN_TEST_UTILS_SOURCE_PATH := $(LOCAL_PATH)/src/armnnTestUtils +ARMNN_BACKENDS_HEADER_PATH := $(LOCAL_PATH)/src/backends +ARMNN_PROFILING_HEADER_PATH := $(LOCAL_PATH)/src/profiling +ARMNN_SERIALIZER_HEADER_PATH := $(LOCAL_PATH)/src/armnnSerializer +ARMNN_DESERIALIZER_HEADER_PATH := $(LOCAL_PATH)/src/armnnDeserializer + +# find the common.mk and backend.mk files in the backend source folders +ARMNN_BACKEND_COMMON_MAKEFILE_LOCAL_PATHS := $(wildcard $(LOCAL_PATH)/src/backends/*/common.mk) +ARMNN_BACKEND_COMMON_MAKEFILE_PATHS := $(subst $(LOCAL_PATH),,$(ARMNN_BACKEND_COMMON_MAKEFILE_LOCAL_PATHS)) +ARMNN_BACKEND_COMMON_MAKEFILE_DIRS := $(subst /common.mk,,$(ARMNN_BACKEND_COMMON_MAKEFILE_PATHS)) + +ARMNN_BACKEND_MAKEFILE_LOCAL_PATHS := $(wildcard $(LOCAL_PATH)/src/backends/*/backend.mk) +ARMNN_BACKEND_MAKEFILE_PATHS := $(subst $(LOCAL_PATH),,$(ARMNN_BACKEND_MAKEFILE_LOCAL_PATHS)) +ARMNN_BACKEND_MAKEFILE_DIRS := $(subst /backend.mk,,$(ARMNN_BACKEND_MAKEFILE_PATHS)) + +############## +# libarmnn.a # +############## +include $(CLEAR_VARS) + +LOCAL_MODULE := libarmnn +ifeq ($(Q_OR_LATER),1) +# "eng" is deprecated in Android Q +LOCAL_MODULE_TAGS := optional +else +LOCAL_MODULE_TAGS := eng optional +endif +LOCAL_ARM_MODE := arm +LOCAL_PROPRIETARY_MODULE := true + +# Placeholder to hold all backend source files and include paths, common and specific to the backends +ARMNN_BACKEND_SOURCES := +ARMNN_BACKEND_INCLUDES := + +# +# Iterate through the backend common and specific include paths, include them into the +# current makefile and append the sources held by the COMMON_SOURCES and BACKEND_SOURCES variable +# (included from the given makefile) to the ARMNN_BACKEND_SOURCES list +# and optional include paths set by BACKEND_INCLUDES to the ARMNN_BACKEND_INCLUDES list +# +$(foreach mkPath,$(ARMNN_BACKEND_COMMON_MAKEFILE_DIRS),\ + $(eval include $(LOCAL_PATH)/$(mkPath)/common.mk)\ + $(eval ARMNN_BACKEND_SOURCES := $(ARMNN_BACKEND_SOURCES)\ + $(patsubst %,$(mkPath)/%,$(COMMON_SOURCES)))) + +$(foreach mkPath,$(ARMNN_BACKEND_MAKEFILE_DIRS),\ + $(eval include $(LOCAL_PATH)/$(mkPath)/backend.mk)\ + $(eval ARMNN_BACKEND_SOURCES := $(ARMNN_BACKEND_SOURCES)\ + $(patsubst %,$(mkPath)/%,$(BACKEND_SOURCES)))) + +$(foreach mkPath,$(ARMNN_BACKEND_MAKEFILE_DIRS),\ + $(eval include $(LOCAL_PATH)/$(mkPath)/backend.mk)\ + $(eval ARMNN_BACKEND_INCLUDES += $(BACKEND_INCLUDES))) + +# Mark source files as dependent on 
Android.mk and backend makefiles +LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk \ + $(ARMNN_BACKEND_MAKEFILE_LOCAL_PATHS) + +LOCAL_EXPORT_C_INCLUDES := \ + $(ARMNN_MAIN_HEADER_PATH) \ + $(ARMNN_SOURCE_HEADER_PATH) \ + $(ARMNN_PROFILING_INCLUDE_PATH) \ + $(ARMNN_PROFILING_COMMON_INCLUDE_PATH) \ + $(ARMNN_TIMELINE_DECODER_INCLUDE_PATH) \ + $(ARMNN_THIRD_PARTY_INCLUDE_PATH) \ + $(ARMNN_SOURCE_UTILS_HEADER_PATH) \ + $(ARMNN_PROFILING_HEADER_PATH) \ + $(ARMNN_BACKENDS_HEADER_PATH) \ + $(ARMNN_SERIALIZER_HEADER_PATH) \ + $(ARMNN_DESERIALIZER_HEADER_PATH) + +LOCAL_C_INCLUDES := \ + $(OPENCL_HEADER_PATH) \ + $(NN_HEADER_PATH) \ + $(ARMNN_HEADER_PATH) \ + $(ARMNN_PROFILING_INCLUDE_PATH) \ + $(ARMNN_PROFILING_COMMON_INCLUDE_PATH) \ + $(ARMNN_TIMELINE_DECODER_INCLUDE_PATH) \ + $(ARMNN_THIRD_PARTY_INCLUDE_PATH) \ + $(ARMNN_MAIN_HEADER_PATH) \ + $(ARMNN_SOURCE_HEADER_PATH) \ + $(ARMNN_SOURCE_UTILS_HEADER_PATH) \ + $(ARMNN_PROFILING_HEADER_PATH) \ + $(ARMNN_BACKENDS_HEADER_PATH) \ + $(ARMNN_SERIALIZER_HEADER_PATH) \ + $(ARMNN_DESERIALIZER_HEADER_PATH) \ + $(ARMNN_BACKEND_INCLUDES) + +LOCAL_SRC_FILES := \ + $(ARMNN_BACKEND_SOURCES) \ + profiling/common/src/CommandHandlerFunctor.cpp \ + profiling/common/src/CommandHandlerKey.cpp \ + profiling/common/src/CommandHandlerRegistry.cpp \ + profiling/common/src/CommonProfilingUtils.cpp \ + profiling/common/src/NetworkSockets.cpp \ + profiling/common/src/PacketVersionResolver.cpp \ + profiling/common/src/SwTrace.cpp \ + profiling/common/src/LabelsAndEventClasses.cpp \ + profiling/server/src/timelineDecoder/TimelineCaptureCommandHandler.cpp \ + profiling/server/src/timelineDecoder/TimelineDecoder.cpp \ + profiling/server/src/timelineDecoder/TimelineDirectoryCaptureCommandHandler.cpp \ + src/armnn/BackendHelper.cpp \ + src/armnn/BackendRegistry.cpp \ + src/armnn/Descriptors.cpp \ + src/armnn/Exceptions.cpp \ + src/armnn/Graph.cpp \ + src/armnn/ILayerSupport.cpp \ + src/armnn/InternalTypes.cpp \ + src/armnn/JsonPrinter.cpp \ + src/armnn/Layer.cpp \ + src/armnn/LoadedNetwork.cpp \ + src/armnn/Logging.cpp \ + src/armnn/Network.cpp \ + src/armnn/NetworkUtils.cpp \ + src/armnn/Observable.cpp \ + src/armnn/Optimizer.cpp \ + src/armnn/OutputHandler.cpp \ + src/armnn/ProfilingEvent.cpp \ + src/armnn/Profiling.cpp \ + src/armnn/Runtime.cpp \ + src/armnn/SerializeLayerParameters.cpp \ + src/armnn/SubgraphView.cpp \ + src/armnn/SubgraphViewSelector.cpp \ + src/armnn/Tensor.cpp \ + src/armnn/Threadpool.cpp \ + src/armnn/TypesUtils.cpp \ + src/armnn/Utils.cpp \ + src/armnn/WallClockTimer.cpp \ + src/armnn/WorkingMemHandle.cpp \ + src/armnnUtils/CompatibleTypes.cpp \ + src/armnnUtils/DataLayoutIndexed.cpp \ + src/armnnUtils/DotSerializer.cpp \ + src/armnnUtils/FloatingPointConverter.cpp \ + src/armnnUtils/HeapProfiling.cpp \ + src/armnnUtils/LeakChecking.cpp \ + src/armnnUtils/ParserHelper.cpp \ + src/armnnUtils/Permute.cpp \ + src/armnnUtils/TensorUtils.cpp \ + src/armnnUtils/VerificationHelpers.cpp \ + src/armnnUtils/Filesystem.cpp \ + src/armnnUtils/Processes.cpp \ + src/armnnUtils/Threads.cpp \ + src/armnnUtils/Transpose.cpp \ + src/armnn/layers/ActivationLayer.cpp \ + src/armnn/layers/AdditionLayer.cpp \ + src/armnn/layers/ArgMinMaxLayer.cpp \ + src/armnn/layers/BatchNormalizationLayer.cpp \ + src/armnn/layers/BatchToSpaceNdLayer.cpp \ + src/armnn/layers/CastLayer.cpp \ + src/armnn/layers/ChannelShuffleLayer.cpp \ + src/armnn/layers/ComparisonLayer.cpp \ + src/armnn/layers/ConcatLayer.cpp \ + src/armnn/layers/ConstantLayer.cpp \ + 
src/armnn/layers/Convolution2dLayer.cpp \ + src/armnn/layers/Convolution3dLayer.cpp \ + src/armnn/layers/ConvertBf16ToFp32Layer.cpp \ + src/armnn/layers/ConvertFp16ToFp32Layer.cpp \ + src/armnn/layers/ConvertFp32ToBf16Layer.cpp \ + src/armnn/layers/ConvertFp32ToFp16Layer.cpp \ + src/armnn/layers/DebugLayer.cpp \ + src/armnn/layers/DepthToSpaceLayer.cpp \ + src/armnn/layers/DepthwiseConvolution2dLayer.cpp \ + src/armnn/layers/DequantizeLayer.cpp \ + src/armnn/layers/DetectionPostProcessLayer.cpp \ + src/armnn/layers/DivisionLayer.cpp \ + src/armnn/layers/ElementwiseBaseLayer.cpp \ + src/armnn/layers/ElementwiseUnaryLayer.cpp \ + src/armnn/layers/FakeQuantizationLayer.cpp \ + src/armnn/layers/FillLayer.cpp \ + src/armnn/layers/FloorLayer.cpp \ + src/armnn/layers/FullyConnectedLayer.cpp \ + src/armnn/layers/GatherLayer.cpp \ + src/armnn/layers/InputLayer.cpp \ + src/armnn/layers/InstanceNormalizationLayer.cpp \ + src/armnn/layers/L2NormalizationLayer.cpp \ + src/armnn/layers/LogicalBinaryLayer.cpp \ + src/armnn/layers/LogSoftmaxLayer.cpp \ + src/armnn/layers/LstmLayer.cpp \ + src/armnn/layers/MapLayer.cpp \ + src/armnn/layers/MaximumLayer.cpp \ + src/armnn/layers/MeanLayer.cpp \ + src/armnn/layers/MemCopyLayer.cpp \ + src/armnn/layers/MemImportLayer.cpp \ + src/armnn/layers/MergeLayer.cpp \ + src/armnn/layers/MinimumLayer.cpp \ + src/armnn/layers/MultiplicationLayer.cpp \ + src/armnn/layers/NormalizationLayer.cpp \ + src/armnn/layers/OutputLayer.cpp \ + src/armnn/layers/PadLayer.cpp \ + src/armnn/layers/PermuteLayer.cpp \ + src/armnn/layers/Pooling2dLayer.cpp \ + src/armnn/layers/Pooling3dLayer.cpp \ + src/armnn/layers/PreCompiledLayer.cpp \ + src/armnn/layers/PreluLayer.cpp \ + src/armnn/layers/QLstmLayer.cpp \ + src/armnn/layers/QuantizeLayer.cpp \ + src/armnn/layers/QuantizedLstmLayer.cpp \ + src/armnn/layers/RankLayer.cpp \ + src/armnn/layers/ReduceLayer.cpp \ + src/armnn/layers/ReshapeLayer.cpp \ + src/armnn/layers/ResizeLayer.cpp \ + src/armnn/layers/ShapeLayer.cpp \ + src/armnn/layers/SliceLayer.cpp \ + src/armnn/layers/SoftmaxLayer.cpp \ + src/armnn/layers/SpaceToBatchNdLayer.cpp \ + src/armnn/layers/SpaceToDepthLayer.cpp \ + src/armnn/layers/SplitterLayer.cpp \ + src/armnn/layers/StackLayer.cpp \ + src/armnn/layers/StandInLayer.cpp \ + src/armnn/layers/StridedSliceLayer.cpp \ + src/armnn/layers/SubtractionLayer.cpp \ + src/armnn/layers/SwitchLayer.cpp \ + src/armnn/layers/TransposeConvolution2dLayer.cpp \ + src/armnn/layers/TransposeLayer.cpp \ + src/armnn/layers/UnidirectionalSequenceLstmLayer.cpp \ + src/armnn/layers/UnmapLayer.cpp \ + src/profiling/ActivateTimelineReportingCommandHandler.cpp \ + src/profiling/BufferManager.cpp \ + src/profiling/CommandHandler.cpp \ + src/profiling/ConnectionAcknowledgedCommandHandler.cpp \ + src/profiling/CounterDirectory.cpp \ + src/profiling/CounterIdMap.cpp \ + src/profiling/DeactivateTimelineReportingCommandHandler.cpp \ + src/profiling/DirectoryCaptureCommandHandler.cpp \ + src/profiling/FileOnlyProfilingConnection.cpp \ + src/profiling/Holder.cpp \ + src/profiling/PacketBuffer.cpp \ + src/profiling/PeriodicCounterCapture.cpp \ + src/profiling/PeriodicCounterSelectionCommandHandler.cpp \ + src/profiling/PerJobCounterSelectionCommandHandler.cpp \ + src/profiling/ProfilingConnectionDumpToFileDecorator.cpp \ + src/profiling/ProfilingConnectionFactory.cpp \ + src/profiling/ProfilingService.cpp \ + src/profiling/ProfilingStateMachine.cpp \ + src/profiling/ProfilingUtils.cpp \ + src/profiling/RegisterBackendCounters.cpp \ + 
src/profiling/RequestCounterDirectoryCommandHandler.cpp \ + src/profiling/SendCounterPacket.cpp \ + src/profiling/SendThread.cpp \ + src/profiling/SendTimelinePacket.cpp \ + src/profiling/SocketProfilingConnection.cpp \ + src/profiling/TimelinePacketWriterFactory.cpp \ + src/profiling/TimelineUtilityMethods.cpp \ + src/profiling/backends/BackendProfiling.cpp \ + src/armnnSerializer/Serializer.cpp \ + src/armnnSerializer/SerializerUtils.cpp \ + src/armnnDeserializer/Deserializer.cpp + +LOCAL_STATIC_LIBRARIES := \ + libflatbuffers-framework \ + arm_compute_library + +LOCAL_SHARED_LIBRARIES := \ + liblog + +LOCAL_CFLAGS := \ + -std=$(CPP_VERSION) \ + -fexceptions \ + -Wno-unused-parameter \ + -frtti \ + -DFMT_HEADER_ONLY + +# The variable to enable/disable the CL backend (ARMNN_COMPUTE_CL_ENABLED) is declared in android-nn-driver/Android.mk +ifeq ($(ARMNN_COMPUTE_CL_ENABLED),1) +LOCAL_CFLAGS += \ + -DARMCOMPUTECL_ENABLED +endif # ARMNN_COMPUTE_CL_ENABLED == 1 +# The variable to enable/disable the NEON backend (ARMNN_COMPUTE_NEON_ENABLED) is declared in android-nn-driver/Android.mk +ifeq ($(ARMNN_COMPUTE_NEON_ENABLED),1) +LOCAL_CFLAGS += \ + -DARMCOMPUTENEON_ENABLED +endif # ARMNN_COMPUTE_NEON_ENABLED == 1 +# The variable to enable/disable the REFERENCE backend (ARMNN_REF_ENABLED) is declared in android-nn-driver/Android.mk +ifeq ($(ARMNN_REF_ENABLED),1) +LOCAL_CFLAGS += \ + -DARMNNREF_ENABLED +endif # ARMNN_REF_ENABLED == 1 + +ifeq ($(Q_OR_LATER),1) +LOCAL_CFLAGS += \ + -DARMNN_MIXED_PRECISION_FP16_POOLING +endif # PLATFORM_VERSION == Q or later + +include $(BUILD_STATIC_LIBRARY) + +############### +# armnn-tests # +############### +include $(CLEAR_VARS) + +LOCAL_MODULE := armnn-tests +ifeq ($(Q_OR_LATER),1) +# "eng" is deprecated in Android Q +LOCAL_MODULE_TAGS := optional +else +LOCAL_MODULE_TAGS := eng optional +endif +LOCAL_ARM_MODE := arm +LOCAL_PROPRIETARY_MODULE := true + +# placeholder to hold all backend unit test source files +ARMNN_BACKEND_TEST_SOURCES := +ARMNN_BACKEND_TEST_INCLUDES := + +# +# iterate through the backend common and specific include paths, include them into the current +# makefile and append the sources held by the COMMON_TEST_SOURCES and BACKEND_TEST_SOURCES +# (included from the given makefile) to the ARMNN_BACKEND_TEST_SOURCES list +# +$(foreach mkPath,$(ARMNN_BACKEND_COMMON_MAKEFILE_DIRS), \ + $(eval include $(LOCAL_PATH)/$(mkPath)/common.mk) \ + $(eval ARMNN_BACKEND_TEST_SOURCES := $(ARMNN_BACKEND_TEST_SOURCES) \ + $(patsubst %,$(mkPath)/%,$(COMMON_TEST_SOURCES)))) + +$(foreach mkPath,$(ARMNN_BACKEND_MAKEFILE_DIRS), \ + $(eval include $(LOCAL_PATH)/$(mkPath)/backend.mk) \ + $(eval ARMNN_BACKEND_TEST_SOURCES := $(ARMNN_BACKEND_TEST_SOURCES) \ + $(patsubst %,$(mkPath)/%,$(BACKEND_TEST_SOURCES)))) + +$(foreach mkPath,$(ARMNN_BACKEND_MAKEFILE_DIRS),\ + $(eval include $(LOCAL_PATH)/$(mkPath)/backend.mk)\ + $(eval ARMNN_BACKEND_TEST_INCLUDES += $(BACKEND_TEST_INCLUDES))) + +# Placeholder to hold all backend link files. 
+ARMNN_BACKEND_TEST_STATIC_LIBRARIES := +ARMNN_BACKEND_TEST_SHARED_LIBRARIES := + +# Iterate through the Arm NN backends and specific include paths, include them into the +# current makefile and append the linkfiles held by +# the optional BACKEND_STATIC_LIBRARIES and optional BACKEND_SHARED_LIBRARIES variable +# (included from the given makefile) to +# the ARMNN_BACKEND_STATIC_LIBRARIES and ARMNN_BACKEND_SHARED_LIBRARIES lists + +$(foreach mkPath,$(ARMNN_BACKEND_MAKEFILE_DIRS),\ + $(eval include $(LOCAL_PATH)/$(mkPath)/backend.mk)\ + $(eval ARMNN_BACKEND_TEST_STATIC_LIBRARIES += $(BACKEND_TEST_STATIC_LIBRARIES))) + +$(foreach mkPath,$(ARMNN_BACKEND_MAKEFILE_DIRS),\ + $(eval include $(LOCAL_PATH)/$(mkPath)/backend.mk)\ + $(eval ARMNN_BACKEND_TEST_SHARED_LIBRARIES += $(BACKEND_TEST_SHARED_LIBRARIES))) + + +# Mark source files as dependent on Android.mk +LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk + +LOCAL_C_INCLUDES := \ + $(OPENCL_HEADER_PATH) \ + $(NN_HEADER_PATH) \ + $(ARMNN_HEADER_PATH) \ + $(ARMNN_PROFILING_INCLUDE_PATH) \ + $(ARMNN_PROFILING_COMMON_INCLUDE_PATH) \ + $(ARMNN_TIMELINE_DECODER_INCLUDE_PATH) \ + $(ARMNN_THIRD_PARTY_INCLUDE_PATH) \ + $(ARMNN_MAIN_HEADER_PATH) \ + $(ARMNN_SOURCE_HEADER_PATH) \ + $(ARMNN_SOURCE_UTILS_HEADER_PATH) \ + $(ARMNN_TEST_UTILS_SOURCE_PATH) \ + $(ARMNN_PROFILING_HEADER_PATH) \ + $(ARMNN_BACKENDS_HEADER_PATH) \ + $(ARMNN_SERIALIZER_HEADER_PATH) \ + $(ARMNN_DESERIALIZER_HEADER_PATH) \ + $(ARMNN_BACKEND_INCLUDES) + + +LOCAL_CFLAGS := \ + -std=$(CPP_VERSION) \ + -fexceptions \ + -frtti \ + +# The variable to enable/disable the CL backend (ARMNN_COMPUTE_CL_ENABLED) is declared in android-nn-driver/Android.mk +ifeq ($(ARMNN_COMPUTE_CL_ENABLED),1) +LOCAL_CFLAGS += \ + -DARMCOMPUTECL_ENABLED +endif # ARMNN_COMPUTE_CL_ENABLED == 1 +# The variable to enable/disable the NEON backend (ARMNN_COMPUTE_NEON_ENABLED) is declared in android-nn-driver/Android.mk +ifeq ($(ARMNN_COMPUTE_NEON_ENABLED),1) +LOCAL_CFLAGS += \ + -DARMCOMPUTENEON_ENABLED +endif # ARMNN_COMPUTE_NEON_ENABLED == 1 +# The variable to enable/disable the REFERENCE backend (ARMNN_REF_ENABLED) is declared in android-nn-driver/Android.mk +ifeq ($(ARMNN_REF_ENABLED),1) +LOCAL_CFLAGS += \ + -DARMNNREF_ENABLED +endif # ARMNN_REF_ENABLED == 1 + +LOCAL_SRC_FILES := \ + $(ARMNN_BACKEND_TEST_SOURCES) \ + src/armnn/test/ConstTensorLayerVisitor.cpp \ + src/armnn/test/EndToEndTest.cpp \ + src/armnn/ExecutionFrame.cpp \ + src/armnn/test/ExecutionFrameTest.cpp \ + src/armnn/test/FloatingPointConverterTest.cpp \ + src/armnn/test/FlowControl.cpp \ + src/armnn/test/GraphTests.cpp \ + src/armnn/test/InferOutputTests.cpp \ + src/armnn/test/InstrumentTests.cpp \ + src/armnnUtils/ModelAccuracyChecker.cpp \ + src/armnn/test/ModelAccuracyCheckerTest.cpp \ + src/armnn/test/NetworkTests.cpp \ + src/armnn/test/ObservableTest.cpp \ + src/armnn/test/optimizations/ConvertConstantsBFloatTests.cpp \ + src/armnn/test/optimizations/ConvertConstantsFloatToHalfTests.cpp \ + src/armnn/test/optimizations/ConvertConstantsHalfToFloatTests.cpp \ + src/armnn/test/optimizations/Fp32NetworkToBf16ConverterTests.cpp \ + src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp \ + src/armnn/test/optimizations/FuseActivationTests.cpp \ + src/armnn/test/optimizations/InsertDebugLayerTests.cpp \ + src/armnn/test/optimizations/MovePermuteUpTests.cpp \ + src/armnn/test/optimizations/OptimizeConsecutiveReshapesTests.cpp \ + src/armnn/test/optimizations/OptimizeInverseConversionsTests.cpp \ + 
src/armnn/test/optimizations/OptimizeInversePermutesTests.cpp \ + src/armnn/test/optimizations/PermuteAndBatchToSpaceAsDepthToSpaceTests.cpp \ + src/armnn/test/optimizations/PermuteAsReshapeTests.cpp \ + src/armnn/test/optimizations/RedirectMembersToConstantInputsTests.cpp \ + src/armnn/test/optimizations/ReduceMultipleAxesTests.cpp \ + src/armnn/test/optimizations/SquashEqualSiblingsTests.cpp \ + src/armnn/test/optimizations/TransposeAsReshapeTests.cpp \ + src/armnn/test/OptimizerTests.cpp \ + src/armnn/test/OptionalTest.cpp \ + src/armnn/test/ProfilerTests.cpp \ + src/armnn/test/ProfilingEventTest.cpp \ + src/armnnUtils/PrototxtConversions.cpp \ + src/armnnUtils/test/PrototxtConversionsTest.cpp \ + src/armnn/test/SubgraphViewTests.cpp \ + src/armnn/test/TensorHandleStrategyTest.cpp \ + src/armnn/test/TensorTest.cpp \ + src/armnn/test/TestInputOutputLayerVisitor.cpp \ + src/armnn/test/TestLayerVisitor.cpp \ + src/armnn/test/TestNameAndDescriptorLayerVisitor.cpp \ + src/armnn/test/TestNameOnlyLayerVisitor.cpp \ + src/armnn/test/UtilsTests.cpp \ + src/armnnUtils/test/ParserHelperTest.cpp \ + src/armnnUtils/test/QuantizeHelperTest.cpp \ + src/armnnUtils/test/TensorUtilsTest.cpp \ + src/armnnTestUtils/CommonTestUtils.cpp \ + src/armnnTestUtils/GraphUtils.cpp \ + src/armnnTestUtils/MockBackend.cpp \ + src/armnnTestUtils/MockMemoryManager.cpp \ + src/armnnTestUtils/MockTensorHandle.cpp \ + src/armnnTestUtils/MockTensorHandleFactory.cpp \ + src/armnnTestUtils/TensorCopyUtils.cpp \ + src/armnnTestUtils/TestUtils.cpp \ + src/armnnTestUtils/UnitTests.cpp \ + src/profiling/test/BufferTests.cpp \ + src/profiling/test/FileOnlyProfilingDecoratorTests.cpp \ + src/profiling/test/PrintPacketHeaderHandler.cpp \ + src/profiling/test/ProfilingConnectionDumpToFileDecoratorTests.cpp \ + src/profiling/test/ProfilingGuidTest.cpp \ + src/profiling/test/ProfilingTests.cpp \ + src/profiling/test/ProfilingTestUtils.cpp \ + src/profiling/test/SendCounterPacketTests.cpp \ + src/profiling/test/SendTimelinePacketTests.cpp \ + src/profiling/test/TestTimelinePacketHandler.cpp \ + src/profiling/test/TimelineModel.cpp \ + src/profiling/test/TimelinePacketTests.cpp \ + src/profiling/test/TimelineUtilityMethodsTests.cpp \ + src/armnnSerializer/test/ActivationSerializationTests.cpp \ + src/armnnSerializer/test/ComparisonSerializationTests.cpp \ + src/armnnSerializer/test/LstmSerializationTests.cpp \ + src/armnnSerializer/test/SerializerTests.cpp \ + src/armnnSerializer/test/SerializerTestUtils.cpp + +ifeq ($(ARMNN_REF_ENABLED),1) +LOCAL_SRC_FILES += \ + src/armnn/test/DebugCallbackTest.cpp \ + src/armnn/test/RuntimeTests.cpp +endif + +LOCAL_STATIC_LIBRARIES := \ + libneuralnetworks_common \ + libflatbuffers-framework \ + arm_compute_library \ + $(ARMNN_BACKEND_TEST_STATIC_LIBRARIES) + +LOCAL_WHOLE_STATIC_LIBRARIES := libarmnn + +LOCAL_SHARED_LIBRARIES := \ + libbase \ + libhidlbase \ + libhidltransport \ + libhidlmemory \ + liblog \ + libutils \ + android.hardware.neuralnetworks@1.0 \ + android.hidl.allocator@1.0 \ + android.hidl.memory@1.0 \ + $(ARMNN_BACKEND_TEST_SHARED_LIBRARIES) + +ifeq ($(ARMNN_INCLUDE_LIBOPENCL),1) +LOCAL_SHARED_LIBRARIES += \ + libOpenCL +endif + +include $(BUILD_EXECUTABLE) diff --git a/arch/arm/ARMnn/BuildGuideAndroidNDK.md b/arch/arm/ARMnn/BuildGuideAndroidNDK.md new file mode 100644 index 0000000000..5068a8f5c8 --- /dev/null +++ b/arch/arm/ARMnn/BuildGuideAndroidNDK.md @@ -0,0 +1,259 @@ +# How to use the Android NDK to build Arm NN + +- [Introduction](#introduction) +- [Download the Android NDK and 
make a standalone toolchain](#download-the-android-ndk-and-make-a-standalone-toolchain) +- [Build Google's Protobuf library](#build-google-s-protobuf-library) +- [Download Arm NN](#download-arm-nn) +- [Build Arm Compute Library](#build-arm-compute-library) +- [Build Arm NN](#build-arm-nn) +- [Build Standalone Sample Dynamic Backend](#build-standalone-sample-dynamic-backend) +- [Run the Arm NN unit tests on an Android device](#run-the-armnn-unit-tests-on-an-android-device) + + +## Introduction +These are step by step instructions for using the Android NDK to build Arm NN. +They have been tested on a clean install of Ubuntu 18.04 and 20.04, and should also work with other OS versions. +The instructions show how to build the Arm NN core library. +Building protobuf is optional. We have given steps should the user wish to build it (i.e. as an Onnx dependency). +All downloaded or generated files will be saved inside the `$HOME/armnn-devenv` directory. + +## Download the Android NDK and make a standalone toolchain + +* Download the Android NDK from [the official website](https://developer.android.com/ndk/downloads/index.html): + ```bash + mkdir -p $HOME/armnn-devenv/ + cd $HOME/armnn-devenv/ + # For Mac OS, change the NDK download link accordingly. + wget https://dl.google.com/android/repository/android-ndk-r20b-linux-x86_64.zip + unzip android-ndk-r20b-linux-x86_64.zip + export NDK=$HOME/armnn-devenv/android-ndk-r20b + export NDK_TOOLCHAIN_ROOT=$NDK/toolchains/llvm/prebuilt/linux-x86_64 + export PATH=$NDK_TOOLCHAIN_ROOT/bin/:$PATH + + # You may want to append the above export variables commands to your `~/.bashrc` (or `~/.bash_profile` in Mac OS). + ``` + +* With the android ndk-20b, you don't need to use the make_standalone_toolchain script to create a toolchain for a specific version of android. Android's current preference is for you to just specify the architecture and operating system while setting the compiler and just use the ndk directory. + +## Build Google's Protobuf library (Optional) + +* Clone protobuf: + (Requires Git if not previously installed: `sudo apt install git`) +```bash +mkdir $HOME/armnn-devenv/google +cd $HOME/armnn-devenv/google +git clone https://github.com/google/protobuf.git +cd protobuf +git checkout -b v3.12.0 v3.12.0 +``` + +* Build a native (x86) version of the protobuf libraries and compiler (protoc): + (Requires cUrl, autoconf, llibtool, and other build dependencies if not previously installed: `sudo apt install curl autoconf libtool build-essential g++`) +```bash +./autogen.sh +mkdir x86_build +cd x86_build +../configure --prefix=$HOME/armnn-devenv/google/x86_pb_install +make install -j16 +cd .. +``` + +* Build the arm64 version of the protobuf libraries: +```bash +mkdir arm64_build +cd arm64_build +CC=aarch64-linux-android-clang \ +CXX=aarch64-linux-android-clang++ \ +CFLAGS="-fPIE -fPIC" \ + LDFLAGS="-llog -lz -lc++_static" \ + ../configure --host=aarch64-linux-android \ + --prefix=$HOME/armnn-devenv/google/arm64_pb_install \ + --enable-cross-compile \ + --with-protoc=$HOME/armnn-devenv/google/x86_pb_install/bin/protoc +make install -j16 +cd .. +``` + +Note: The ANDROID_API variable should be set to the Android API version number you are using. E.g. "30" for Android R. 
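+
+* Optional sanity check (not part of the original guide): confirm the cross-built protobuf library really targets arm64, and export `ANDROID_API` for the later CMake steps. The API level below is only an example; use the level that matches your target device:
+```bash
+# Assumes the arm64 protobuf build above completed successfully
+export ANDROID_API=30   # e.g. 30 for Android R
+file $HOME/armnn-devenv/google/arm64_pb_install/lib/libprotobuf.so
+# Expect something like: "ELF 64-bit LSB shared object, ARM aarch64, ..."
+```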
+ +## Download Arm NN +* Clone Arm NN: + (Requires Git if not previously installed: `sudo apt install git`) + +```bash +cd $HOME/armnn-devenv +git clone https://github.com/ARM-software/armnn.git +``` + +* Checkout Arm NN branch: +```bash +cd armnn +git checkout +git pull +``` + +For example, if you want to check out the 21.11 release branch: +```bash +git checkout branches/armnn_21_11 +git pull +``` + +## Build Arm Compute Library +* Clone Arm Compute Library: + +```bash +cd $HOME/armnn-devenv +git clone https://github.com/ARM-software/ComputeLibrary.git +``` +* Checkout Arm Compute Library release tag: +```bash +cd ComputeLibrary +git checkout +``` +For example, if you want to checkout the 21.11 release tag: +```bash +git checkout v21.11 +``` + +Arm NN and Arm Compute Library are developed closely together. If you would like to use a particular release of Arm NN you will need the same release tag of ACL too. + +Arm NN provides a script that downloads the version of Arm Compute Library that Arm NN was tested with: +```bash +git checkout $(../armnn/scripts/get_compute_library.sh -p) +``` +* the Arm Compute Library: + (Requires SCons if not previously installed: `sudo apt install scons`) +```bash +scons arch=arm64-v8a neon=1 opencl=1 embed_kernels=1 extra_cxx_flags="-fPIC" \ + benchmark_tests=0 validation_tests=0 os=android -j16 +``` + +## Build Arm NN + +* Build Arm NN: + (Requires CMake if not previously installed: `sudo apt install cmake`) +```bash +mkdir $HOME/armnn-devenv/armnn/build +cd $HOME/armnn-devenv/armnn/build +CXX=aarch64-linux-android-clang++ \ +CC=aarch64-linux-android-clang \ +CXX_FLAGS="-fPIE -fPIC" \ +cmake .. \ + -DCMAKE_ANDROID_NDK=$NDK \ + -DCMAKE_SYSTEM_NAME=Android \ + -DCMAKE_SYSTEM_VERSION= \ + -DCMAKE_ANDROID_ARCH_ABI=arm64-v8a \ + -DCMAKE_EXE_LINKER_FLAGS="-pie -llog -lz" \ + -DARMCOMPUTE_ROOT=$HOME/armnn-devenv/ComputeLibrary/ \ + -DARMCOMPUTE_BUILD_DIR=$HOME/armnn-devenv/ComputeLibrary/build \ + -DARMCOMPUTENEON=1 -DARMCOMPUTECL=1 -DARMNNREF=1 \ + -DPROTOBUF_ROOT=$HOME/armnn-devenv/google/arm64_pb_install/ +``` + +To include standalone sample dynamic backend tests, add the argument to enable the tests and the dynamic backend path to the CMake command: + +```bash +-DSAMPLE_DYNAMIC_BACKEND=1 \ +-DDYNAMIC_BACKEND_PATHS=$SAMPLE_DYNAMIC_BACKEND_PATH +# Where $SAMPLE_DYNAMIC_BACKEND_PATH is the path where libArm_SampleDynamic_backend.so library file is pushed +``` + + * Run the build +```bash +make -j16 +``` + +## Build Standalone Sample Dynamic Backend +* The sample dynamic backend is located in armnn/src/dynamic/sample +```bash +mkdir build +cd build +``` + +* Use CMake to configure the build environment, update the following script and run it from the armnn/src/dynamic/sample/build directory to set up the Arm NN build: +```bash +#!/bin/bash +CXX=aarch64-linux-android-clang++ \ +CC=aarch64-linux-android-clang \ +CXX_FLAGS="-fPIE -fPIC" \ +cmake \ +-DCMAKE_C_COMPILER_WORKS=TRUE \ +-DCMAKE_CXX_COMPILER_WORKS=TRUE \ +-DCMAKE_ANDROID_NDK=$NDK \ +-DCMAKE_SYSTEM_NAME=Android \ +-DCMAKE_SYSTEM_VERSION=$ANDROID_API \ +-DCMAKE_ANDROID_ARCH_ABI=arm64-v8a \ +-DCMAKE_SYSROOT=$HOME/armnn-devenv/android-ndk-r20b/toolchains/llvm/prebuilt/linux-x86_64/sysroot \ +-DCMAKE_CXX_FLAGS=--std=c++14 \ +-DCMAKE_EXE_LINKER_FLAGS="-pie -llog" \ +-DCMAKE_MODULE_LINKER_FLAGS="-llog" \ +-DARMNN_PATH=$HOME/armnn-devenv/armnn/build/libarmnn.so .. 
+``` + +* Run the build +```bash +make +``` + +## Run the Arm NN unit tests on an Android device + + +* Push the build results to an Android device and make symbolic links for shared libraries: + Currently adb version we have used for testing is 1.0.41. +```bash +adb push libarmnn.so /data/local/tmp/ + adb push libtimelineDecoder.so /data/local/tmp/ +adb push UnitTests /data/local/tmp/ +adb push $NDK/sources/cxx-stl/llvm-libc++/libs/arm64-v8a/libc++_shared.so /data/local/tmp/ +adb push $HOME/armnn-devenv/google/arm64_pb_install/lib/libprotobuf.so /data/local/tmp/libprotobuf.so.23.0.0 +adb shell 'ln -s libprotobuf.so.23.0.0 /data/local/tmp/libprotobuf.so.23' +adb shell 'ln -s libprotobuf.so.23.0.0 /data/local/tmp/libprotobuf.so' +``` + +* Push the files needed for the unit tests (they are a mix of files, directories and symbolic links): +```bash +adb shell mkdir -p /data/local/tmp/src/backends/backendsCommon/test/testSharedObject +adb push -p $HOME/armnn-devenv/armnn/build/src/backends/backendsCommon/test/testSharedObject/* /data/local/tmp/src/backends/backendsCommon/test/testSharedObject/ + +adb shell mkdir -p /data/local/tmp/src/backends/backendsCommon/test/testDynamicBackend +adb push -p $HOME/armnn-devenv/armnn/build/src/backends/backendsCommon/test/testDynamicBackend/* /data/local/tmp/src/backends/backendsCommon/test/testDynamicBackend/ + +adb shell mkdir -p /data/local/tmp/src/backends/backendsCommon/test/backendsTestPath1 +adb push -p $HOME/armnn-devenv/armnn/build/src/backends/backendsCommon/test/backendsTestPath1/* /data/local/tmp/src/backends/backendsCommon/test/backendsTestPath1/ + +adb shell mkdir -p /data/local/tmp/src/backends/backendsCommon/test/backendsTestPath2 +adb push -p $HOME/armnn-devenv/armnn/build/src/backends/backendsCommon/test/backendsTestPath2/Arm_CpuAcc_backend.so /data/local/tmp/src/backends/backendsCommon/test/backendsTestPath2/ +adb shell ln -s Arm_CpuAcc_backend.so /data/local/tmp/src/backends/backendsCommon/test/backendsTestPath2/Arm_CpuAcc_backend.so.1 +adb shell ln -s Arm_CpuAcc_backend.so.1 /data/local/tmp/src/backends/backendsCommon/test/backendsTestPath2/Arm_CpuAcc_backend.so.1.2 +adb shell ln -s Arm_CpuAcc_backend.so.1.2 /data/local/tmp/src/backends/backendsCommon/test/backendsTestPath2/Arm_CpuAcc_backend.so.1.2.3 +adb push -p $HOME/armnn-devenv/armnn/build/src/backends/backendsCommon/test/backendsTestPath2/Arm_GpuAcc_backend.so /data/local/tmp/src/backends/backendsCommon/test/backendsTestPath2/ +adb shell ln -s nothing /data/local/tmp/src/backends/backendsCommon/test/backendsTestPath2/Arm_no_backend.so + +adb shell mkdir -p /data/local/tmp/src/backends/backendsCommon/test/backendsTestPath3 + +adb shell mkdir -p /data/local/tmp/src/backends/backendsCommon/test/backendsTestPath5 +adb push -p $HOME/armnn-devenv/armnn/build/src/backends/backendsCommon/test/backendsTestPath5/* /data/local/tmp/src/backends/backendsCommon/test/backendsTestPath5/ + +adb shell mkdir -p /data/local/tmp/src/backends/backendsCommon/test/backendsTestPath6 +adb push -p $HOME/armnn-devenv/armnn/build/src/backends/backendsCommon/test/backendsTestPath6/* /data/local/tmp/src/backends/backendsCommon/test/backendsTestPath6/ + +adb shell mkdir -p /data/local/tmp/src/backends/backendsCommon/test/backendsTestPath7 + +adb shell mkdir -p /data/local/tmp/src/backends/backendsCommon/test/backendsTestPath9 +adb push -p $HOME/armnn-devenv/armnn/build/src/backends/backendsCommon/test/backendsTestPath9/* /data/local/tmp/src/backends/backendsCommon/test/backendsTestPath9/ + +adb shell mkdir -p 
/data/local/tmp/src/backends/dynamic/reference
+adb push -p $HOME/armnn-devenv/armnn/build/src/backends/dynamic/reference/Arm_CpuRef_backend.so /data/local/tmp/src/backends/dynamic/reference/
+
+# If the standalone sample dynamic tests are enabled, also push the libArm_SampleDynamic_backend.so library file to the folder specified as $SAMPLE_DYNAMIC_BACKEND_PATH when Arm NN is built.
+# This example assumes that $SAMPLE_DYNAMIC_BACKEND_PATH is specified as /data/local/tmp/dynamic/sample/:
+
+adb shell mkdir -p /data/local/tmp/dynamic/sample/
+adb push -p $HOME/armnn-devenv/armnn/src/dynamic/sample/build/libArm_SampleDynamic_backend.so /data/local/tmp/dynamic/sample/
+```
+
+* Run the Arm NN unit tests:
+```bash
+adb shell 'LD_LIBRARY_PATH=/data/local/tmp:/vendor/lib64:/vendor/lib64/egl /data/local/tmp/UnitTests'
+```
+If libarmnnUtils.a is present in `$HOME/armnn-devenv/armnn/build/` and the unit tests run without failure then the build was successful.
diff --git a/arch/arm/ARMnn/BuildGuideCrossCompilation.md b/arch/arm/ARMnn/BuildGuideCrossCompilation.md
new file mode 100644
index 0000000000..d5d1a227e1
--- /dev/null
+++ b/arch/arm/ARMnn/BuildGuideCrossCompilation.md
@@ -0,0 +1,366 @@
+# How to Cross-Compile Arm NN on x86_64 for arm64
+
+- [Introduction](#introduction)
+- [Cross-compiling ToolChain](#cross-compiling-toolchain)
+- [Install CMake](#install-cmake)
+- [Build and install Google's Protobuf library](#build-and-install-google-s-protobuf-library)
+- [Download Arm NN](#download-arm-nn)
+- [Build Arm Compute Library](#build-arm-compute-library)
+- [Build Flatbuffer](#build-flatbuffer)
+- [Build Onnx](#build-onnx)
+- [Build TfLite](#build-tflite)
+- [Build Arm NN](#build-armnn)
+- [Generate TF Lite Schema](#generate-tflite-schema)
+- [Build Standalone Sample Dynamic Backend](#build-standalone-sample-dynamic-backend)
+- [Run Unit Tests](#run-unit-tests)
+- [Troubleshooting and Errors:](#troubleshooting-and-errors-)
+
+
+## Introduction
+These are step by step instructions for cross-compiling Arm NN on an x86_64 system to target an arm64 system. This build flow has been tested with Ubuntu 18.04 and 20.04 and it depends on the same version of Ubuntu or Debian being installed on both the build host and target machines. The instructions assume you are using a bash shell and show how to build the Arm NN core library, Protobuf, TfLite, Flatbuffer and the Compute Library.
+Start by creating a directory to contain all components:
+
+```bash
+mkdir $HOME/armnn-devenv
+cd $HOME/armnn-devenv
+```
+
+## Cross-compiling ToolChain
+* Install the standard cross-compilation libraries for arm64:
+```
+sudo apt install crossbuild-essential-arm64
+```
+
+## Install CMake
+CMake 3.19.0-rc3 is required to build the TF Lite Delegate.
+
+```bash
+sudo apt-get install libssl-dev
+wget https://github.com/Kitware/CMake/releases/download/v3.19.0-rc3/cmake-3.19.0-rc3.tar.gz
+tar -zxvf cmake-3.19.0-rc3.tar.gz
+cd cmake-3.19.0-rc3
+./bootstrap --prefix=$HOME/armnn-devenv/cmake/install
+make all install
+cd ..
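+# Optional addition (not in the original guide): put the newly installed CMake
+# first on PATH and confirm the version before continuing.
+export PATH=$HOME/armnn-devenv/cmake/install/bin:$PATH
+cmake --version   # should report cmake version 3.19.0-rc3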
+```
+
+
+## Build and install Google's Protobuf library
+
+We support protobuf version 3.12.0.
+* Get protobuf from https://github.com/protocolbuffers/protobuf:
+ (Requires Git if not previously installed: `sudo apt install git`)
+```bash
+git clone -b v3.12.0 https://github.com/google/protobuf.git protobuf
+cd protobuf
+git submodule update --init --recursive
+./autogen.sh
+```
+* Build a native (x86_64) version of the protobuf libraries and compiler (protoc):
+ (Requires curl, autoconf, libtool, and other build dependencies if not previously installed: `sudo apt install curl autoconf libtool build-essential g++`)
+```bash
+mkdir x86_64_build
+cd x86_64_build
+../configure --prefix=$HOME/armnn-devenv/google/x86_64_pb_install
+make install -j16
+cd ..
+```
+* Build the arm64 version of the protobuf libraries:
+```bash
+mkdir arm64_build
+cd arm64_build
+CC=aarch64-linux-gnu-gcc \
+CXX=aarch64-linux-gnu-g++ \
+../configure --host=aarch64-linux \
+--prefix=$HOME/armnn-devenv/google/arm64_pb_install \
+--with-protoc=$HOME/armnn-devenv/google/x86_64_pb_install/bin/protoc
+make install -j16
+cd ..
+```
+
+## Download Arm NN
+* Clone Arm NN:
+```bash
+cd $HOME/armnn-devenv
+git clone https://github.com/ARM-software/armnn.git
+```
+
+* Checkout the Arm NN branch:
+```bash
+cd armnn
+git checkout <branch_name>
+git pull
+```
+For example, if you want to check out the 21.11 release branch:
+```bash
+git checkout branches/armnn_21_11
+git pull
+```
+
+## Build Arm Compute Library
+* Clone Arm Compute Library:
+
+```bash
+cd $HOME/armnn-devenv
+git clone https://github.com/ARM-software/ComputeLibrary.git
+```
+* Checkout the Arm Compute Library release tag:
+```bash
+cd ComputeLibrary
+git checkout <tag_name>
+```
+Arm NN and Arm Compute Library are developed closely together. If you would like to use the Arm NN 21.11 release you will need the 21.11 release of ACL too. For example, if you want to checkout the 21.11 release tag:
+```bash
+git checkout v21.11
+```
+Arm NN provides a script that downloads the version of Arm Compute Library that Arm NN was tested with:
+```bash
+git checkout $(../armnn/scripts/get_compute_library.sh -p)
+```
+* Build the Arm Compute Library:
+ (Requires SCons if not previously installed: `sudo apt install scons`)
+```bash
+scons arch=arm64-v8a neon=1 opencl=1 embed_kernels=1 extra_cxx_flags="-fPIC" -j4
+```
+
+## Build Flatbuffer
+* Build Flatbuffer version 1.12.0
+```bash
+cd $HOME/armnn-devenv
+wget -O flatbuffers-1.12.0.tar.gz https://github.com/google/flatbuffers/archive/v1.12.0.tar.gz
+tar xf flatbuffers-1.12.0.tar.gz
+cd flatbuffers-1.12.0
+rm -f CMakeCache.txt
+mkdir build
+cd build
+cmake .. -DFLATBUFFERS_BUILD_FLATC=1 \
+ -DCMAKE_INSTALL_PREFIX:PATH=$HOME/armnn-devenv/flatbuffers \
+ -DFLATBUFFERS_BUILD_TESTS=0
+make all install
+```
+
+* Build the arm64 version of flatbuffers
+```bash
+cd ..
+mkdir build-arm64
+cd build-arm64
+# Add -fPIC to allow us to use the libraries in shared objects.
+CXXFLAGS="-fPIC" cmake .. \
+ -DCMAKE_C_COMPILER=/usr/bin/aarch64-linux-gnu-gcc \
+ -DCMAKE_CXX_COMPILER=/usr/bin/aarch64-linux-gnu-g++ \
+ -DFLATBUFFERS_BUILD_FLATC=1 \
+ -DCMAKE_INSTALL_PREFIX:PATH=$HOME/armnn-devenv/flatbuffers-arm64 \
+ -DFLATBUFFERS_BUILD_TESTS=0
+make all install
+```
+
+## Build Onnx
+* Build Onnx
+```bash
+cd $HOME/armnn-devenv
+git clone https://github.com/onnx/onnx.git
+cd onnx
+git fetch https://github.com/onnx/onnx.git 553df22c67bee5f0fe6599cff60f1afc6748c635 && git checkout FETCH_HEAD
+LD_LIBRARY_PATH=$HOME/armnn-devenv/google/x86_64_pb_install/lib:$LD_LIBRARY_PATH \
+$HOME/armnn-devenv/google/x86_64_pb_install/bin/protoc \
+onnx/onnx.proto --proto_path=. --proto_path=../google/x86_64_pb_install/include --cpp_out $HOME/armnn-devenv/onnx
+```
+
+## Build TfLite
+* Arm NN provides a script, armnn/scripts/get_tensorflow.sh, that can be used to check out the version of TensorFlow that Arm NN was tested with:
+```bash
+cd $HOME/armnn-devenv
+git clone https://github.com/tensorflow/tensorflow.git
+cd tensorflow/
+git checkout $(../armnn/scripts/get_tensorflow.sh -p) # Checks out the latest tested version of TF
+cd ..
+```
+
+* You will need to download the gcc-arm-8.3-2019.03 toolchain and then build TF Lite as follows:
+```bash
+curl -LO https://storage.googleapis.com/mirror.tensorflow.org/developer.arm.com/media/Files/downloads/gnu-a/8.3-2019.03/binrel/gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz
+mkdir tflite-toolchains
+tar xvf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C tflite-toolchains
+mkdir -p tflite/build
+cd tflite/build
+ARMCC_PREFIX=$HOME/armnn-devenv/tflite-toolchains/gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu/bin/aarch64-linux-gnu- \
+ARMCC_FLAGS="-funsafe-math-optimizations" \
+cmake -DCMAKE_C_COMPILER=${ARMCC_PREFIX}gcc \
+ -DCMAKE_CXX_COMPILER=${ARMCC_PREFIX}g++ \
+ -DCMAKE_C_FLAGS="${ARMCC_FLAGS}" -DCMAKE_CXX_FLAGS="${ARMCC_FLAGS}" \
+ -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DCMAKE_SYSTEM_NAME=Linux \
+ -DTFLITE_ENABLE_XNNPACK=OFF \
+ -DCMAKE_SYSTEM_PROCESSOR=aarch64 \
+ $HOME/armnn-devenv/tensorflow/tensorflow/lite/
+cmake --build .
+```
+
+## Generate TF Lite Schema
+```bash
+cd $HOME/armnn-devenv/tflite
+cp ../tensorflow/tensorflow/lite/schema/schema.fbs .
+../flatbuffers-1.12.0/build/flatc -c --gen-object-api --reflect-types --reflect-names schema.fbs
+```
+
+## Build Arm NN
+* Compile Arm NN for arm64:
+```bash
+cd $HOME/armnn-devenv/armnn
+mkdir build
+cd build
+```
+
+* Use CMake to configure your build environment, update the following script and run it from the armnn/build directory to set up the Arm NN build:
+```bash
+#!/bin/bash
+CXX=aarch64-linux-gnu-g++ CC=aarch64-linux-gnu-gcc cmake ..
\ +-DARMCOMPUTE_ROOT=$HOME/armnn-devenv/ComputeLibrary \ +-DARMCOMPUTE_BUILD_DIR=$HOME/armnn-devenv/ComputeLibrary/build/ \ +-DARMCOMPUTENEON=1 -DARMCOMPUTECL=1 -DARMNNREF=1 \ +-DONNX_GENERATED_SOURCES=$HOME/armnn-devenv/onnx \ +-DBUILD_ONNX_PARSER=1 \ +-DBUILD_TF_LITE_PARSER=1 \ +-DTENSORFLOW_ROOT=$HOME/armnn-devenv/tensorflow \ +-DFLATBUFFERS_ROOT=$HOME/armnn-devenv/flatbuffers-arm64 \ +-DFLATC_DIR=$HOME/armnn-devenv/flatbuffers-1.12.0/build \ +-DPROTOBUF_ROOT=$HOME/armnn-devenv/google/x86_64_pb_install \ +-DPROTOBUF_ROOT=$HOME/armnn-devenv/google/x86_64_pb_install/ \ +-DPROTOBUF_LIBRARY_DEBUG=$HOME/armnn-devenv/google/arm64_pb_install/lib/libprotobuf.so.23.0.0 \ +-DPROTOBUF_LIBRARY_RELEASE=$HOME/armnn-devenv/google/arm64_pb_install/lib/libprotobuf.so.23.0.0 +``` + +* If you want to include standalone sample dynamic backend tests, add the argument to enable the tests and the dynamic backend path to the CMake command: +```bash +-DSAMPLE_DYNAMIC_BACKEND=1 \ +-DDYNAMIC_BACKEND_PATHS=$SAMPLE_DYNAMIC_BACKEND_PATH +``` +* If you want to build Arm NN TF Lite Delegate, add the arguments: +```bash +-DTFLITE_LIB_ROOT=$HOME/armnn-devenv/tflite/build \ +-DTF_LITE_SCHEMA_INCLUDE_PATH=$HOME/armnn-devenv/tflite \ +-DBUILD_ARMNN_TFLITE_DELEGATE=1 +``` +* Run the build +```bash +make -j32 +``` + +## Build Standalone Sample Dynamic Backend +* The sample dynamic backend is located in armnn/src/dynamic/sample +```bash +cd $HOME/armnn-devenv/armnn/src/dynamic/sample +mkdir build +cd build +``` + +* Use CMake to configure your build environment, update the following script and run it from the armnn/src/dynamic/sample/build directory to set up the Arm NN build: +```bash +#!/bin/bash +CXX=aarch64-linux-gnu-g++ CC=aarch64-linux-gnu-gcc cmake .. \ +-DCMAKE_CXX_FLAGS=--std=c++14 \ +-DARMNN_PATH=$HOME/armnn-devenv/armnn/build/libarmnn.so +``` + +* Run the build +```bash +make +``` + +## Run Unit Tests +* Copy the build folder to an arm64 linux machine +* Copy the libprotobuf.so.23.0.0 library file to the build folder +* If you enable the standalone sample dynamic tests, also copy libArm_SampleDynamic_backend.so library file to the folder specified as $SAMPLE_DYNAMIC_BACKEND_PATH when you build Arm NN +* cd to the build folder on your arm64 machine and set your LD_LIBRARY_PATH to its current location: + +```bash +cd build/ +``` + +* Create a symbolic link to libprotobuf.so.23.0.0: + +```bash +ln -s libprotobuf.so.23.0.0 ./libprotobuf.so.23 +``` + +* Run the UnitTests: + +```bash +LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./UnitTests +[doctest] doctest version is "2.4.6" +[doctest] run with "--help" for options +=============================================================================== +[doctest] test cases: 4817 | 4817 passed | 0 failed | 0 skipped +[doctest] assertions: 807634 | 807634 passed | 0 failed | +[doctest] Status: SUCCESS! 
+```
+
+* Run the Delegate UnitTests:
+
+```bash
+LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./delegate/DelegateUnitTests
+```
+
+## Troubleshooting and Errors:
+### Missing libz.so.1
+* When compiling Arm NN you may see a warning like:
+```bash
+/usr/lib/gcc-cross/aarch64-linux-gnu/5/../../../../aarch64-linux-gnu/bin/ld: warning: libz.so.1, needed by /home//armNN/usr/lib64/libprotobuf.so.23.0.0, not found (try using -rpath or -rpath-link)
+```
+
+* The arm64 libraries for libz.so.1 are missing. They can be added by registering a second architecture with dpkg and explicitly installing them:
+```bash
+sudo dpkg --add-architecture arm64
+sudo apt-get update
+sudo apt-get install zlib1g:arm64
+sudo ldconfig
+```
+* If apt-get update returns 404 errors for the arm64 repos, refer to the "Unable to install arm64 packages after adding arm64 architecture" section below.
+* Alternatively, the missing arm64 version of libz.so.1 can be downloaded and installed from a .deb package here:
+ https://launchpad.net/ubuntu/wily/arm64/zlib1g/1:1.2.8.dfsg-2ubuntu4
+```bash
+sudo dpkg -i zlib1g_1.2.8.dfsg-2ubuntu4_arm64.deb
+```
+
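+* A quick way to confirm the fix (an illustrative check, not from the original guide) is to verify that dpkg now knows about the arm64 package and that the library is present in the usual multiarch path:
+```bash
+dpkg -l zlib1g:arm64                      # should list the installed arm64 package
+ls /usr/lib/aarch64-linux-gnu/libz.so.1   # typical multiarch location on Ubuntu
+```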

+
+### Unable to install arm64 packages after adding arm64 architecture
+* Running `sudo apt-get update` should add all of the required repos for arm64. If it does not, or you are getting 404 errors, the following instructions can be used to add the repos manually:
+* Based on this Ask Ubuntu answer:
+https://askubuntu.com/questions/430705/how-to-use-apt-get-to-download-multi-arch-library/430718
+* Open /etc/apt/sources.list with your preferred text editor.
+
+* Mark all the current (default) repos as `[arch=amd64]` (your host architecture), e.g.
+```bash
+deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ xenial main restricted
+```
+* Then add the following:
+```bash
+deb [arch=arm64] http://ports.ubuntu.com/ xenial main restricted
+deb [arch=arm64] http://ports.ubuntu.com/ xenial-updates main restricted
+deb [arch=arm64] http://ports.ubuntu.com/ xenial universe
+deb [arch=arm64] http://ports.ubuntu.com/ xenial-updates universe
+deb [arch=arm64] http://ports.ubuntu.com/ xenial multiverse
+deb [arch=arm64] http://ports.ubuntu.com/ xenial-updates multiverse
+deb [arch=arm64] http://ports.ubuntu.com/ xenial-backports main restricted universe multiverse
+```
+* Update and install again:
+```bash
+sudo apt-get update
+sudo apt-get install zlib1g:arm64
+sudo ldconfig
+```
+
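+* If you prefer not to edit the file by hand, a one-liner such as the following can tag the default repos (a sketch only; back up /etc/apt/sources.list first and adjust it if your entries already carry [arch=...] options):
+```bash
+sudo cp /etc/apt/sources.list /etc/apt/sources.list.bak
+sudo sed -i 's/^deb http/deb [arch=amd64] http/' /etc/apt/sources.list
+```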

+
+### Undefined references to google::protobuf:: functions
+* This usually indicates missing or out-of-date protobuf compilation libraries.
+ Use the command `protoc --version` to check which version of protobuf is available (version 3.12.0 is required).
+ Follow the instructions above to install protobuf 3.12.0.
+
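+* For example, to check both the protoc compiler used for code generation and the one installed earlier in this guide (paths assume the directory layout used above):
+```bash
+protoc --version                                                  # system protoc, if any
+$HOME/armnn-devenv/google/x86_64_pb_install/bin/protoc --version  # should print: libprotoc 3.12.0
+```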

+ +### Errors on strict-aliasing rules when compiling the Compute Library +* When compiling the Compute Library there are multiple errors on strict-aliasing rules: + ``` +cc1plus: error: unrecognized command line option ‘-Wno-implicit-fallthrough’ [-Werror] + ``` +* Add Werror=0 to the scons command: +``` +scons arch=arm64-v8a neon=1 opencl=1 embed_kernels=1 extra_cxx_flags="-fPIC" -j8 Werror=0 +``` diff --git a/arch/arm/ARMnn/CMakeLists.txt b/arch/arm/ARMnn/CMakeLists.txt new file mode 100644 index 0000000000..3ba5985efd --- /dev/null +++ b/arch/arm/ARMnn/CMakeLists.txt @@ -0,0 +1,1060 @@ +# +# Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +# Copyright 2020 NXP +# SPDX-License-Identifier: MIT +# +cmake_minimum_required (VERSION 3.0.2) # 3.0.2 required for return() statement used in AddDllCopyCommands.cmake +project(armnn) + +set(additional_cmake_files) +list(APPEND additional_cmake_files + cmake/ArmnnVersion.cmake + cmake/DelegateVersion.cmake + cmake/ParserVersion.cmake + cmake/Utils.cmake + cmake/GlobalConfig.cmake + cmake/AddDllCopyCommands.cmake) + +foreach(cmake_file ${additional_cmake_files}) + include(${cmake_file}) +endforeach() + +cmake_policy(SET CMP0057 NEW) + +# If the root of the tensorflow source tree has not been specified +# and we are trying to build the tensorflow lite parser make +# our best guess as to where it will be i.e. one directory above +# the one this CMakeLists.txt file lives in, in a subdirectory +# called tensorflow +if (BUILD_TF_LITE_PARSER AND "${TENSORFLOW_ROOT}" STREQUAL "") + set(TENSORFLOW_ROOT "${PROJECT_SOURCE_DIR}/../tensorflow" CACHE INTERNAL "") + message(STATUS "Set TENSORFLOW_ROOT: ${TENSORFLOW_ROOT}") +else() + message(STATUS "TENSORFLOW_ROOT: ${TENSORFLOW_ROOT}") +endif() + +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/delegate/cmake/Modules/") + +set(as_subproject Armnn) + +macro(find_package) + if(NOT ${ARGV0} IN_LIST as_subproject) + _find_package(${ARGV}) + endif() +endmacro() + +if (DYNAMIC_BACKEND_PATHS) + # It's expected to have the format: DYNAMIC_BACKEND_PATHS="PATH_1:PATH_2...:PATH_N" + add_definitions('-DDYNAMIC_BACKEND_PATHS="${DYNAMIC_BACKEND_PATHS}"') +endif() + +include(GNUInstallDirs) + +add_subdirectory(samples) +add_subdirectory(src/armnnTfLiteParser) +add_subdirectory(src/armnnSerializer) +add_subdirectory(src/armnnDeserializer) +add_subdirectory(src/armnnTestUtils) + + +if (BUILD_ARMNN_TFLITE_DELEGATE) + set(ARMNN_SUB_PROJECT ON) + add_subdirectory(delegate) + add_definitions(-DARMNN_TF_LITE_DELEGATE) +endif() + +if (BUILD_TESTS) + add_subdirectory(tests) +endif() + +# Include the additional cmake files in their own target so that they will appear nicely in IDEs +add_custom_target(AdditionalCMakeFiles SOURCES ${additional_cmake_files}) + +set(armnnUtils_sources) +list(APPEND armnnUtils_sources + include/armnnUtils/CompatibleTypes.hpp + include/armnnUtils/DataLayoutIndexed.hpp + include/armnnUtils/Permute.hpp + include/armnnUtils/Filesystem.hpp + include/armnnUtils/FloatingPointComparison.hpp + include/armnnUtils/FloatingPointConverter.hpp + include/armnnUtils/QuantizeHelper.hpp + include/armnnUtils/TContainer.hpp + include/armnnUtils/TensorUtils.hpp + include/armnnUtils/Threads.hpp + include/armnnUtils/Transpose.hpp + src/armnnUtils/BFloat16.hpp + src/armnnUtils/CompatibleTypes.cpp + src/armnnUtils/Filesystem.cpp + src/armnnUtils/GraphTopologicalSort.hpp + src/armnnUtils/Half.hpp + src/armnnUtils/Permute.cpp + src/armnnUtils/DataLayoutIndexed.cpp + src/armnnUtils/DotSerializer.cpp + 
src/armnnUtils/DotSerializer.hpp + src/armnnUtils/HeapProfiling.cpp + src/armnnUtils/HeapProfiling.hpp + src/armnnUtils/LeakChecking.cpp + src/armnnUtils/LeakChecking.hpp + src/armnnUtils/ModelAccuracyChecker.cpp + src/armnnUtils/ModelAccuracyChecker.hpp + src/armnnUtils/FloatingPointConverter.cpp + src/armnnUtils/VerificationHelpers.hpp + src/armnnUtils/VerificationHelpers.cpp + src/armnnUtils/ParserHelper.hpp + src/armnnUtils/ParserHelper.cpp + src/armnnUtils/ParserPrototxtFixture.hpp + src/armnnUtils/Processes.hpp + src/armnnUtils/Processes.cpp + src/armnnUtils/PrototxtConversions.hpp + src/armnnUtils/PrototxtConversions.cpp + src/armnnUtils/TensorIOUtils.hpp + src/armnnUtils/TensorUtils.cpp + src/armnnUtils/Threads.cpp + src/armnnUtils/Transpose.cpp + ) + +add_library_ex(armnnUtils STATIC ${armnnUtils_sources}) +target_include_directories(armnnUtils PRIVATE src/backends) + +if(BUILD_ONNX_PARSER) + set(armnn_onnx_parser_sources) + list(APPEND armnn_onnx_parser_sources + include/armnnOnnxParser/IOnnxParser.hpp + include/armnnOnnxParser/Version.hpp + src/armnnOnnxParser/OnnxParser.hpp + src/armnnOnnxParser/OnnxParser.cpp + ${ONNX_GENERATED_SOURCES}/onnx/onnx.pb.cc + ) + # The generated onnx protobuf .cc files are not warning clean and we can't fix them. + if(COMPILER_IS_GNU_LIKE) + set_source_files_properties(${ONNX_GENERATED_SOURCES}/onnx/onnx.pb.cc PROPERTIES COMPILE_FLAGS "-Wno-conversion -Wno-sign-conversion") + endif() + + add_library_ex(armnnOnnxParser SHARED ${armnn_onnx_parser_sources}) + + target_include_directories(armnnOnnxParser PRIVATE src/armnnUtils) + + target_link_libraries(armnnOnnxParser armnn) + + # Protobuf + target_link_libraries(armnnOnnxParser ${PROTOBUF_LIBRARIES}) + set_target_properties(armnnOnnxParser PROPERTIES VERSION ${ONNX_PARSER_LIB_VERSION} SOVERSION ${ONNX_PARSER_LIB_SOVERSION}) +endif() + +list(APPEND armnn_sources + include/armnn/ArmNN.hpp + include/armnn/BackendHelper.hpp + include/armnn/BackendId.hpp + include/armnn/BackendOptions.hpp + include/armnn/BackendRegistry.hpp + include/armnn/Conversion.hpp + include/armnn/Deprecated.hpp + include/armnn/Descriptors.hpp + include/armnn/DescriptorsFwd.hpp + include/armnn/Exceptions.hpp + include/armnn/backends/ILayerSupport.hpp + include/armnn/backends/ICustomAllocator.hpp + include/armnn/IAsyncExecutionCallback.hpp + include/armnn/ILayerVisitor.hpp + include/armnn/INetwork.hpp + include/armnn/IProfiler.hpp + include/armnn/IRuntime.hpp + include/armnn/IStrategy.hpp + include/armnn/IWorkingMemHandle.hpp + include/armnn/LayerVisitorBase.hpp + include/armnn/Logging.hpp + include/armnn/LstmParams.hpp + include/armnn/MemorySources.hpp + include/armnn/NetworkFwd.hpp + include/armnn/Optional.hpp + include/armnn/QuantizedLstmParams.hpp + include/armnn/StrategyBase.hpp + include/armnn/Tensor.hpp + include/armnn/TensorFwd.hpp + include/armnn/Threadpool.hpp + include/armnn/Types.hpp + include/armnn/TypesUtils.hpp + include/armnn/Utils.hpp + include/armnn/Version.hpp + include/armnn/profiling/ISendTimelinePacket.hpp + include/armnn/utility/Assert.hpp + include/armnn/utility/IgnoreUnused.hpp + include/armnn/utility/NumericCast.hpp + include/armnn/utility/PolymorphicDowncast.hpp + include/armnn/utility/StringUtils.hpp + include/armnn/utility/TransformIterator.hpp + include/armnn/backends/SubgraphView.hpp + src/armnn/layers/LayerCloneBase.hpp + src/armnn/layers/LayerWithParameters.hpp + src/armnn/layers/ActivationLayer.hpp + src/armnn/layers/ActivationLayer.cpp + src/armnn/layers/AdditionLayer.hpp + 
src/armnn/layers/AdditionLayer.cpp + src/armnn/layers/ArgMinMaxLayer.hpp + src/armnn/layers/ArgMinMaxLayer.cpp + src/armnn/layers/BatchNormalizationLayer.hpp + src/armnn/layers/BatchNormalizationLayer.cpp + src/armnn/layers/BatchToSpaceNdLayer.hpp + src/armnn/layers/BatchToSpaceNdLayer.cpp + src/armnn/layers/CastLayer.hpp + src/armnn/layers/CastLayer.cpp + src/armnn/layers/ChannelShuffleLayer.hpp + src/armnn/layers/ChannelShuffleLayer.cpp + src/armnn/layers/ComparisonLayer.hpp + src/armnn/layers/ComparisonLayer.cpp + src/armnn/layers/ConcatLayer.hpp + src/armnn/layers/ConcatLayer.cpp + src/armnn/layers/ConstantLayer.hpp + src/armnn/layers/ConstantLayer.cpp + src/armnn/layers/Convolution2dLayer.hpp + src/armnn/layers/Convolution2dLayer.cpp + src/armnn/layers/Convolution3dLayer.hpp + src/armnn/layers/Convolution3dLayer.cpp + src/armnn/layers/ConvertBf16ToFp32Layer.cpp + src/armnn/layers/ConvertBf16ToFp32Layer.hpp + src/armnn/layers/ConvertFp16ToFp32Layer.hpp + src/armnn/layers/ConvertFp16ToFp32Layer.cpp + src/armnn/layers/ConvertFp32ToBf16Layer.hpp + src/armnn/layers/ConvertFp32ToBf16Layer.cpp + src/armnn/layers/ConvertFp32ToFp16Layer.hpp + src/armnn/layers/ConvertFp32ToFp16Layer.cpp + src/armnn/layers/DebugLayer.hpp + src/armnn/layers/DebugLayer.cpp + src/armnn/layers/DepthToSpaceLayer.hpp + src/armnn/layers/DepthToSpaceLayer.cpp + src/armnn/layers/DepthwiseConvolution2dLayer.hpp + src/armnn/layers/DepthwiseConvolution2dLayer.cpp + src/armnn/layers/DequantizeLayer.hpp + src/armnn/layers/DequantizeLayer.cpp + src/armnn/layers/DetectionPostProcessLayer.hpp + src/armnn/layers/DetectionPostProcessLayer.cpp + src/armnn/layers/ElementwiseBaseLayer.hpp + src/armnn/layers/ElementwiseBaseLayer.cpp + src/armnn/layers/ElementwiseUnaryLayer.hpp + src/armnn/layers/ElementwiseUnaryLayer.cpp + src/armnn/layers/FakeQuantizationLayer.hpp + src/armnn/layers/FakeQuantizationLayer.cpp + src/armnn/layers/FillLayer.hpp + src/armnn/layers/FillLayer.cpp + src/armnn/layers/FloorLayer.hpp + src/armnn/layers/FloorLayer.cpp + src/armnn/layers/FullyConnectedLayer.hpp + src/armnn/layers/FullyConnectedLayer.cpp + src/armnn/layers/GatherLayer.cpp + src/armnn/layers/GatherLayer.hpp + src/armnn/layers/InputLayer.hpp + src/armnn/layers/InputLayer.cpp + src/armnn/layers/InstanceNormalizationLayer.hpp + src/armnn/layers/InstanceNormalizationLayer.cpp + src/armnn/layers/L2NormalizationLayer.hpp + src/armnn/layers/L2NormalizationLayer.cpp + src/armnn/layers/LogicalBinaryLayer.hpp + src/armnn/layers/LogicalBinaryLayer.cpp + src/armnn/layers/LogSoftmaxLayer.hpp + src/armnn/layers/LogSoftmaxLayer.cpp + src/armnn/layers/LstmLayer.cpp + src/armnn/layers/LstmLayer.hpp + src/armnn/layers/MapLayer.cpp + src/armnn/layers/MapLayer.hpp + src/armnn/layers/MaximumLayer.cpp + src/armnn/layers/MaximumLayer.hpp + src/armnn/layers/MeanLayer.hpp + src/armnn/layers/MeanLayer.cpp + src/armnn/layers/MemCopyLayer.hpp + src/armnn/layers/MemCopyLayer.cpp + src/armnn/layers/MemImportLayer.hpp + src/armnn/layers/MemImportLayer.cpp + src/armnn/layers/MergeLayer.hpp + src/armnn/layers/MergeLayer.cpp + src/armnn/layers/MinimumLayer.cpp + src/armnn/layers/MinimumLayer.hpp + src/armnn/layers/MultiplicationLayer.hpp + src/armnn/layers/MultiplicationLayer.cpp + src/armnn/layers/NormalizationLayer.hpp + src/armnn/layers/NormalizationLayer.cpp + src/armnn/layers/OutputLayer.hpp + src/armnn/layers/OutputLayer.cpp + src/armnn/layers/PadLayer.hpp + src/armnn/layers/PadLayer.cpp + src/armnn/layers/PermuteLayer.hpp + src/armnn/layers/PermuteLayer.cpp + 
src/armnn/layers/Pooling2dLayer.hpp + src/armnn/layers/Pooling2dLayer.cpp + src/armnn/layers/Pooling3dLayer.hpp + src/armnn/layers/Pooling3dLayer.cpp + src/armnn/layers/QuantizeLayer.cpp + src/armnn/layers/QuantizeLayer.hpp + src/armnn/layers/QLstmLayer.hpp + src/armnn/layers/QLstmLayer.cpp + src/armnn/layers/QuantizedLstmLayer.hpp + src/armnn/layers/QuantizedLstmLayer.cpp + src/armnn/layers/DivisionLayer.cpp + src/armnn/layers/DivisionLayer.hpp + src/armnn/layers/PreCompiledLayer.hpp + src/armnn/layers/PreCompiledLayer.cpp + src/armnn/layers/PreluLayer.hpp + src/armnn/layers/PreluLayer.cpp + src/armnn/layers/RankLayer.hpp + src/armnn/layers/RankLayer.cpp + src/armnn/layers/ReduceLayer.hpp + src/armnn/layers/ReduceLayer.cpp + src/armnn/layers/ReshapeLayer.hpp + src/armnn/layers/ReshapeLayer.cpp + src/armnn/layers/ResizeLayer.hpp + src/armnn/layers/ResizeLayer.cpp + src/armnn/layers/ShapeLayer.cpp + src/armnn/layers/ShapeLayer.hpp + src/armnn/layers/SliceLayer.cpp + src/armnn/layers/SliceLayer.hpp + src/armnn/layers/SoftmaxLayer.hpp + src/armnn/layers/SoftmaxLayer.cpp + src/armnn/layers/SpaceToBatchNdLayer.hpp + src/armnn/layers/SpaceToBatchNdLayer.cpp + src/armnn/layers/SpaceToDepthLayer.hpp + src/armnn/layers/SpaceToDepthLayer.cpp + src/armnn/layers/SplitterLayer.hpp + src/armnn/layers/SplitterLayer.cpp + src/armnn/layers/StackLayer.hpp + src/armnn/layers/StackLayer.cpp + src/armnn/layers/StandInLayer.cpp + src/armnn/layers/StandInLayer.hpp + src/armnn/layers/StridedSliceLayer.cpp + src/armnn/layers/StridedSliceLayer.hpp + src/armnn/layers/SubtractionLayer.cpp + src/armnn/layers/SubtractionLayer.hpp + src/armnn/layers/SwitchLayer.cpp + src/armnn/layers/SwitchLayer.hpp + src/armnn/layers/TransposeConvolution2dLayer.cpp + src/armnn/layers/TransposeConvolution2dLayer.hpp + src/armnn/layers/TransposeLayer.hpp + src/armnn/layers/TransposeLayer.cpp + src/armnn/layers/UnidirectionalSequenceLstmLayer.cpp + src/armnn/layers/UnidirectionalSequenceLstmLayer.hpp + src/armnn/layers/UnmapLayer.cpp + src/armnn/layers/UnmapLayer.hpp + src/armnn/AsyncExecutionCallback.cpp + src/armnn/AsyncExecutionCallback.hpp + src/armnn/BackendRegistry.cpp + src/armnn/BackendSettings.hpp + src/armnn/BackendHelper.cpp + src/armnn/Descriptors.cpp + src/armnn/DeviceSpec.hpp + src/armnn/DllExport.hpp + src/armnn/Exceptions.cpp + src/armnn/ExecutionFrame.cpp + src/armnn/ExecutionFrame.hpp + src/armnn/Graph.cpp + src/armnn/Graph.hpp + src/armnn/IGraphObservable.hpp + src/armnn/ILayerSupport.cpp + src/armnn/Instrument.hpp + src/armnn/InternalTypes.cpp + src/armnn/InternalTypes.hpp + src/armnn/ISubgraphViewConverter.hpp + src/armnn/JsonPrinter.cpp + src/armnn/JsonPrinter.hpp + src/armnn/Layer.cpp + src/armnn/LayerFwd.hpp + src/armnn/Layer.hpp + src/armnn/LayersFwd.hpp + src/armnn/LayerSupportCommon.hpp + src/armnn/LoadedNetwork.cpp + src/armnn/LoadedNetwork.hpp + src/armnn/Logging.cpp + src/armnn/Network.cpp + src/armnn/Network.hpp + src/armnn/NetworkUtils.cpp + src/armnn/NetworkUtils.hpp + src/armnn/Observable.cpp + src/armnn/Observable.hpp + src/armnn/Optimizer.cpp + src/armnn/Optimizer.hpp + src/armnn/OutputHandler.cpp + src/armnn/OutputHandler.hpp + src/armnn/Profiling.cpp + src/armnn/ProfilingEvent.cpp + src/armnn/ProfilingDetails.hpp + src/armnn/ProfilingEvent.hpp + src/armnn/Profiling.hpp + src/armnn/Runtime.cpp + src/armnn/Runtime.hpp + src/armnn/RangeTracker.cpp + src/armnn/RangeTracker.hpp + src/armnn/ResolveType.hpp + src/armnn/SerializeLayerParameters.cpp + src/armnn/SerializeLayerParameters.hpp + 
src/armnn/SubgraphView.cpp + src/armnn/SubgraphViewSelector.cpp + src/armnn/SubgraphViewSelector.hpp + src/armnn/Tensor.cpp + src/armnn/Threadpool.cpp + src/armnn/TypesUtils.cpp + src/armnn/Utils.cpp + src/armnn/WallClockTimer.cpp + src/armnn/WallClockTimer.hpp + src/armnn/WorkingMemDescriptor.hpp + src/armnn/WorkingMemHandle.cpp + src/armnn/WorkingMemHandle.hpp + src/armnn/optimizations/AddBroadcastReshapeLayer.hpp + src/armnn/optimizations/AddDebug.hpp + src/armnn/optimizations/All.hpp + src/armnn/optimizations/ConvertConstants.hpp + src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp + src/armnn/optimizations/ConvertFp32NetworkToFp16.hpp + src/armnn/optimizations/FoldPadIntoLayer2d.hpp + src/armnn/optimizations/MovePermuteUp.hpp + src/armnn/optimizations/MoveTransposeUp.hpp + src/armnn/optimizations/Optimization.hpp + src/armnn/optimizations/OptimizeConsecutiveReshapes.hpp + src/armnn/optimizations/OptimizeInverseConversions.hpp + src/armnn/optimizations/OptimizeInversePermutes.hpp + src/armnn/optimizations/PermuteAndBatchToSpaceAsDepthToSpace.hpp + src/armnn/optimizations/PermuteAsReshape.hpp + src/armnn/optimizations/SquashEqualSiblings.hpp + src/profiling/ActivateTimelineReportingCommandHandler.cpp + src/profiling/ActivateTimelineReportingCommandHandler.hpp + src/profiling/BufferManager.cpp + src/profiling/BufferManager.hpp + src/profiling/CommandHandler.cpp + src/profiling/CommandHandler.hpp + src/profiling/ConnectionAcknowledgedCommandHandler.cpp + src/profiling/ConnectionAcknowledgedCommandHandler.hpp + src/profiling/CounterDirectory.cpp + src/profiling/CounterDirectory.hpp + src/profiling/CounterIdMap.cpp + src/profiling/CounterIdMap.hpp + src/profiling/DeactivateTimelineReportingCommandHandler.cpp + src/profiling/DeactivateTimelineReportingCommandHandler.hpp + src/profiling/DirectoryCaptureCommandHandler.cpp + src/profiling/DirectoryCaptureCommandHandler.hpp + src/profiling/FileOnlyProfilingConnection.cpp + src/profiling/FileOnlyProfilingConnection.hpp + src/profiling/Holder.cpp + src/profiling/Holder.hpp + src/profiling/IBufferManager.hpp + src/profiling/IConsumer.hpp + src/profiling/ICounterDirectory.hpp + src/profiling/ICounterRegistry.hpp + src/profiling/ICounterValues.hpp + src/profiling/INotifyBackends.hpp + src/profiling/IReportStructure.hpp + src/profiling/ISendCounterPacket.hpp + src/profiling/ISendThread.hpp + src/profiling/IPacketBuffer.hpp + src/profiling/IPeriodicCounterCapture.hpp + src/profiling/IProfilingConnection.hpp + src/profiling/IProfilingConnectionFactory.hpp + src/profiling/NullProfilingConnection.hpp + src/profiling/PacketBuffer.cpp + src/profiling/PacketBuffer.hpp + src/profiling/PeriodicCounterCapture.hpp + src/profiling/PeriodicCounterCapture.cpp + src/profiling/PeriodicCounterSelectionCommandHandler.cpp + src/profiling/PeriodicCounterSelectionCommandHandler.hpp + src/profiling/PerJobCounterSelectionCommandHandler.cpp + src/profiling/PerJobCounterSelectionCommandHandler.hpp + src/profiling/ProfilingConnectionDumpToFileDecorator.cpp + src/profiling/ProfilingConnectionDumpToFileDecorator.hpp + src/profiling/ProfilingConnectionFactory.cpp + src/profiling/ProfilingConnectionFactory.hpp + src/profiling/ProfilingService.cpp + src/profiling/ProfilingService.hpp + src/profiling/ProfilingStateMachine.cpp + src/profiling/ProfilingStateMachine.hpp + src/profiling/ProfilingUtils.cpp + src/profiling/ProfilingUtils.hpp + src/profiling/RegisterBackendCounters.cpp + src/profiling/RegisterBackendCounters.hpp + src/profiling/RequestCounterDirectoryCommandHandler.cpp + 
src/profiling/RequestCounterDirectoryCommandHandler.hpp + src/profiling/SendCounterPacket.cpp + src/profiling/SendCounterPacket.hpp + src/profiling/SendThread.cpp + src/profiling/SendThread.hpp + src/profiling/SendTimelinePacket.cpp + src/profiling/SendTimelinePacket.hpp + src/profiling/SocketProfilingConnection.cpp + src/profiling/SocketProfilingConnection.hpp + src/profiling/TimelinePacketWriterFactory.cpp + src/profiling/TimelinePacketWriterFactory.hpp + src/profiling/TimelineUtilityMethods.cpp + src/profiling/TimelineUtilityMethods.hpp + src/profiling/backends/BackendProfiling.cpp + src/profiling/backends/BackendProfiling.hpp + third-party/cxxopts/cxxopts.hpp + third-party/ghc/filesystem.hpp + third-party/half/half.hpp + third-party/mapbox/optional.hpp + third-party/mapbox/recursive_wrapper.hpp + third-party/mapbox/variant.hpp + third-party/mapbox/variant_cast.hpp + third-party/mapbox/variant_io.hpp + third-party/mapbox/variant_visitor.hpp + ) + +# Files used for Streamline-based profiling backend +if(PROFILING_BACKEND_STREAMLINE) + list(APPEND armnn_sources + ${GATOR_ROOT}/annotate/streamline_annotate.h + ${GATOR_ROOT}/annotate/streamline_annotate.c) +endif() + +# the backends under src/backends extend the list of +# object libs armnn to include in the build +include(src/backends/backends.cmake) +foreach(lib ${armnnLibraries}) + message(STATUS "Adding object library dependency to armnn: ${lib}") + list(APPEND armnn_sources $) +endforeach() + +add_library_ex(armnn SHARED ${armnn_sources}) + +target_compile_definitions(armnn PRIVATE "ARMNN_COMPILING_DLL") + +target_include_directories(armnn + PUBLIC + $ + $ + PRIVATE + src/armnn + src/armnnUtils + src/backends + src/profiling + profiling/common/include +) + +target_link_libraries(armnn armnnUtils) +# only link pipeCommon if it has been built +if(BUILD_TIMELINE_DECODER) + target_link_libraries(armnn pipeCommon) +endif() + +target_link_libraries(armnn ${CMAKE_DL_LIBS}) +if ("${CMAKE_SYSTEM_NAME}" STREQUAL Windows) + target_link_libraries(armnn Ws2_32.lib) +endif() + +# Link fmt third-party library +add_subdirectory(third-party/fmt) +target_link_libraries(armnn fmt) + +if(BUILD_ONNX_PARSER) + install(TARGETS armnnOnnxParser + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) +endif() + +install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + +if(ARMCOMPUTENEON OR ARMCOMPUTECL) + target_link_libraries(armnn ${ARMCOMPUTE_LIBRARIES}) +endif() + +if(PROFILING_BACKEND_STREAMLINE AND (NOT ("${CMAKE_SYSTEM_NAME}" STREQUAL Android))) + target_link_libraries(armnn ${CMAKE_THREAD_LIBS_INIT}) +endif() + +set_target_properties(armnn PROPERTIES VERSION ${GENERIC_LIB_VERSION} SOVERSION ${GENERIC_LIB_SOVERSION}) + +if(BUILD_UNIT_TESTS) + set(unittest_sources) + list(APPEND unittest_sources + src/armnn/test/CloneTests.cpp + src/armnn/test/ConstTensorLayerVisitor.hpp + src/armnn/test/ConstTensorLayerVisitor.cpp + src/armnn/test/CreateWorkload.hpp + src/armnn/test/EndToEndTest.cpp + src/armnn/test/ExecutionFrameTest.cpp + src/armnn/test/FloatingPointConverterTest.cpp + src/armnn/test/FlowControl.cpp + src/armnn/test/GraphTests.cpp + src/armnn/test/GraphUtils.hpp + src/armnn/test/InstrumentTests.cpp + src/armnn/test/InferOutputTests.cpp + src/armnn/test/InferOutputTests.hpp + src/armnn/test/ModelAccuracyCheckerTest.cpp + src/armnn/test/NetworkTests.cpp + src/armnn/test/ObservableTest.cpp + src/armnn/test/OptimizerTests.cpp + src/armnn/test/optimizations/AddBroadcastReshapeLayerTests.cpp + 
src/armnn/test/optimizations/ConvertConstantsBFloatTests.cpp + src/armnn/test/optimizations/ConvertConstantsFloatToHalfTests.cpp + src/armnn/test/optimizations/ConvertConstantsHalfToFloatTests.cpp + src/armnn/test/optimizations/FoldPadTests.cpp + src/armnn/test/optimizations/Fp32NetworkToBf16ConverterTests.cpp + src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp + src/armnn/test/optimizations/FuseActivationTests.cpp + src/armnn/test/optimizations/FuseBatchNormTests.cpp + src/armnn/test/optimizations/InsertDebugLayerTests.cpp + src/armnn/test/optimizations/MovePermuteUpTests.cpp + src/armnn/test/optimizations/MoveTransposeUpTests.cpp + src/armnn/test/optimizations/OptimizeConsecutiveReshapesTests.cpp + src/armnn/test/optimizations/OptimizeInverseConversionsTests.cpp + src/armnn/test/optimizations/OptimizeInversePermutesTests.cpp + src/armnn/test/optimizations/PermuteAndBatchToSpaceAsDepthToSpaceTests.cpp + src/armnn/test/optimizations/PermuteAsReshapeTests.cpp + src/armnn/test/optimizations/RedirectMembersToConstantInputsTests.cpp + src/armnn/test/optimizations/ReduceMultipleAxesTests.cpp + src/armnn/test/optimizations/SquashEqualSiblingsTests.cpp + src/armnn/test/optimizations/TransposeAsReshapeTests.cpp + src/armnn/test/OptionalTest.cpp + src/armnn/test/PredicateResult.hpp + src/armnn/test/ProfilerTests.cpp + src/armnn/test/ProfilingEventTest.cpp + src/armnn/test/ShapeInferenceTests.cpp + src/armnn/test/SubgraphViewTests.cpp + src/armnn/test/TensorHandleStrategyTest.cpp + src/armnn/test/TensorHelpers.hpp + src/armnn/test/TensorTest.cpp + src/armnn/test/TestInputOutputLayerVisitor.cpp + src/armnn/test/TestInputOutputLayerVisitor.hpp + src/armnn/test/TestLayerVisitor.cpp + src/armnn/test/TestLayerVisitor.hpp + src/armnn/test/TestNameOnlyLayerVisitor.cpp + src/armnn/test/TestNameOnlyLayerVisitor.hpp + src/armnn/test/TestUtils.hpp + src/armnn/test/UnitTests.hpp + src/armnn/test/TestNameAndDescriptorLayerVisitor.hpp + src/armnn/test/TestNameAndDescriptorLayerVisitor.cpp + src/armnn/test/UtilityTests.cpp + src/armnn/test/UtilsTests.cpp + src/armnnUtils/test/FloatingPointComparisonTest.cpp + src/armnnUtils/test/ParserHelperTest.cpp + src/armnnUtils/test/PrototxtConversionsTest.cpp + src/armnnUtils/test/QuantizeHelperTest.cpp + src/armnnUtils/test/TensorUtilsTest.cpp + src/armnnUtils/test/TransformIteratorTest.cpp + src/profiling/test/BufferTests.cpp + src/profiling/test/FileOnlyProfilingDecoratorTests.cpp + src/profiling/test/PrintPacketHeaderHandler.cpp + src/profiling/test/PrintPacketHeaderHandler.hpp + src/profiling/test/ProfilingConnectionDumpToFileDecoratorTests.cpp + src/profiling/test/ProfilingGuidTest.cpp + src/profiling/test/ProfilingMocks.hpp + src/profiling/test/ProfilingTests.cpp + src/profiling/test/ProfilingTests.hpp + src/profiling/test/ProfilingTestUtils.cpp + src/profiling/test/ProfilingTestUtils.hpp + src/profiling/test/RequestCountersPacketHandler.cpp + src/profiling/test/RequestCountersPacketHandler.hpp + src/profiling/test/SendCounterPacketTests.cpp + src/profiling/test/SendCounterPacketTests.hpp + src/profiling/test/TestTimelinePacketHandler.cpp + src/profiling/test/TestTimelinePacketHandler.hpp + src/profiling/test/TimelineModel.cpp + src/profiling/test/TimelineModel.hpp + src/profiling/test/SendTimelinePacketTests.cpp + src/profiling/test/TimelinePacketTests.cpp + src/profiling/test/TimelineUtilityMethodsTests.cpp + third-party/doctest/doctest.h + ) + + if(ARMNNREF) + list(APPEND unittest_sources + src/armnn/test/DebugCallbackTest.cpp + 
src/armnn/test/RuntimeTests.cpp + src/armnn/test/RuntimeTests.hpp + ) + endif() + + if(BUILD_TF_LITE_PARSER AND ARMNNREF) + list(APPEND unittest_sources + src/armnnTfLiteParser/test/Activations.cpp + src/armnnTfLiteParser/test/Addition.cpp + src/armnnTfLiteParser/test/ArgMinMax.cpp + src/armnnTfLiteParser/test/AvgPool2D.cpp + src/armnnTfLiteParser/test/BatchToSpaceND.cpp + src/armnnTfLiteParser/test/Cast.cpp + src/armnnTfLiteParser/test/Comparison.cpp + src/armnnTfLiteParser/test/Concatenation.cpp + src/armnnTfLiteParser/test/Constant.cpp + src/armnnTfLiteParser/test/Conv2D.cpp + src/armnnTfLiteParser/test/Conv3D.cpp + src/armnnTfLiteParser/test/DepthwiseConvolution2D.cpp + src/armnnTfLiteParser/test/DepthToSpace.cpp + src/armnnTfLiteParser/test/Dequantize.cpp + src/armnnTfLiteParser/test/DetectionPostProcess.cpp + src/armnnTfLiteParser/test/Div.cpp + src/armnnTfLiteParser/test/ElementWiseUnary.cpp + src/armnnTfLiteParser/test/ExpandDims.cpp + src/armnnTfLiteParser/test/FullyConnected.cpp + src/armnnTfLiteParser/test/Gather.cpp + src/armnnTfLiteParser/test/L2Normalization.cpp + src/armnnTfLiteParser/test/LeakyRelu.cpp + src/armnnTfLiteParser/test/LoadScopeDynamicTensor.cpp + src/armnnTfLiteParser/test/LocalResponseNormalization.cpp + src/armnnTfLiteParser/test/Maximum.cpp + src/armnnTfLiteParser/test/MaxPool2D.cpp + src/armnnTfLiteParser/test/Mean.cpp + src/armnnTfLiteParser/test/Minimum.cpp + src/armnnTfLiteParser/test/MirrorPad.cpp + src/armnnTfLiteParser/test/Multiplication.cpp + src/armnnTfLiteParser/test/Pack.cpp + src/armnnTfLiteParser/test/Pad.cpp + src/armnnTfLiteParser/test/PadV2.cpp + src/armnnTfLiteParser/test/Prelu.cpp + src/armnnTfLiteParser/test/Reduce.cpp + src/armnnTfLiteParser/test/Reshape.cpp + src/armnnTfLiteParser/test/ReshapeDynamic.cpp + src/armnnTfLiteParser/test/ResizeBilinear.cpp + src/armnnTfLiteParser/test/ResizeNearestNeighbor.cpp + src/armnnTfLiteParser/test/Quantize.cpp + src/armnnTfLiteParser/test/Softmax.cpp + src/armnnTfLiteParser/test/SpaceToBatchND.cpp + src/armnnTfLiteParser/test/Shape.cpp + src/armnnTfLiteParser/test/Slice.cpp + src/armnnTfLiteParser/test/Split.cpp + src/armnnTfLiteParser/test/SplitV.cpp + src/armnnTfLiteParser/test/Squeeze.cpp + src/armnnTfLiteParser/test/StridedSlice.cpp + src/armnnTfLiteParser/test/Sub.cpp + src/armnnTfLiteParser/test/Sum.cpp + src/armnnTfLiteParser/test/TransposeConv.cpp + src/armnnTfLiteParser/test/Transpose.cpp + src/armnnTfLiteParser/test/Unpack.cpp + src/armnnTfLiteParser/test/Unsupported.cpp + src/armnnTfLiteParser/test/LoadModel.cpp + src/armnnTfLiteParser/test/GetBuffer.cpp + src/armnnTfLiteParser/test/OutputShapeOfSqueeze.cpp + src/armnnTfLiteParser/test/InputOutputTensorNames.cpp + src/armnnTfLiteParser/test/GetTensorIds.cpp + src/armnnTfLiteParser/test/GetSubgraphInputsOutputs.cpp + src/armnnTfLiteParser/test/GetInputsOutputs.cpp + src/armnnTfLiteParser/test/TfLiteParser.cpp + ) + + # Generate SchemaText.cpp file which contains the TfLite schema text as a + # static C-array of bytes. This is needed at runtime for TfLite parser tests. 
+ add_custom_command( + OUTPUT SchemaText.cpp + COMMAND cp ${TF_LITE_SCHEMA_INCLUDE_PATH}/schema.fbs g_TfLiteSchemaText + COMMAND xxd -i g_TfLiteSchemaText SchemaText.cpp + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS ${TF_LITE_SCHEMA_INCLUDE_PATH}/schema.fbs + ) + list(APPEND unittest_sources ${CMAKE_CURRENT_BINARY_DIR}/SchemaText.cpp) + endif() + + if(BUILD_ONNX_PARSER AND ARMNNREF) + list(APPEND unittest_sources + src/armnnOnnxParser/test/Addition.cpp + src/armnnOnnxParser/test/BatchNorm.cpp + src/armnnOnnxParser/test/Clip.cpp + src/armnnOnnxParser/test/Concat.cpp + src/armnnOnnxParser/test/Const.cpp + src/armnnOnnxParser/test/Constructor.cpp + src/armnnOnnxParser/test/Conv2D.cpp + src/armnnOnnxParser/test/CreateNetwork.cpp + src/armnnOnnxParser/test/DepthConv.cpp + src/armnnOnnxParser/test/Flatten.cpp + src/armnnOnnxParser/test/FullyConnected.cpp + src/armnnOnnxParser/test/Gather.cpp + src/armnnOnnxParser/test/Gemm.cpp + src/armnnOnnxParser/test/GetInputsOutputs.cpp + src/armnnOnnxParser/test/LoadScopeDynamicTensor.cpp + src/armnnOnnxParser/test/OnnxParserTestUtils.cpp + src/armnnOnnxParser/test/OnnxParserTestUtils.hpp + src/armnnOnnxParser/test/Pooling.cpp + src/armnnOnnxParser/test/ProtoxtFixture.cpp + src/armnnOnnxParser/test/Relu.cpp + src/armnnOnnxParser/test/Reshape.cpp + src/armnnOnnxParser/test/Shape.cpp + src/armnnOnnxParser/test/Unsqueeze.cpp + ) + endif() + + if(BUILD_ARMNN_SERIALIZER AND ARMNNREF) + enable_language(ASM) + list(APPEND unittest_sources + src/armnnSerializer/test/ActivationSerializationTests.cpp + src/armnnSerializer/test/ComparisonSerializationTests.cpp + src/armnnSerializer/test/LstmSerializationTests.cpp + src/armnnSerializer/test/SerializerTests.cpp + src/armnnSerializer/test/SerializerTestUtils.cpp + src/armnnSerializer/test/SerializerTestUtils.hpp + src/armnnDeserializer/test/DeserializeAbs.cpp + src/armnnDeserializer/test/DeserializeActivation.cpp + src/armnnDeserializer/test/DeserializeAdd.cpp + src/armnnDeserializer/test/DeserializeArgMinMax.cpp + src/armnnDeserializer/test/DeserializeBatchToSpaceNd.cpp + src/armnnDeserializer/test/DeserializeBatchNormalization.cpp + src/armnnDeserializer/test/DeserializeCast.cpp + src/armnnDeserializer/test/DeserializeChannelShuffle.cpp + src/armnnDeserializer/test/DeserializeComparison.cpp + src/armnnDeserializer/test/DeserializeConstant.cpp + src/armnnDeserializer/test/DeserializeConvolution2d.cpp + src/armnnDeserializer/test/DeserializeConvolution3d.cpp + src/armnnDeserializer/test/DeserializeDepthToSpace.cpp + src/armnnDeserializer/test/DeserializeDepthwiseConv2d.cpp + src/armnnDeserializer/test/DeserializeDivision.cpp + src/armnnDeserializer/test/DeserializeFill.cpp + src/armnnDeserializer/test/DeserializeFloor.cpp + src/armnnDeserializer/test/DeserializeFullyConnected.cpp + src/armnnDeserializer/test/DeserializeGather.cpp + src/armnnDeserializer/test/DeserializeInstanceNormalization.cpp + src/armnnDeserializer/test/DeserializeL2Normalization.cpp + src/armnnDeserializer/test/DeserializeLogSoftmax.cpp + src/armnnDeserializer/test/DeserializeMean.cpp + src/armnnDeserializer/test/DeserializeMultiplication.cpp + src/armnnDeserializer/test/DeserializeNormalization.cpp + src/armnnDeserializer/test/DeserializePad.cpp + src/armnnDeserializer/test/DeserializePermute.cpp + src/armnnDeserializer/test/DeserializePooling2d.cpp + src/armnnDeserializer/test/DeserializePooling3d.cpp + src/armnnDeserializer/test/DeserializeRank.cpp + src/armnnDeserializer/test/DeserializeReduceSum.cpp + 
src/armnnDeserializer/test/DeserializeReshape.cpp + src/armnnDeserializer/test/DeserializeResizeBilinear.cpp + src/armnnDeserializer/test/DeserializeRsqrt.cpp + src/armnnDeserializer/test/DeserializeShape.cpp + src/armnnDeserializer/test/DeserializeSlice.cpp + src/armnnDeserializer/test/DeserializeSpaceToBatchNd.cpp + src/armnnDeserializer/test/DeserializeStridedSlice.cpp + src/armnnDeserializer/test/DeserializeSubtraction.cpp + src/armnnDeserializer/test/ParserFlatbuffersSerializeFixture.hpp + src/armnnDeserializer/test/SchemaSerialize.s + ) + set_source_files_properties(src/armnnDeserializer/test/SchemaSerialize.s PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp") + endif() + + if(BUILD_GATORD_MOCK) + list(APPEND unittest_sources + tests/profiling/gatordmock/tests/GatordMockTests.cpp + ) + endif() + + if(BUILD_TIMELINE_DECODER) + list(APPEND unittest_sources + src/timelineDecoder/tests/JSONTimelineDecoderTests.cpp + profiling/server/src/timelineDecoder/tests/TimelineTests.cpp + ) + endif() + + if(BUILD_BASE_PIPE_SERVER) + list(APPEND unittest_sources + profiling/server/src/basePipeServer/tests/BasePipeServerTests.cpp + ) + endif() + + foreach(lib ${armnnUnitTestLibraries}) + message(STATUS "Adding object library dependency to UnitTests: ${lib}") + list(APPEND unittest_sources $) + endforeach() + + add_executable(UnitTests ${unittest_sources}) + target_include_directories(UnitTests PRIVATE src/armnn) + target_include_directories(UnitTests PRIVATE src/armnnUtils) + target_include_directories(UnitTests PRIVATE src/armnnTestUtils) + target_include_directories(UnitTests PRIVATE src/backends) + target_include_directories(UnitTests PRIVATE src/profiling) + + if(VALGRIND_FOUND) + if(HEAP_PROFILING OR LEAK_CHECKING) + message(STATUS "Valgrind is disabled for heap profiling and leak checking builds.") + else() + # Valgrind works with gperftools version number <= 2.4 + target_compile_definitions(UnitTests PRIVATE "WITH_VALGRIND=1") + endif() + endif() + + target_link_libraries(UnitTests ${CMAKE_THREAD_LIBS_INIT}) + + target_link_libraries(UnitTests armnn) + target_link_libraries(UnitTests armnnUtils) + target_link_libraries(UnitTests armnnTestUtils) + + if(BUILD_GATORD_MOCK) + target_link_libraries(UnitTests gatordMockService) + endif() + + if(BUILD_TIMELINE_DECODER) + target_link_libraries(UnitTests timelineDecoder) + target_link_libraries(UnitTests timelineDecoderJson) + endif() + + if(BUILD_TF_LITE_PARSER) + target_include_directories(UnitTests SYSTEM PRIVATE "${TF_LITE_SCHEMA_INCLUDE_PATH}") + target_include_directories(UnitTests SYSTEM PRIVATE "${FLATBUFFERS_INCLUDE_PATH}") + target_link_libraries(UnitTests armnnTfLiteParser) + endif() + + if(BUILD_ARMNN_SERIALIZER AND ARMNNREF) + target_include_directories(UnitTests SYSTEM PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/src/armnnSerializer) + target_include_directories(UnitTests SYSTEM PRIVATE "${FLATBUFFERS_INCLUDE_PATH}") + target_link_libraries(UnitTests armnnSerializer) + endif() + + if(BUILD_ONNX_PARSER) + target_link_libraries(UnitTests armnnOnnxParser) + endif() + + if(BUILD_BASE_PIPE_SERVER) + target_link_libraries(UnitTests armnnBasePipeServer) + endif() + + addDllCopyCommands(UnitTests) +endif() + +if (BUILD_ARMNN_SERIALIZER AND (BUILD_TF_LITE_PARSER OR BUILD_ONNX_PARSER) AND ARMNNREF) + set(ArmnnConverter_sources + src/armnnConverter/ArmnnConverter.cpp) + + add_executable_ex(ArmnnConverter ${ArmnnConverter_sources}) + target_include_directories(ArmnnConverter PRIVATE src/armnn) + target_include_directories(ArmnnConverter PRIVATE src/armnnUtils) 
+ + if(BUILD_ONNX_PARSER) + target_link_libraries(ArmnnConverter armnnOnnxParser) + endif() + + if(BUILD_TF_LITE_PARSER) + target_link_libraries(ArmnnConverter armnnTfLiteParser) + endif() + + target_link_libraries(ArmnnConverter armnnSerializer) + + target_link_libraries(ArmnnConverter armnn) + target_link_libraries(ArmnnConverter ${CMAKE_THREAD_LIBS_INIT}) + addDllCopyCommands(ArmnnConverter) + + install(TARGETS ArmnnConverter + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) +endif() + +if(BUILD_BASE_PIPE_SERVER) + add_subdirectory(profiling/server/src/basePipeServer) +endif() + +if(BUILD_TIMELINE_DECODER) + add_subdirectory(profiling/server/src/timelineDecoder) + add_subdirectory(profiling/common/src) + add_subdirectory(src/timelineDecoder) +endif() + +if(BUILD_GATORD_MOCK AND NOT BUILD_BASE_PIPE_SERVER) + message(ERROR, "In order to build GatordMock you must set BUILD_BASE_PIPE_SERVER = YES") +endif() + +if(BUILD_GATORD_MOCK AND NOT BUILD_TIMELINE_DECODER) + message(ERROR, "In order to build GatordMock you must set BUILD_TIMELINE_DECODER = YES") +endif() + +if(BUILD_GATORD_MOCK) + set(gatord_mock_sources) + list(APPEND gatord_mock_sources + tests/profiling/gatordmock/CommandFileParser.hpp + tests/profiling/gatordmock/CommandFileParser.cpp + tests/profiling/gatordmock/CommandLineProcessor.hpp + tests/profiling/gatordmock/CommandLineProcessor.cpp + tests/profiling/gatordmock/GatordMockService.hpp + tests/profiling/gatordmock/GatordMockService.cpp + tests/profiling/gatordmock/PeriodicCounterCaptureCommandHandler.cpp + tests/profiling/gatordmock/PeriodicCounterCaptureCommandHandler.hpp + tests/profiling/gatordmock/PeriodicCounterSelectionResponseHandler.cpp + tests/profiling/gatordmock/PeriodicCounterSelectionResponseHandler.hpp + tests/profiling/gatordmock/StreamMetadataCommandHandler.cpp + tests/profiling/gatordmock/StreamMetadataCommandHandler.hpp + tests/profiling/gatordmock/StubCommandHandler.hpp + ) + + add_library_ex(gatordMockService STATIC ${gatord_mock_sources}) + + target_include_directories(gatordMockService PRIVATE profiling/common/include + profiling/server/src/basePipeServer + src/armnnUtils + src/profiling + src/timelineDecoder) + + if(BUILD_UNIT_TESTS) + target_include_directories(UnitTests PRIVATE tests/profiling/gatordmock) + target_include_directories(UnitTests PRIVATE src/backends/backendsCommon/test) + endif() + + add_executable_ex(GatordMock tests/profiling/gatordmock/GatordMockMain.cpp) + + target_include_directories(GatordMock PRIVATE profiling/common/include + profiling/server/src/basePipeServer + src/armnnUtils + src/profiling + src/timelineDecoder) + + target_link_libraries(GatordMock + armnn + armnnBasePipeServer + timelineDecoder + gatordMockService) + + if(Threads_FOUND AND (NOT ("${CMAKE_SYSTEM_NAME}" STREQUAL Android))) + target_link_libraries(GatordMock ${CMAKE_THREAD_LIBS_INIT}) + endif() + +endif() + +#################################################### +# export targets +set(armnn_export_targets) +list(APPEND armnn_export_targets + armnn + armnnUtils +) + +install( + TARGETS ${armnn_export_targets} + EXPORT armnn-targets + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} +) + +#################################################### +## Set export alias +set_target_properties(armnn + PROPERTIES + EXPORT_NAME Armnn +) + +## Export target scrips +install( + EXPORT armnn-targets + FILE ArmnnTargets.cmake + NAMESPACE Armnn:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} +) + 
+#################################################### +## Create ArmnnConfig.cmake +include(CMakePackageConfigHelpers) +set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}) + +message(STATUS "CMAKE_CURRENT_LIST_DIR ${CMAKE_CURRENT_LIST_DIR}" ) +message(STATUS "CMAKE_CURRENT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}" ) +configure_package_config_file( + ${CMAKE_CURRENT_LIST_DIR}/cmake/ArmnnConfig.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/ArmnnConfig.cmake + INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} +) + +#################################################### +## Install Armnn config file +install( + FILES + ${CMAKE_CURRENT_BINARY_DIR}/ArmnnConfig.cmake + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} +) + +#################################################### +## Export from build tree +export( + EXPORT armnn-targets + FILE ${CMAKE_CURRENT_BINARY_DIR}/ArmnnTargets.cmake + NAMESPACE Armnn:: +) + +add_library(Armnn::Armnn ALIAS armnn) +add_library(Armnn::armnnUtils ALIAS armnnUtils) + +#################################################### +## Build Python bindings +if (BUILD_PYTHON_WHL OR BUILD_PYTHON_SRC) + add_subdirectory(python/pyarmnn) +endif() diff --git a/arch/arm/ARMnn/ContributorGuide.md b/arch/arm/ARMnn/ContributorGuide.md new file mode 100644 index 0000000000..a9e6412b7a --- /dev/null +++ b/arch/arm/ARMnn/ContributorGuide.md @@ -0,0 +1,47 @@ +# Contribution Guidelines + +The Arm NN project is open for external contributors and welcomes contributions. Arm NN is licensed under the [MIT license](https://spdx.org/licenses/MIT.html) and all accepted contributions must have the same license. For more details on contributing to Arm NN see the [Contributing page](https://mlplatform.org/contributing/) on the [MLPlatform.org](https://mlplatform.org/) website. + +## Developer Certificate of Origin (DCO) + +Before the Arm NN project accepts your contribution, you need to certify its origin and give us your permission. To manage this process we use Developer Certificate of Origin (DCO) V1.1 (https://developercertificate.org/). + +To indicate that you agree to the terms of the DCO, you "sign off" your contribution by adding a line with your name and e-mail address to every git commit message (a minimal example command is shown further below): + +Signed-off-by: John Doe + +You must use your real name; no pseudonyms or anonymous contributions are accepted. + +## In File Copyright Notice + +In each source file, include the following copyright notice: + +// +// Copyright © ` ` and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +Note: if an existing file does not conform, update it when you next modify it, as convenient. + +## Releases + +Official Arm NN releases are published through the official [Arm NN Github repository](https://github.com/ARM-software/armnn). + +## Development repository + +The Arm NN development repository is hosted on the [mlplatform.org git repository](https://git.mlplatform.org/ml/armnn.git/), which is hosted by [Linaro](https://www.linaro.org/). + +## Code reviews + +Contributions must go through code review. Code reviews are performed through the [mlplatform.org Gerrit server](https://review.mlplatform.org). Contributors need to sign up to this Gerrit server with their GitHub account credentials. + +Only reviewed contributions can go to the master branch of Arm NN. + +## Continuous integration + +Contributions to Arm NN go through testing at the Arm CI system. All unit, integration and regression tests must pass before a contribution gets merged to the Arm NN master branch.
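To make the sign-off requirement described in the DCO section above concrete, here is a minimal, purely illustrative example (the name, e-mail address and commit message are placeholders); `git commit -s` simply appends a `Signed-off-by:` line built from your configured `user.name` and `user.email`:

```
# One-time setup of the identity used for the sign-off line (illustrative values)
git config user.name "John Doe"
git config user.email "john.doe@example.com"

# -s (--signoff) appends "Signed-off-by: John Doe <john.doe@example.com>" to the commit message
git commit -s -m "Fix a hypothetical bug in the TfLite parser"
```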
+ +## Communications + +We encourage all Arm NN developers to subscribe to the [Arm NN developer mailing list](https://lists.linaro.org/mailman/listinfo/armnn-dev). diff --git a/arch/arm/ARMnn/InstallationViaAptRepository.md b/arch/arm/ARMnn/InstallationViaAptRepository.md new file mode 100644 index 0000000000..93eb56ef1d --- /dev/null +++ b/arch/arm/ARMnn/InstallationViaAptRepository.md @@ -0,0 +1,186 @@ +# How to install ArmNN via our APT repository on Ubuntu's Launchpad + +* [Introduction](#introduction) +* [Add the Ubuntu Launchpad PPA to your system](#add-the-ubuntu-launchpad-ppa-to-your-system) +* [Outline of available packages](#outline-of-available-packages) + + [x86_64](#x86-64) + + [arm64](#arm64) + + [armhf](#armhf) +* [Check latest version of packages](#check-latest-version-of-packages) +* [Install desired combination of packages](#install-desired-combination-of-packages) +* [Uninstall packages](#uninstall-packages) + + +## Introduction +These are the step by step instructions on how to install the Arm NN core, TensorflowLite Parser +as well as PyArmNN for x86_64, Arm64 and Armhf for Ubuntu 20.04. +The packages will also be added to Debian Bullseye, their progress can be tracked here: +https://tracker.debian.org/pkg/armnn + + +## Add the Ubuntu Launchpad PPA to your system +* Add the PPA to your sources using a command contained in software-properties-common package: + ``` + sudo apt install software-properties-common + sudo add-apt-repository ppa:armnn/ppa + sudo apt update + ``` +* More information about our PPA and the Ubuntu Launchpad service can be found at [launchpad.net](https://launchpad.net/~armnn/+archive/ubuntu/ppa) +## Outline of available packages + +We provide a number of packages for each architecture; x86_64, aarch64 and armhf as outlined below. + +ARMNN_MAJOR_VERSION: This is the ABI version of the Arm NN source that has been packaged based on +include/armnn/Version.hpp. + +ARMNN_RELEASE_VERSION: This is the marketing release version based on the date source was released on github e.g. 20.11. + +PACKAGE_VERSION: This is the version of the source package used to build the binaries packages from. 
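As a purely illustrative example of how these placeholders combine (the version numbers below are hypothetical and do not refer to a specific release), an Arm NN core runtime package built for arm64 with ARMNN_MAJOR_VERSION=27, ARMNN_RELEASE_VERSION=21.11 and PACKAGE_VERSION=1 would be published as a file named:

```
libarmnn27_21.11-1_arm64.deb    # installed through the package name "libarmnn27"
```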
+ +### x86_64 +* Runtime Packages +``` +libarmnn-cpuref-backend{ARMNN_MAJOR_VERSION}_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_amd64.deb +libarmnntfliteparser{ARMNN_MAJOR_VERSION}_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_amd64.deb +libarmnn{ARMNN_MAJOR_VERSION}_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_amd64.deb +python3-pyarmnn_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_amd64.deb +``` +* Development Packages +``` +libarmnn-dev_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_amd64.deb +libarmnntfliteparser-dev_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_amd64.deb +``` +* Dependency Packages (These are empty packages that provide a user-friendly name for other packages they will install) +``` +armnn-latest-all_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_amd64.deb +armnn-latest-ref_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_amd64.deb +``` +### arm64 +* Runtime Packages +``` +libarmnn-aclcommon{ARMNN_MAJOR_VERSION}_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_arm64.deb +libarmnn-cpuacc-backend{ARMNN_MAJOR_VERSION}_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_arm64.deb +libarmnn-cpuref-backend{ARMNN_MAJOR_VERSION}_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_arm64.deb +libarmnn-gpuacc-backend{ARMNN_MAJOR_VERSION}_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_arm64.deb +libarmnntfliteparser{ARMNN_MAJOR_VERSION}_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_arm64.deb +libarmnn{ARMNN_MAJOR_VERSION}_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_arm64.deb +python3-pyarmnn_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_arm64.deb + +``` +* Development Packages +``` +libarmnn-dev_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_arm64.deb +libarmnntfliteparser-dev_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_arm64.deb + +``` +* Dependency Packages (These are empty packages that provide a user-friendly name for other packages they will install) +``` +armnn-latest-all_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_arm64.deb +armnn-latest-cpu_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_arm64.deb +armnn-latest-cpu-gpu_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_arm64.deb +armnn-latest-cpu-gpu-ref_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_arm64.deb +armnn-latest-gpu_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_arm64.deb +armnn-latest-ref_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_arm64.deb +``` +### armhf +* Runtime Packages +``` +libarmnn-aclcommon{ARMNN_MAJOR_VERSION}_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_armhf.deb +libarmnn-cpuacc-backend{ARMNN_MAJOR_VERSION}_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_armhf.deb +libarmnn-cpuref-backend{ARMNN_MAJOR_VERSION}_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_armhf.deb +libarmnn-gpuacc-backend{ARMNN_MAJOR_VERSION}_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_armhf.deb +libarmnntfliteparser{ARMNN_MAJOR_VERSION}_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_armhf.deb +libarmnn{ARMNN_MAJOR_VERSION}_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_armhf.deb +python3-pyarmnn_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_armhf.deb + +``` +* Development Packages +``` +libarmnn-dev_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_armhf.deb +libarmnntfliteparser-dev_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_armhf.deb + +``` +* Dependency Packages (These are empty packages that provide a user-friendly name for other packages they will install) +``` +armnn-latest-all_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_armhf.deb +armnn-latest-cpu_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_armhf.deb +armnn-latest-cpu-gpu_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_armhf.deb +armnn-latest-cpu-gpu-ref_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_armhf.deb 
+armnn-latest-gpu_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_armhf.deb +armnn-latest-ref_{ARMNN_RELEASE_VERSION}-{PACKAGE_VERSION}_armhf.deb +``` + +## Install desired combination of packages +The easiest way to install all of the available packages for your system's architecture is to run the command: + +``` + sudo apt-get install -y python3-pyarmnn armnn-latest-all + # Verify installation via Python: + python3 -c "import pyarmnn as ann;print(ann.GetVersion())" + # Returns '{ARMNN_MAJOR_VERSION}.0.0' e.g. 27.0.0 +``` +This will install PyArmNN and the three backends for Neon (CpuAcc), OpenCL (GpuAcc) and our Reference Backend (CpuRef). +It will also install their dependencies, including the arm-compute-library package, along with the TensorFlow Lite Parser +and its dependency, Arm NN Core. +If you do not wish to use PyArmNN, you can go up one level in the dependency chain and instead just install the +armnn-latest-all package: +``` + # Install ArmNN Core, CpuAcc Backend, GpuAcc Backend and Reference Backend as well as the TensorFlow Lite Parser: + # (This will only install CpuAcc and GpuAcc Backends on arm64 and armhf architectures) + sudo apt-get install -y armnn-latest-all + + # Install ArmNN Core, CpuAcc Backend as well as the TensorFlow Lite Parser: + sudo apt-get install -y armnn-latest-cpu + + # Install ArmNN Core, CpuAcc Backend, GpuAcc Backend as well as the TensorFlow Lite Parser: + sudo apt-get install -y armnn-latest-cpu-gpu + + # Install ArmNN Core, GpuAcc Backend as well as the TensorFlow Lite Parser: + sudo apt-get install -y armnn-latest-gpu + + # Install ArmNN Core, Reference Backend as well as the TensorFlow Lite Parser: + sudo apt-get install -y armnn-latest-ref +``` + +## Installation of specific ABI versioned packages +Due to Debian packaging requiring the pristine tarball from our Github release, the version on Launchpad may not align +with the released version on Github depending on the complexity of newly added features.
+In order to check for the latest available Arm NN version use apt-cache search: +``` + apt-cache search libarmnn + + # This returns a list of matching packages including versions from previous releases + libarmnn-cpuref-backend23 - Arm NN is an inference engine for CPUs, GPUs and NPUs + libarmnn-cpuref-backend24 - Arm NN is an inference engine for CPUs, GPUs and NPUs + libarmnn-dev - Arm NN is an inference engine for CPUs, GPUs and NPUs + libarmnntfliteparser-dev - Arm NN is an inference engine for CPUs, GPUs and NPUs # Note: removal of dash to suit debian naming conventions + libarmnn-tfliteparser23 - Arm NN is an inference engine for CPUs, GPUs and NPUs + libarmnntfliteparser24 - Arm NN is an inference engine for CPUs, GPUs and NPUs # Note: removal of dash to suit debian naming conventions + libarmnn23 - Arm NN is an inference engine for CPUs, GPUs and NPUs + libarmnn24 - Arm NN is an inference engine for CPUs, GPUs and NPUs + libarmnn25 - Arm NN is an inference engine for CPUs, GPUs and NPUs + libarmnn-aclcommon23 - Arm NN is an inference engine for CPUs, GPUs and NPUs + libarmnnaclcommon24 - Arm NN is an inference engine for CPUs, GPUs and NPUs # Note: removal of dash to suit debian naming conventions + libarmnn-cpuacc-backend23 - Arm NN is an inference engine for CPUs, GPUs and NPUs + libarmnn-cpuacc-backend24 - Arm NN is an inference engine for CPUs, GPUs and NPUs + libarmnn-gpuacc-backend23 - Arm NN is an inference engine for CPUs, GPUs and NPUs + libarmnn-gpuacc-backend24 - Arm NN is an inference engine for CPUs, GPUs and NPUs + + + # Export the ARMNN_MAJOR_VERSION to the latest visible e.g. libarmnn25 to allow installation using the below examples + export ARMNN_MAJOR_VERSION=27 + + # As the Tensorflow Lite Parser is now ABI stable it will have a different version to ARMNN_MAJOR_VERSION please choose latest version: + apt-cache search libarmnntfliteparser + # Returns e.g. libarmnntfliteparser24 so we then export that version, for reference this comes from include/armnnTfLiteParser/Version.hpp: + export TFLITE_PARSER_VERSION=24 + + sudo apt-get install -y libarmnntfliteparser${TFLITE_PARSER_VERSION} libarmnn-cpuacc-backend${ARMNN_MAJOR_VERSION} +``` + +## Uninstall packages +The easiest way to uninstall all of the previously installed packages is to run the command: +``` + sudo apt-get purge -y armnn-latest-all + sudo apt autoremove -y armnn-latest-all +``` diff --git a/arch/arm/ARMnn/LICENSE b/arch/arm/ARMnn/LICENSE new file mode 100644 index 0000000000..af3b51005f --- /dev/null +++ b/arch/arm/ARMnn/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017 ARM Limited. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/arch/arm/ARMnn/README.md b/arch/arm/ARMnn/README.md new file mode 100644 index 0000000000..4c7f4a6b1f --- /dev/null +++ b/arch/arm/ARMnn/README.md @@ -0,0 +1,137 @@ +# Introduction + +* [Quick Start Guides](#quick-start-guides) +* [Software tools overview](#software-tools-overview) +* [Where to find more information](#where-to-find-more-information) +* [Contributions](#contributions) +* [Disclaimer](#disclaimer) +* [License](#license) +* [Third-Party](#third-party) + +Arm NN is a key component of the [machine learning platform](https://mlplatform.org/), which is part of the +[Linaro Machine Intelligence Initiative](https://www.linaro.org/news/linaro-announces-launch-of-machine-intelligence-initiative/). + +The Arm NN SDK is a set of open-source software and tools that enables machine learning workloads on power-efficient +devices. It provides a bridge between existing neural network frameworks and power-efficient Cortex-A CPUs, +Arm Mali GPUs and Arm Ethos NPUs. + + + +Arm NN SDK utilizes the Compute Library to target programmable cores, such as Cortex-A CPUs and Mali GPUs, +as efficiently as possible. To target Ethos NPUs the NPU-Driver is utilized. We also welcome new contributors to provide +their [own driver and backend](src/backends/README.md). Note that Arm NN does not provide support for Cortex-M CPUs. + +Arm NN supports models created with **TensorFlow Lite** (TfLite) and **ONNX**. +Arm NN analyses a given model and replaces the operations within it with implementations particularly designed for the +hardware you want to execute it on. This results in a significant boost in execution speed. How much faster your neural +network can be executed depends on the operations it contains and the available hardware. Below you can see the speedup +we've been experiencing in our experiments with a few common networks. + +\image html PerformanceChart.png + +Arm NN is written using portable C++14 and the build system uses [CMake](https://cmake.org/), therefore it is possible +to build for a wide variety of target platforms, from a wide variety of host environments. + + +## Getting started: Quick Start Guides +Arm NN provides quick start guides that will help you to set up Arm NN and run models quickly. The quickest way to build Arm NN is to either use our **Debian package** or use the prebuilt binaries available in the [Assets](https://github.com/ARM-software/armnn/releases) section of every Arm NN release. +There is an installation guide available [here](InstallationViaAptRepository.md) which provides step-by-step instructions on how to install the Arm NN Core, +the TfLite Parser and PyArmNN for Ubuntu 20.04. These guides can be used with the **prebuilt binaries**. +At present we have added a [quick start guide](delegate/DelegateQuickStartGuide.md) that will show you how to integrate the delegate into TfLite to run models using Python. +More guides will be added here in the future. + + +## Software Components overview +Depending on what kind of framework (TensorFlow Lite, ONNX) you've been using to create your model there are multiple +software tools available within Arm NN that can serve your needs. + +Generally, there is a **parser** available **for each supported framework**.
ArmNN-Parsers are C++ libraries that you can integrate into your application to load, optimize and execute your model. +Each parser allows you to run models from one framework. If you would like to run an ONNX model you can make use of the **Onnx-Parser**. There also is a parser available for TfLite models but the preferred way to execute TfLite models is using our TfLite-Delegate. We also provide **python bindings** for our parsers and the Arm NN core. +We call the result **PyArmNN**. Therefore your application can be conveniently written in either C++ using the "original" +Arm NN library or in Python using PyArmNN. You can find tutorials on how to setup and use our parsers in our doxygen +documentation. The latest version can be found in the [wiki section](https://github.com/ARM-software/armnn/wiki/Documentation) +of this repository. + +Arm NN's software toolkit comes with the **TfLite Delegate** which can be integrated into TfLite. +TfLite will then delegate operations, that can be accelerated with Arm NN, to Arm NN. Every other operation will still be +executed with the usual TfLite runtime. This is our **recommended way to accelerate TfLite models**. As with our parsers +there are tutorials in our doxygen documentation that can be found in the [wiki section](https://github.com/ARM-software/armnn/wiki/Documentation). + +If you would like to use **Arm NN on Android** you can follow this guide which explains [how to build Arm NN using the AndroidNDK](BuildGuideAndroidNDK.md). +But you might also want to take a look at another repository which implements a hardware abstraction layer (HAL) for +Android. The repository is called [Android-NN-Driver](https://github.com/ARM-software/android-nn-driver) and when +integrated into Android it will automatically run neural networks with Arm NN. + + +## Where to find more information +The section above introduces the most important components that Arm NN provides. +You can find a complete list in our **doxygen documentation**. The +latest version can be found in the [wiki section](https://github.com/ARM-software/armnn/wiki/Documentation) of our github +repository. + +For FAQs and troubleshooting advice, see [FAQ.md](docs/FAQ.md) +or take a look at previous [github issues](https://github.com/ARM-software/armnn/issues). + + +## How to get involved +If you would like to get involved but don't know where to start, a good place to look is in our Github Issues. + +Feature requests without a volunteer to implement them are closed, but have the 'Help wanted' label, these can be found +[here](https://github.com/ARM-software/armnn/issues?q=is%3Aissue+label%3A%22Help+wanted%22+). +Once you find a suitable Issue, feel free to re-open it and add a comment, +so that other people know you are working on it and can help. + +When the feature is implemented the 'Help wanted' label will be removed. + +## Contributions +The Arm NN project welcomes contributions. For more details on contributing to Arm NN see the [Contributing page](https://mlplatform.org/contributing/) +on the [MLPlatform.org](https://mlplatform.org/) website, or see the [Contributor Guide](ContributorGuide.md). + +Particularly if you'd like to implement your own backend next to our CPU, GPU and NPU backends there are guides for +backend development: +[Backend development guide](src/backends/README.md), [Dynamic backend development guide](src/dynamic/README.md) + + +## Disclaimer +The armnn/tests directory contains tests used during Arm NN development. 
Many of them depend on third-party IP, model +protobufs and image files not distributed with Arm NN. The dependencies of some of the tests are available freely on +the Internet, for those who wish to experiment, but they won't run out of the box. + + +## License +Arm NN is provided under the [MIT](https://spdx.org/licenses/MIT.html) license. +See [LICENSE](LICENSE) for more information. Contributions to this project are accepted under the same license. + +Individual files contain the following tag instead of the full license text. + + SPDX-License-Identifier: MIT + +This enables machine processing of license information based on the SPDX License Identifiers that are available here: http://spdx.org/licenses/ + + +## Third-party +Third-party tools used by Arm NN: + +| Tool | License (SPDX ID) | Description | Version | Provenance +|----------------|-------------------|------------------------------------------------------------------|-------------|------------------- +| cxxopts | MIT | A lightweight C++ option parser library | SHA 12e496da3d486b87fa9df43edea65232ed852510 | https://github.com/jarro2783/cxxopts +| doctest | MIT | Header-only C++ testing framework | 2.4.6 | https://github.com/onqtam/doctest +| fmt | MIT | {fmt} is an open-source formatting library providing a fast and safe alternative to C stdio and C++ iostreams. | 7.0.1 | https://github.com/fmtlib/fmt +| ghc | MIT | A header-only single-file std::filesystem compatible helper library | 1.3.2 | https://github.com/gulrak/filesystem +| half | MIT | IEEE 754 conformant 16-bit half-precision floating point library | 1.12.0 | http://half.sourceforge.net +| mapbox/variant | BSD | A header-only alternative to 'boost::variant' | 1.1.3 | https://github.com/mapbox/variant +| stb | MIT | Image loader, resize and writer | 2.16 | https://github.com/nothings/stb + + +## Build process +Arm NN uses the following security-related build flags in its code: + +| Build flags | +|---------------------| +| -Wall | +| -Wextra | +| -Wold-style-cast | +| -Wno-missing-braces | +| -Wconversion | +| -Wsign-conversion | +| -Werror | diff --git a/arch/arm/ARMnn/SECURITY.md b/arch/arm/ARMnn/SECURITY.md new file mode 100644 index 0000000000..41ebdbc0c3 --- /dev/null +++ b/arch/arm/ARMnn/SECURITY.md @@ -0,0 +1,68 @@ +**Reporting vulnerabilities** + +Arm takes security issues seriously and welcomes feedback from researchers and the security community in order to improve the security of its products and services. We operate a coordinated disclosure policy for disclosing vulnerabilities and other security issues. + +Security issues can be complex and one single timescale doesn't fit all circumstances. We will make best endeavours to inform you when we expect security notifications and fixes to be available and facilitate coordinated disclosure when notifications and patches/mitigations are available. + +**Report** + +For all security issues, contact Arm by email at [arm-security@arm.com](mailto:arm-security@arm.com). + +**Secure submission using PGP** + +We support and encourage secure submission of vulnerability reports using PGP, using the key below. If you would like replies to be encrypted, please provide your own public key through a secure mechanism.
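As a sketch of one possible submission workflow (the file names are hypothetical and any OpenPGP-compatible tool can be used), a report can be encrypted with GnuPG against the key below before it is mailed:

```
# Save the public key block below to a file (e.g. arm-security.asc) and import it
gpg --import arm-security.asc

# Encrypt the report for the Arm security team; --armor produces an ASCII-armoured
# file (vulnerability-report.txt.asc) that can be attached to or pasted into an email
gpg --armor --encrypt --recipient arm-security@arm.com vulnerability-report.txt
```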
+ +~~~ +-----BEGIN PGP PUBLIC KEY BLOCK----- +mQINBFr7/RMBEACjHR5QZL/z1t2aLCRNXLE4KJiQmCo7edU5Be+7MTjIJDzZNu68 +lNEUYRoLexeayif8eC4T19bUsSbGpxHiYsFFjV8ewLXDyDJRRuaBGPfQ5rn/mE6X +Nvu+9Pputr+mB1R3CXcvrNkhmzPkK7zVM15oeyBMhogqPssuT4OeMduQdip8smfK +xTMk91RrJTLb+G3eE1tf+81kXBYvzp2e24Sn0/VeYe0IWnBobjVBZk3TmcYxDvz5 +Y47fU9V6cNj3Zq4VYrgxuLoFCA2VtetyiFQm5IYa3Bt3SWcAwihr8nbR2HoNdWyA +u8wJYYVzSq3hvT5l/IjTHxEcY+6RBq8poDSsftzvX386u9hmw7sJQFlTw6/pUjdr +gbsZ2ZzRBzKtU17ercpn4kU6VgVP3WRB5HiTFFkEpZuqAznOYaHbMq4dfd/g7Quq +C0VTbWiJnhku2i+g4BdHHRDtIF6U3aVQAfbrDb1LjVTa65p5ULOeY3HRAWtMNtu/ +Cj8cD98JDanzXtcnisds8vMQ8LZ6iMFChEnF8K4V0eLw9Ju6CMNiFYY7SEBndD/H +M4KcU4li7mROSbJcshgEbe1SYkxdMuI9eY4DNYxl3VjxoPUGzeqXo/ADFKE9bHsi +GTyEoij4ku0HspLVKnYHXn/LqHGwEcwjF8zphS+w5cn/e01akYwz5EVSQwARAQAB +tB1Bcm0gU3VwcG9ydCA8c3VwcG9ydEBhcm0uY29tPokCTgQTAQgAOBYhBN9zqDwZ +RL/vF0ihcdfNKdz4bBRiBQJa+/0TAhsDBQsJCAcCBhUKCQgLAgQWAgMBAh4BAheA +AAoJENfNKdz4bBRibK0P/jLlJR/QYypcjb+8BnHT9tCDgcV2KFYXS15VpbSNviN9 +Xs/UOnSadoGUMGCXDyb1PRNn98yUn7JlNR9rtsqPRmkpbo5cuw46ehgxjVlfcHnk +CttaE8Davx6zo0fyrBib2+oVVW8usi9+uRK4vhhPUicO3oXwzldsVFz+RbpubZxc +Bg/CZ+dQ2jMKOv1zDtInOG6OBnbQZRAeiWXgGhcIoPZ4zBQOi8nr0+bLcfvMeZi2 +uz6uKnylpXwZbl4ijcG8MKV/ei+7du+SzA9NY0WOT2g3FXDREWUhjKs8bmEZgIx4 +QgvDNpxAUThF+TqQ7zrsA8nT8POvDD0MhN/Z+A3QdPTdcaZFaXzIdxbDg+0FKmzu +OgtQBH4C01RWrkmZlhO5w7/Qjt0vLlhfyQIL9BW/HeEPtjnH2Hnq8xYnZhlVqJEh +FJU7F9sMvyiJiKviobFTd6AmpVkhxhcJ3k2L2C03imTsmUwAoejQCXwiYcOhyQ2t +Z9Nk8YIZTEw2urGFi4HSQPwPq2j/2j7ABJ4rlzJvO6vs5ppGkumvzIIP9JnpVXbp +wcbK6Ev6KdkX4s14Mzd6Hsd8LpL8t5nHhxUey6G0xKe2eSlHVm5Mlfhoete9UmIZ +dzIOZkgTgWXlYXRIxwGQ2Pss7pURtofykvLklq4jcobQuHxurl9cteodETfbWk/J +uQINBFr7/RMBEADWZG8eqt5D22g3T7ehnH/T3fuTX8LPUBnODMWGAEUY8uv64To8 +46odvrXFgWBgCf0sROEyJchc3SGLyR9S4lJsVJRde3QLN3WZkHlB4pSn4IQHFyQd +wsLQi+S9uggHMPlQ6MNvc5n0P3k5bT9fLUmtJWJ3QVjW7k963ZXpzf1zbQJqs30w +rlqGUZllfRoYQTfcYxFEaUFhwRJ//skNImWH8Vz+PTnqg2zRtFn3usrBV4GpNvsM +6jy+YEsSvUa7IY8k4wpPzEhIfqDjGbZxFSQ1H1G+mLUL+DD7oGffej/ZoC86TIdM +p6ew1rGhJdQBLh9nx+1ADOLWjNo2R0h60u7VR5q/K6V4fwWmeGFipPXZCD92I+nR +t/cjznwNyD/6J9YrBMF7mbGrS1TyfLaLt4tpdcBnsgqDTodd5OmG65mroXsg/lNO +7YZdecLZ34krfaLrWTtKkqULXbppB+uQvbVj8p8ONRImn6bZ+iAhnNaH9wJ06ico +b1F0imJ2SJWnFr6PzPRr0gPStLgu9wrRKheaORwF/H/HxSyPZxNVxFqu81q518A/ +plhub9INQLaxHf/TTjXpqZCcfdNTYUAW8rwbQfW9doSIT4lHY8bJXktb6BsVjkFj +PzDeYpXeOoTWetQqsEuTdg/F+qg041QBLtNj9Lr3Vy3StgMciRUIP8m0XwARAQAB +iQI2BBgBCAAgFiEE33OoPBlEv+8XSKFx180p3PhsFGIFAlr7/RMCGwwACgkQ180p +3PhsFGLWMA//V/XKrnI2YBh/SptUrgg7knPXva45bb7tGSH1fJg8f/wqycOSFFCY +ES45boA5jlQ3z8uw6BYCz5KeOucGhxAMw+x5EDdxZ33ksY5zqXB35WaMXzEwGYYb +E113/yhOsTbzu4bBKABSXbJO98MdAWvWpyCpp2MHIR3S9+ycM7/FMZ5xi3czZNRg +9+WZP+7W4qWhJptQ0kBh5C3N/tiltju5WQ2Y7XIn+5dMOJdtseFS7CNerxXZGAtH +nfRxaD/4ENdbWOwaVJiVW7+ioUJz09OWgy0gLYSDW+hciDnW1QAaJLpdAbniGZ0S +JsTmaZla8JnUKqZPgbFfA2OcnH9H+DWc0pHv17c5tJzTMP7rgirgGRX/U2LOzmFZ +1UxjQj5nn3Oa5frXbIAzb8xKiR0VDaquCM/3sti1AesYiS0Gw0Sqnw8qpFypgFXN +CKVgYXppIT+TmbDbNJDOB2UycxeI4vbiBwU8fI4qSpW12WsGdAJt/rx3UsyhZ+02 +4aSqDHzhJmtDPQ6lnaKe1fUkC90tgp8loVGmriWQx82jAQMqATVjIklTpE4vm00f +ocQIWOKEE90mKNEoV6rNbfl5QevmapTVdV/pmrRBzhbsa1uAUS4HZdH0Nf/OXEyv +yYCr2gCFPymkkRYhPr2w5EgbWyzLaBIwqjyIbXaveuB3DYi2Lhbf64I= +=EaN7 +-----END PGP PUBLIC KEY BLOCK----- +~~~ + +For more information visit https://developer.arm.com/support/arm-security-updates/report-security-vulnerabilities diff --git a/arch/arm/ARMnn/cmake/AddDllCopyCommands.cmake b/arch/arm/ARMnn/cmake/AddDllCopyCommands.cmake new file mode 100644 index 0000000000..671bc7d97f --- /dev/null +++ b/arch/arm/ARMnn/cmake/AddDllCopyCommands.cmake @@ -0,0 +1,39 @@ +macro 
(addDllCopyCommand target sourceDebug sourceRelease) + add_custom_command(TARGET ${target} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "$<$:${sourceDebug}>$<$:${sourceRelease}>$<$:${sourceRelease}>$<$:${sourceRelease}>" + $) +endmacro() + +# Checks if the given list contains an entry which matches the given regex. +function(listContainsRegex result list regex) + set(${result} 0 PARENT_SCOPE) + foreach(element ${list}) + if(${element} MATCHES ${regex}) + set(${result} 1 PARENT_SCOPE) + return() + endif() + endforeach() +endfunction() + +macro(addDllCopyCommands target) + if(${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC) + # Get the list of dependencies for the given target, so we can copy just the DLLs we need. + get_target_property(target_deps_str ${target} LINK_LIBRARIES) + set(target_deps) + list(APPEND target_deps ${target_deps_str}) + + cmake_policy(SET CMP0057 NEW) # Enable the "IN_LIST" operator + + # armnn.dll + if ("armnn" IN_LIST target_deps) + addDllCopyCommand(${target} "$/armnn.dll" "$/armnn.dll") + endif() + + # armnnTfLiteParser.dll + if ("armnnTfLiteParser" IN_LIST target_deps) + addDllCopyCommand(${target} "$/armnnTfLiteParser.dll" + "$/armnnTfLiteParser.dll") + endif() + endif() +endmacro() diff --git a/arch/arm/ARMnn/cmake/ArmnnConfig.cmake.in b/arch/arm/ARMnn/cmake/ArmnnConfig.cmake.in new file mode 100644 index 0000000000..e608f86b6a --- /dev/null +++ b/arch/arm/ARMnn/cmake/ArmnnConfig.cmake.in @@ -0,0 +1,15 @@ + +get_filename_component(ARMNN_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" DIRECTORY) +set(ARMNN_CONFIG_FILE ${CMAKE_CURRENT_LIST_FILE}) +MESSAGE(STATUS "Found Armnn: ${ARMNN_CONFIG_FILE}") + +include(CMakeFindDependencyMacro) + +list(APPEND CMAKE_MODULE_PATH ${ARMNN_CMAKE_DIR}) + +if(NOT TARGET Armnn::Armnn) + MESSAGE(STATUS "Armnn Import: ${ARMNN_CMAKE_DIR}/ArmnnTargets.cmake") + include("${ARMNN_CMAKE_DIR}/ArmnnTargets.cmake") +endif() + +set(ARMNN_LIBRARIES Armnn::Armnn) \ No newline at end of file diff --git a/arch/arm/ARMnn/cmake/ArmnnVersion.cmake b/arch/arm/ARMnn/cmake/ArmnnVersion.cmake new file mode 100644 index 0000000000..435e6bef37 --- /dev/null +++ b/arch/arm/ARMnn/cmake/ArmnnVersion.cmake @@ -0,0 +1,19 @@ +# +# Copyright © 2019 Arm Ltd. All rights reserved. +# SPDX-License-Identifier: MIT +# + +# Read the ArmNN version components from file +file(READ ${CMAKE_CURRENT_LIST_DIR}/../include/armnn/Version.hpp armnnVersion) + +# Parse the ArmNN version components +string(REGEX MATCH "#define ARMNN_MAJOR_VERSION ([0-9]*)" _ ${armnnVersion}) +set(ARMNN_MAJOR_VERSION ${CMAKE_MATCH_1}) +string(REGEX MATCH "#define ARMNN_MINOR_VERSION ([0-9]*)" _ ${armnnVersion}) +set(ARMNN_MINOR_VERSION ${CMAKE_MATCH_1}) + +# Define LIB version +set(GENERIC_LIB_VERSION "${ARMNN_MAJOR_VERSION}.${ARMNN_MINOR_VERSION}") + +# Define LIB soversion +set(GENERIC_LIB_SOVERSION "${ARMNN_MAJOR_VERSION}") diff --git a/arch/arm/ARMnn/cmake/DelegateVersion.cmake b/arch/arm/ARMnn/cmake/DelegateVersion.cmake new file mode 100644 index 0000000000..caaede85bc --- /dev/null +++ b/arch/arm/ARMnn/cmake/DelegateVersion.cmake @@ -0,0 +1,18 @@ +# +# Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+# SPDX-License-Identifier: MIT +# + +# Read the ArmNN Delegate version components from file +file(READ ${CMAKE_CURRENT_LIST_DIR}/../delegate/include/Version.hpp delegateVersion) + +# Parse the ArmNN Delegate version components +string(REGEX MATCH "#define DELEGATE_MAJOR_VERSION ([0-9]*)" _ ${delegateVersion}) +set(DELEGATE_MAJOR_VERSION ${CMAKE_MATCH_1}) +string(REGEX MATCH "#define DELEGATE_MINOR_VERSION ([0-9]*)" _ ${delegateVersion}) +set(DELEGATE_MINOR_VERSION ${CMAKE_MATCH_1}) + +# Define LIB version +set(DELEGATE_LIB_VERSION "${DELEGATE_MAJOR_VERSION}.${DELEGATE_MINOR_VERSION}") +# Define LIB soversion +set(DELEGATE_LIB_SOVERSION "${DELEGATE_MAJOR_VERSION}") \ No newline at end of file diff --git a/arch/arm/ARMnn/cmake/GlobalConfig.cmake b/arch/arm/ARMnn/cmake/GlobalConfig.cmake new file mode 100644 index 0000000000..2c5bc837e6 --- /dev/null +++ b/arch/arm/ARMnn/cmake/GlobalConfig.cmake @@ -0,0 +1,410 @@ +# +# Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +# Copyright 2020 NXP +# SPDX-License-Identifier: MIT +# +option(BUILD_ONNX_PARSER "Build Onnx parser" OFF) +option(BUILD_UNIT_TESTS "Build unit tests" ON) +option(BUILD_TESTS "Build test applications" OFF) +option(BUILD_FOR_COVERAGE "Use no optimization and output .gcno and .gcda files" OFF) +option(ARMCOMPUTENEON "Build with ARM Compute NEON support" OFF) +option(ARMCOMPUTECL "Build with ARM Compute OpenCL support" OFF) +option(ARMNNREF "Build with ArmNN reference support" ON) +option(PROFILING_BACKEND_STREAMLINE "Forward the armNN profiling events to DS-5/Streamline as annotations" OFF) +# options used for heap profiling and leak checking +option(HEAP_PROFILING "Build with heap profiling enabled" OFF) +option(LEAK_CHECKING "Build with leak checking enabled" OFF) +option(GPERFTOOLS_ROOT "Location where the gperftools 'include' and 'lib' folders to be found" Off) +# options used for tensorflow lite support +option(BUILD_TF_LITE_PARSER "Build Tensorflow Lite parser" OFF) +option(BUILD_ARMNN_SERIALIZER "Build Armnn Serializer" OFF) +option(BUILD_ACCURACY_TOOL "Build Accuracy Tool" OFF) +option(FLATC_DIR "Path to Flatbuffers compiler" OFF) +option(TF_LITE_GENERATED_PATH "Tensorflow lite generated C++ schema location" OFF) +option(FLATBUFFERS_ROOT "Location where the flatbuffers 'include' and 'lib' folders to be found" Off) +option(DYNAMIC_BACKEND_PATHS "Colon seperated list of paths where to load the dynamic backends from" "") +option(SAMPLE_DYNAMIC_BACKEND "Include the sample dynamic backend and its tests in the build" OFF) +option(BUILD_GATORD_MOCK "Build the Gatord simulator for external profiling testing." ON) +option(BUILD_TIMELINE_DECODER "Build the Timeline Decoder for external profiling." 
ON) +option(BUILD_BASE_PIPE_SERVER "Build the server to handle external profiling pipe traffic" ON) +option(BUILD_PYTHON_WHL "Build Python wheel package" OFF) +option(BUILD_PYTHON_SRC "Build Python source package" OFF) +option(BUILD_STATIC_PIPE_LIBS "Build Static PIPE libraries" OFF) +option(BUILD_PIPE_ONLY "Build the PIPE libraries only" OFF) +option(BUILD_ARMNN_TFLITE_DELEGATE "Build the Arm NN TfLite delegate" OFF) +option(BUILD_MEMORY_STRATEGY_BENCHMARK "Build the MemoryBenchmark" OFF) + +include(SelectLibraryConfigurations) + +set(COMPILER_IS_GNU_LIKE 0) +if(${CMAKE_CXX_COMPILER_ID} STREQUAL GNU OR + ${CMAKE_CXX_COMPILER_ID} STREQUAL Clang OR + ${CMAKE_CXX_COMPILER_ID} STREQUAL AppleClang) + set(COMPILER_IS_GNU_LIKE 1) +endif() + +# Enable CCache if available and not disabled +option(USE_CCACHE "USE_CCACHE" ON) +find_program(CCACHE_FOUND ccache) +if(CCACHE_FOUND AND USE_CCACHE) + get_property(rule_launch_compile DIRECTORY PROPERTY RULE_LAUNCH_COMPILE) + set_property(DIRECTORY PROPERTY RULE_LAUNCH_COMPILE "CCACHE_CPP2=yes ${rule_launch_compile} ccache") +endif() + +# Enable distcc if available and not disabled +option(USE_DISTCC "USE_DISTCC" OFF) +find_program(DISTCC_FOUND distcc) +if(DISTCC_FOUND AND USE_DISTCC) + get_property(rule_launch_compile DIRECTORY PROPERTY RULE_LAUNCH_COMPILE) + set_property(DIRECTORY PROPERTY RULE_LAUNCH_COMPILE "${rule_launch_compile} distcc") +endif() + +# Set to release configuration by default +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release") +endif() + +# Compiler flags that are always set +set(CMAKE_POSITION_INDEPENDENT_CODE ON) +if(COMPILER_IS_GNU_LIKE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -Wall -Wextra -Werror -Wold-style-cast -Wno-missing-braces -Wconversion -Wsign-conversion") + if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-psabi") + endif() +elseif(${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC) + # Disable C4996 (use of deprecated identifier) due to https://developercommunity.visualstudio.com/content/problem/252574/deprecated-compilation-warning-for-virtual-overrid.html + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /MP /wd4996") + add_definitions(-DNO_STRICT=1) +endif() +if("${CMAKE_SYSTEM_NAME}" STREQUAL Android) + # -lz is necessary for when building with ACL set with compressed kernels + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -llog -lz") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -llog -lz") +endif() + +# Compiler flags for Release builds +set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DNDEBUG") +if(COMPILER_IS_GNU_LIKE) + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") +elseif(${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC) + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MD /O2") +endif() + +# Compiler flags for Debug builds +if(COMPILER_IS_GNU_LIKE) + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -O0") +elseif(${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC) + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MDd /ZI /Od") + # Disable SAFESEH which is necessary for Edit and Continue to work + set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} /SAFESEH:NO") + set(CMAKE_SHARED_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} /SAFESEH:NO") +endif() + +# Modify RelWithDebInfo so that NDEBUG isn't defined. +# This enables asserts. 
+if (COMPILER_IS_GNU_LIKE) + string(REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") +elseif (${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC) + string(REPLACE "/DNDEBUG" "" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") +endif() + +# Compiler flags for code coverage measurements +if(BUILD_FOR_COVERAGE) + if(NOT CMAKE_BUILD_TYPE EQUAL "Debug") + message(WARNING "BUILD_FOR_COVERAGE set so forcing to Debug build") + set(CMAKE_BUILD_TYPE "Debug") + endif() + + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --coverage") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage") +endif() + +if(BUILD_FOR_COVERAGE AND NOT BUILD_UNIT_TESTS) + message(WARNING "BUILD_FOR_COVERAGE set but not BUILD_UNIT_TESTS, so code coverage will not be able to run") +endif() + +set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH}) + +include(CMakeFindDependencyMacro) + +if (NOT BUILD_PIPE_ONLY) + # cxxopts (Alternative to boost::program_options) + find_path(CXXOPTS_INCLUDE cxxopts/cxxopts.hpp PATHS third-party NO_CMAKE_FIND_ROOT_PATH) + include_directories(SYSTEM "${CXXOPTS_INCLUDE}") +endif() + +if (NOT BUILD_PIPE_ONLY) + # ghc (Alternative to boost::filesystem) + find_path(GHC_INCLUDE ghc/filesystem.hpp PATHS third-party NO_CMAKE_FIND_ROOT_PATH) + include_directories(SYSTEM "${GHC_INCLUDE}") +endif() + +# pthread +find_dependency(Threads) + +# Favour the protobuf passed on command line +if(BUILD_ONNX_PARSER) + find_library(PROTOBUF_LIBRARY_DEBUG NAMES "protobufd" + PATHS ${PROTOBUF_ROOT}/lib + NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH) + find_library(PROTOBUF_LIBRARY_DEBUG NAMES "protobufd") + + find_library(PROTOBUF_LIBRARY_RELEASE NAMES "protobuf" + PATHS ${PROTOBUF_ROOT}/lib + NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH) + find_library(PROTOBUF_LIBRARY_RELEASE NAMES "protobuf") + + select_library_configurations(PROTOBUF) + + find_path(PROTOBUF_INCLUDE_DIRS "google/protobuf/message.h" + PATHS ${PROTOBUF_ROOT}/include + NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH) + find_path(PROTOBUF_INCLUDE_DIRS "google/protobuf/message.h") + + include_directories(SYSTEM "${PROTOBUF_INCLUDE_DIRS}") + add_definitions(-DPROTOBUF_USE_DLLS) + + add_definitions(-DARMNN_ONNX_PARSER) + + find_path(ONNX_GENERATED_SOURCES "onnx/onnx.pb.cc") + + # C++ headers generated for onnx protobufs + include_directories(SYSTEM "${ONNX_GENERATED_SOURCES}") +endif() + +if(BUILD_ARMNN_TFLITE_DELEGATE) + add_definitions(-DARMNN_TFLITE_DELEGATE) +endif() +# Flatbuffers support for TF Lite and Armnn Serializer +if(BUILD_TF_LITE_PARSER OR BUILD_ARMNN_SERIALIZER) + # verify we have a valid flatbuffers include path + find_path(FLATBUFFERS_INCLUDE_PATH flatbuffers/flatbuffers.h + HINTS ${FLATBUFFERS_ROOT}/include /usr/local/include /usr/include) + + message(STATUS "Flatbuffers headers are located at: ${FLATBUFFERS_INCLUDE_PATH}") + + find_library(FLATBUFFERS_LIBRARY + NAMES libflatbuffers.a flatbuffers + HINTS ${FLATBUFFERS_ROOT}/lib /usr/local/lib /usr/lib) + + message(STATUS "Flatbuffers library located at: ${FLATBUFFERS_LIBRARY}") +endif() + +# Flatbuffers schema support for TF Lite +if(BUILD_TF_LITE_PARSER) + find_path(TF_LITE_SCHEMA_INCLUDE_PATH + schema_generated.h + HINTS ${TF_LITE_GENERATED_PATH}) + + message(STATUS "Tf Lite generated header found at: ${TF_LITE_SCHEMA_INCLUDE_PATH}") + + add_definitions(-DARMNN_TF_LITE_PARSER) +endif() + +if(BUILD_ARMNN_SERIALIZER) + add_definitions(-DARMNN_SERIALIZER) + 
add_definitions(-DARMNN_SERIALIZER_SCHEMA_PATH="${CMAKE_CURRENT_SOURCE_DIR}/src/armnnSerializer/ArmnnSchema.fbs") +endif() + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/profiling) + +# ARM Compute +# Note that ARM Compute has a different folder layout depending on the branch but also on +# whether it comes from a prepackaged archive (this is why we add several hints below) +if(ARMCOMPUTENEON OR ARMCOMPUTECL) + find_path(ARMCOMPUTE_INCLUDE arm_compute/core/CL/OpenCL.h + PATHS ${ARMCOMPUTE_ROOT}/include + PATHS ${ARMCOMPUTE_ROOT}/applications/arm_compute + PATHS ${ARMCOMPUTE_ROOT} + NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH) + find_path(ARMCOMPUTE_INCLUDE arm_compute/core/CL/OpenCL.h) + include_directories(SYSTEM "${ARMCOMPUTE_INCLUDE}") + + # Find the Arm Compute libraries if not already specified (the user may have already defined this in advance, + # e.g. if building clframework as a dependent cmake project) + if (NOT DEFINED ARMCOMPUTE_LIBRARIES) + # We link to the static variant so that customers don't need to find and build a compatible version of clframework. + # First try the folders specified ARMCOMPUTE_BUILD_DIR (with PATH_SUFFIXES for + # Windows builds) + if ((NOT DEFINED ARMCOMPUTE_BUILD_DIR) AND (DEFINED ARMCOMPUTE_ROOT)) + # Default build directory for ComputeLibrary is under the root + set(ARMCOMPUTE_BUILD_DIR ${ARMCOMPUTE_ROOT}/build) + endif() + + find_library(ARMCOMPUTE_LIBRARY_DEBUG NAMES arm_compute-static + PATHS ${ARMCOMPUTE_BUILD_DIR} + PATH_SUFFIXES "Debug" + NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH) + find_library(ARMCOMPUTE_LIBRARY_RELEASE NAMES arm_compute-static + PATHS ${ARMCOMPUTE_BUILD_DIR} + PATH_SUFFIXES "Release" + NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH) + find_library(ARMCOMPUTE_CORE_LIBRARY_DEBUG NAMES arm_compute_core-static + PATHS ${ARMCOMPUTE_BUILD_DIR} + PATH_SUFFIXES "Debug" + NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH) + find_library(ARMCOMPUTE_CORE_LIBRARY_RELEASE NAMES arm_compute_core-static + PATHS ${ARMCOMPUTE_BUILD_DIR} + PATH_SUFFIXES "Release" + NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH) + + # In case it wasn't there, try a default search (will work in cases where + # the library has been installed into a standard location) + find_library(ARMCOMPUTE_LIBRARY_DEBUG NAMES arm_compute-static) + find_library(ARMCOMPUTE_LIBRARY_RELEASE NAMES arm_compute-static) + find_library(ARMCOMPUTE_CORE_LIBRARY_DEBUG NAMES arm_compute_core-static) + find_library(ARMCOMPUTE_CORE_LIBRARY_RELEASE NAMES arm_compute_core-static) + + # In case it wasn't there, try the dynamic libraries + # This case will get used in a linux setup where the Compute Library + # has been installed in a standard system library path as a dynamic library + find_library(ARMCOMPUTE_LIBRARY_DEBUG NAMES arm_compute) + find_library(ARMCOMPUTE_LIBRARY_RELEASE NAMES arm_compute) + find_library(ARMCOMPUTE_CORE_LIBRARY_DEBUG NAMES arm_compute_core) + find_library(ARMCOMPUTE_CORE_LIBRARY_RELEASE NAMES arm_compute_core) + + set(ARMCOMPUTE_LIBRARIES + debug ${ARMCOMPUTE_LIBRARY_DEBUG} ${ARMCOMPUTE_CORE_LIBRARY_DEBUG} + optimized ${ARMCOMPUTE_LIBRARY_RELEASE} ${ARMCOMPUTE_CORE_LIBRARY_RELEASE} ) + endif() +endif() + +# ARM Compute NEON backend +if(ARMCOMPUTENEON) + # Add preprocessor definition for ARM Compute NEON + add_definitions(-DARMCOMPUTENEON_ENABLED) + # The ARM Compute headers contain some NEON intrinsics, so we need to build armnn with NEON support on armv7 + if(${CMAKE_SYSTEM_PROCESSOR} MATCHES armv7 AND COMPILER_IS_GNU_LIKE) + 
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon") + endif() +endif() + +# ARM Compute OpenCL backend +if(ARMCOMPUTECL) + # verify we have a valid flatbuffers include path + find_path(FLATBUFFERS_INCLUDE_PATH flatbuffers/flatbuffers.h + HINTS ${FLATBUFFERS_ROOT}/include /usr/local/include /usr/include) + + message(STATUS "Flatbuffers headers are located at: ${FLATBUFFERS_INCLUDE_PATH}") + + find_library(FLATBUFFERS_LIBRARY + NAMES libflatbuffers.a flatbuffers + HINTS ${FLATBUFFERS_ROOT}/lib /usr/local/lib /usr/lib) + + message(STATUS "Flatbuffers library located at: ${FLATBUFFERS_LIBRARY}") + + # Always use Arm compute library OpenCL headers + find_path(OPENCL_INCLUDE CL/opencl.hpp + PATHS ${ARMCOMPUTE_ROOT}/include + NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH) + + # Link against libOpenCL in opencl-1.2-stubs, but don't search there at runtime + link_libraries(-L${ARMCOMPUTE_BUILD_DIR}/opencl-1.2-stubs) + set(OPENCL_LIBRARIES OpenCL) + + include_directories(SYSTEM ${OPENCL_INCLUDE}) + + # Add preprocessor definition for ARM Compute OpenCL + add_definitions(-DARMCOMPUTECL_ENABLED) + + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DARM_COMPUTE_DEBUG_ENABLED") +endif() + +# Used by both Arm Compute backends, but should be added +# to the search path after the system directories if necessary +if(ARMCOMPUTENEON OR ARMCOMPUTECL) + find_path(HALF_INCLUDE half/half.hpp) + find_path(HALF_INCLUDE half/half.hpp + PATHS ${ARMCOMPUTE_ROOT}/include + NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH) + include_directories(SYSTEM ${HALF_INCLUDE}) +endif() + +# ArmNN reference backend +if(ARMNNREF) + add_definitions(-DARMNNREF_ENABLED) +endif() + +# This is the root for the dynamic backend tests to search for dynamic +# backends. By default it will be the project build directory. 
+add_definitions(-DDYNAMIC_BACKEND_BUILD_DIR="${PROJECT_BINARY_DIR}") + +# ArmNN dynamic backend +if(DYNAMIC_BACKEND_PATHS) + add_definitions(-DARMNN_DYNAMIC_BACKEND_ENABLED) +endif() + +if(SAMPLE_DYNAMIC_BACKEND) + add_definitions(-DSAMPLE_DYNAMIC_BACKEND_ENABLED) +endif() + +# Streamline annotate +if(PROFILING_BACKEND_STREAMLINE) + include_directories("${GATOR_ROOT}/annotate") + add_definitions(-DARMNN_STREAMLINE_ENABLED) +endif() + +if(HEAP_PROFILING OR LEAK_CHECKING) + # enable heap profiling for everything except for referencetests + if(NOT ${PROJECT_NAME} STREQUAL "referencetests") + find_path(HEAP_PROFILER_INCLUDE gperftools/heap-profiler.h + PATHS ${GPERFTOOLS_ROOT}/include + NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH) + include_directories(SYSTEM "${HEAP_PROFILER_INCLUDE}") + find_library(GPERF_TOOLS_LIBRARY + NAMES tcmalloc_debug + HINTS ${GPERFTOOLS_ROOT}/lib) + link_directories(${GPERFTOOLS_ROOT}/lib) + + link_libraries(${GPERF_TOOLS_LIBRARY}) + if (HEAP_PROFILING) + add_definitions("-DARMNN_HEAP_PROFILING_ENABLED=1") + endif() + if (LEAK_CHECKING) + add_definitions("-DARMNN_LEAK_CHECKING_ENABLED=1") + endif() + else() + message(STATUS "Heap profiling and leak checking are disabled for referencetests") + endif() +else() + # Valgrind only works with gperftools version number <= 2.4 + CHECK_INCLUDE_FILE(valgrind/memcheck.h VALGRIND_FOUND) +endif() + + +if(NOT BUILD_TF_LITE_PARSER) + message(STATUS "Tensorflow Lite parser support is disabled") +endif() + +if(NOT BUILD_ARMNN_SERIALIZER) + message(STATUS "Armnn Serializer support is disabled") +endif() + +if(NOT BUILD_PYTHON_WHL) + message(STATUS "PyArmNN wheel package is disabled") +endif() + +if(NOT BUILD_PYTHON_SRC) + message(STATUS "PyArmNN source package is disabled") +endif() + +if(BUILD_PYTHON_WHL OR BUILD_PYTHON_SRC) + find_package(PythonInterp 3 REQUIRED) + if(NOT ${PYTHONINTERP_FOUND}) + message(FATAL_ERROR "Python 3.x required to build PyArmNN, but not found") + endif() + + find_package(PythonLibs 3 REQUIRED) + if(NOT ${PYTHONLIBS_FOUND}) + message(FATAL_ERROR "Python 3.x development package required to build PyArmNN, but not found") + endif() + + find_package(SWIG 4 REQUIRED) + if(NOT ${SWIG_FOUND}) + message(FATAL_ERROR "SWIG 4.x requried to build PyArmNN, but not found") + endif() +endif() + +# ArmNN source files required for all build options +include_directories(SYSTEM third-party) diff --git a/arch/arm/ARMnn/cmake/ParserVersion.cmake b/arch/arm/ARMnn/cmake/ParserVersion.cmake new file mode 100644 index 0000000000..44b1709880 --- /dev/null +++ b/arch/arm/ARMnn/cmake/ParserVersion.cmake @@ -0,0 +1,35 @@ +# +# Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+# SPDX-License-Identifier: MIT +# + +# Read the OnnxParser version components from file +file(READ ${CMAKE_CURRENT_LIST_DIR}/../include/armnnOnnxParser/Version.hpp onnxVersion) + +# Parse the OnnxParser version components +string(REGEX MATCH "#define ONNX_PARSER_MAJOR_VERSION ([0-9]*)" _ ${onnxVersion}) +set(ONNX_PARSER_MAJOR_VERSION ${CMAKE_MATCH_1}) +string(REGEX MATCH "#define ONNX_PARSER_MINOR_VERSION ([0-9]*)" _ ${onnxVersion}) +set(ONNX_PARSER_MINOR_VERSION ${CMAKE_MATCH_1}) + +# Define LIB version +set(ONNX_PARSER_LIB_VERSION "${ONNX_PARSER_MAJOR_VERSION}.${ONNX_PARSER_MINOR_VERSION}") + +# Define LIB soversion +set(ONNX_PARSER_LIB_SOVERSION "${ONNX_PARSER_MAJOR_VERSION}") + + +# Read the TfLiteParser version components from file +file(READ ${CMAKE_CURRENT_LIST_DIR}/../include/armnnTfLiteParser/Version.hpp tfLiteVersion) + +# Parse the TfLiteParser version components +string(REGEX MATCH "#define TFLITE_PARSER_MAJOR_VERSION ([0-9]*)" _ ${tfLiteVersion}) +set(TFLITE_PARSER_MAJOR_VERSION ${CMAKE_MATCH_1}) +string(REGEX MATCH "#define TFLITE_PARSER_MINOR_VERSION ([0-9]*)" _ ${tfLiteVersion}) +set(TFLITE_PARSER_MINOR_VERSION ${CMAKE_MATCH_1}) + +# Define LIB version +set(TFLITE_PARSER_LIB_VERSION "${TFLITE_PARSER_MAJOR_VERSION}.${TFLITE_PARSER_MINOR_VERSION}") + +# Define LIB soversion +set(TFLITE_PARSER_LIB_SOVERSION "${TFLITE_PARSER_MAJOR_VERSION}") \ No newline at end of file diff --git a/arch/arm/ARMnn/cmake/Utils.cmake b/arch/arm/ARMnn/cmake/Utils.cmake new file mode 100644 index 0000000000..3a9d93a15c --- /dev/null +++ b/arch/arm/ARMnn/cmake/Utils.cmake @@ -0,0 +1,43 @@ +# Function which creates appropriate "source groups" (filter folders in Visual Studio) for the given list of source files +function(createSourceGroups source1) + set(sources ${source1} ${ARGN}) + foreach(source ${sources}) + get_filename_component(source_path ${source} PATH) + string(REPLACE "/" "\\" source_path_backslashes "${source_path}") + source_group(${source_path_backslashes} FILES ${source}) + endforeach() +endfunction() + +# Further processes a target and its list of source files adding extra touches useful for some generators +# (filter folders, group targets in folders, etc.). +# All optional arguments are treated as additional source files. +function(setup_target targetName source1) + set(sources ${source1} ${ARGN}) + + createSourceGroups(${sources}) + + # Enable USE_FOLDERS. This is required by the set_target_properties(... FOLDER ...) call below. + # We prefer to set it here rather than globally at the top of the file so that we only modify + # the Cmake environment if/when the functionality is actually required. + set_property(GLOBAL PROPERTY USE_FOLDERS ON) + file(RELATIVE_PATH projectFolder ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) + set_target_properties(${targetName} PROPERTIES FOLDER "${projectFolder}") +endfunction() + +# Convenience replacement of add_executable(), which besides adding an executable to the project +# further configures the target via setup_target(). +# All optional arguments are treated as additional source files. +function(add_executable_ex targetName source1) + set(sources ${source1} ${ARGN}) + add_executable(${targetName} ${sources}) + setup_target(${targetName} ${sources}) +endfunction() + +# Convenience replacement of add_library(), which besides adding a library to the project +# further configures the target via setup_target(). +# All optional arguments are treated as additional source files. 
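+# Example usage (the target and source names below are purely illustrative):
+#   add_library_ex(sampleLib STATIC SampleLib.cpp SampleLib.hpp)
+#   add_executable_ex(sampleTool SampleTool.cpp)
+# Both helpers forward to setup_target(), so the resulting targets get source groups and a
+# FOLDER property derived from their location in the source tree.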
+function(add_library_ex targetName libraryType source1)
+    set(sources ${source1} ${ARGN})
+    add_library(${targetName} ${libraryType} ${sources})
+    setup_target(${targetName} ${sources})
+endfunction()
diff --git a/arch/arm/ARMnn/delegate/BuildGuideNative.md b/arch/arm/ARMnn/delegate/BuildGuideNative.md
new file mode 100644
index 0000000000..4aa1af3ee9
--- /dev/null
+++ b/arch/arm/ARMnn/delegate/BuildGuideNative.md
@@ -0,0 +1,261 @@
+# Delegate build guide introduction
+
+The Arm NN Delegate can be found within the Arm NN repository but it is a standalone piece of software. However,
+it makes use of the Arm NN library. For this reason we have added two options to build the delegate. The first option
+allows you to build the delegate together with the Arm NN library; the second option is a standalone build
+of the delegate.
+
+This tutorial uses an AArch64 machine with Ubuntu 18.04 installed that can build all components
+natively (no cross-compilation required). This is to keep this guide simple.
+
+**Table of contents:**
+- [Delegate build guide introduction](#delegate-build-guide-introduction)
+- [Dependencies](#dependencies)
+  * [Download Arm NN](#download-arm-nn)
+  * [Build Tensorflow Lite for C++](#build-tensorflow-lite-for-c--)
+  * [Build Flatbuffers](#build-flatbuffers)
+  * [Build the Arm Compute Library](#build-the-arm-compute-library)
+  * [Build the Arm NN Library](#build-the-arm-nn-library)
+- [Build the TfLite Delegate (Stand-Alone)](#build-the-tflite-delegate--stand-alone-)
+- [Build the Delegate together with Arm NN](#build-the-delegate-together-with-arm-nn)
+- [Integrate the Arm NN TfLite Delegate into your project](#integrate-the-arm-nn-tflite-delegate-into-your-project)
+
+
+# Dependencies
+
+Build Dependencies:
+ * Tensorflow Lite: this guide uses version 2.5.0. Other versions may work.
+ * Flatbuffers 1.12.0
+ * Arm NN 21.11 or higher
+
+Required Tools:
+ * Git. This guide uses version 2.17.1. Other versions might work.
+ * pip. This guide uses version 20.3.3. Other versions might work.
+ * wget. This guide uses version 1.17.1. Other versions might work.
+ * zip. This guide uses version 3.0. Other versions might work.
+ * unzip. This guide uses version 6.00. Other versions might work.
+ * cmake 3.16.0 or higher. This guide uses version 3.16.0.
+ * scons. This guide uses version 2.4.1. Other versions might work.
+
+Our first step is to build all of the build dependencies mentioned above. We will have to create quite a few
+directories. To make navigation a bit easier, define a base directory for the project. At this stage we can also
+install all the tools that are required during the build. This guide assumes you are using a Bash shell.
+```bash
+export BASEDIR=~/ArmNNDelegate
+mkdir $BASEDIR
+cd $BASEDIR
+apt-get update && apt-get install git wget unzip zip python cmake scons
+```
+
+## Download Arm NN
+
+First clone Arm NN using Git.
+
+```bash
+cd $BASEDIR
+git clone "https://review.mlplatform.org/ml/armnn"
+cd armnn
+git checkout # e.g. branches/armnn_21_11
+```
+
+## Build Tensorflow Lite for C++
+Tensorflow has a few dependencies of its own. It requires the python packages pip3, numpy,
+and also Bazel or CMake, which are used to compile Tensorflow. A description of how to build Bazel can be
+found [here](https://docs.bazel.build/versions/master/install-compile-source.html). But for this guide, we will
+compile with CMake. Depending on your operating system and architecture there might be an easier way.
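+The Python packages mentioned above are not pulled in automatically; a possible way to install
+them (package names assumed, adjust for your distribution) is:
+```bash
+# Install pip for Python 3, then the numpy package used by the TensorFlow Lite build.
+sudo apt-get install python3-pip
+python3 -m pip install --upgrade pip numpy
+```
+
+The CMake version shipped with Ubuntu 18.04 is older than 3.16, so the next step builds and
+installs CMake 3.16.0 from source: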
+```bash +wget -O cmake-3.16.0.tar.gz https://cmake.org/files/v3.16/cmake-3.16.0.tar.gz +tar -xzf cmake-3.16.0.tar.gz -C $BASEDIR/ + +# If you have an older CMake, remove installed in order to upgrade +yes | sudo apt-get purge cmake +hash -r + +cd $BASEDIR/cmake-3.16.0 +./bootstrap +make +sudo make install +``` + +### Download and build Tensorflow Lite +Arm NN provides a script, armnn/scripts/get_tensorflow.sh, that can be used to download the version of TensorFlow that Arm NN was tested with: +```bash +cd $BASEDIR +git clone https://github.com/tensorflow/tensorflow.git +cd tensorflow/ +git checkout $(../armnn/scripts/get_tensorflow.sh -p) # Minimum version required for the delegate is v2.3.1 +``` + +Now the build process can be started. When calling "cmake", as below, you can specify a number of build +flags. But if you have no need to configure your tensorflow build, you can follow the exact commands below: +```bash +mkdir build # You are already inside $BASEDIR/tensorflow at this point +cd build +cmake $BASEDIR/tensorflow/tensorflow/lite -DTFLITE_ENABLE_XNNPACK=OFF +cmake --build . # This will be your DTFLITE_LIB_ROOT directory +``` + +## Build Flatbuffers +Flatbuffers is a memory efficient cross-platform serialization library as +described [here](https://google.github.io/flatbuffers/). It is used in tflite to store models and is also a dependency +of the delegate. After downloading the right version it can be built and installed using cmake. +```bash +cd $BASEDIR +wget -O flatbuffers-1.12.0.zip https://github.com/google/flatbuffers/archive/v1.12.0.zip +unzip -d . flatbuffers-1.12.0.zip +cd flatbuffers-1.12.0 +mkdir install && mkdir build && cd build +# I'm using a different install directory but that is not required +cmake .. -DCMAKE_INSTALL_PREFIX:PATH=$BASEDIR/flatbuffers-1.12.0/install +make install +``` + +## Build the Arm Compute Library + +The Arm NN library depends on the Arm Compute Library (ACL). It provides a set of functions that are optimized for +both Arm CPUs and GPUs. The Arm Compute Library is used directly by Arm NN to run machine learning workloads on +Arm CPUs and GPUs. + +It is important to have the right version of ACL and Arm NN to make it work. Arm NN and ACL are developed very closely +and released together. If you would like to use the Arm NN version "21.11" you should use the same "21.11" version for +ACL too. Arm NN provides a script, armnn/scripts/get_compute_library.sh, that can be used to download the exact version +of Arm Compute Library that Arm NN was tested with. + +To build the Arm Compute Library on your platform, download the Arm Compute Library and checkout the tag that contains +the version you want to use. Build it using `scons`. + +```bash +cd $BASEDIR +git clone https://review.mlplatform.org/ml/ComputeLibrary +cd ComputeLibrary/ +git checkout $(../armnn/scripts/get_compute_library.sh -p) # e.g. v21.11 +# The machine used for this guide only has a Neon CPU which is why I only have "neon=1" but if +# your machine has an arm Gpu you can enable that by adding `opencl=1 embed_kernels=1 to the command below +scons arch=arm64-v8a neon=1 extra_cxx_flags="-fPIC" benchmark_tests=0 validation_tests=0 +``` + +## Build the Arm NN Library + +With ACL built we can now continue to build Arm NN. Create a build directory and use `cmake` to build it. +```bash +cd $BASEDIR +cd armnn +mkdir build && cd build +# if you've got an arm Gpu add `-DARMCOMPUTECL=1` to the command below +cmake .. 
-DARMCOMPUTE_ROOT=$BASEDIR/ComputeLibrary -DARMCOMPUTENEON=1 -DBUILD_UNIT_TESTS=0 +make +``` + +# Build the TfLite Delegate (Stand-Alone) + +The delegate as well as Arm NN is built using `cmake`. Create a build directory as usual and build the delegate +with the additional cmake arguments shown below +```bash +cd $BASEDIR/armnn/delegate && mkdir build && cd build +cmake .. -DCMAKE_BUILD_TYPE=release # A release build rather than a debug build. + -DTENSORFLOW_ROOT=$BASEDIR/tensorflow \ # The root directory where tensorflow can be found. + -DTFLITE_LIB_ROOT=$BASEDIR/tensorflow/build \ # Directory where tensorflow libraries can be found. + -DFLATBUFFERS_ROOT=$BASEDIR/flatbuffers-1.12.0/install \ # Flatbuffers install directory. + -DArmnn_DIR=$BASEDIR/armnn/build \ # Directory where the Arm NN library can be found + -DARMNN_SOURCE_DIR=$BASEDIR/armnn # The top directory of the Arm NN repository. + # Required are the includes for Arm NN +make +``` + +To ensure that the build was successful you can run the unit tests for the delegate that can be found in +the build directory for the delegate. [Doctest](https://github.com/onqtam/doctest) was used to create those tests. Using test filters you can +filter out tests that your build is not configured for. In this case, because Arm NN was only built for Cpu +acceleration (CpuAcc), we filter for all test suites that have `CpuAcc` in their name. +```bash +cd $BASEDIR/armnn/delegate/build +./DelegateUnitTests --test-suite=*CpuAcc* +``` +If you have built for Gpu acceleration as well you might want to change your test-suite filter: +```bash +./DelegateUnitTests --test-suite=*CpuAcc*,*GpuAcc* +``` + +# Build the Delegate together with Arm NN + +In the introduction it was mentioned that there is a way to integrate the delegate build into Arm NN. This is +pretty straight forward. The cmake arguments that were previously used for the delegate have to be added +to the Arm NN cmake arguments. Also another argument `BUILD_ARMNN_TFLITE_DELEGATE` needs to be added to +instruct Arm NN to build the delegate as well. The new commands to build Arm NN are as follows: + +Download Arm NN if you have not already done so: +```bash +cd $BASEDIR +git clone "https://review.mlplatform.org/ml/armnn" +cd armnn +git checkout # e.g. branches/armnn_21_11 +``` +Build Arm NN with the delegate included +```bash +cd $BASEDIR +cd armnn +rm -rf build # Remove any previous cmake build. +mkdir build && cd build +# if you've got an arm Gpu add `-DARMCOMPUTECL=1` to the command below +cmake .. -DARMCOMPUTE_ROOT=$BASEDIR/ComputeLibrary \ + -DARMCOMPUTENEON=1 \ + -DBUILD_UNIT_TESTS=0 \ + -DBUILD_ARMNN_TFLITE_DELEGATE=1 \ + -DTENSORFLOW_ROOT=$BASEDIR/tensorflow \ + -DTFLITE_LIB_ROOT=$BASEDIR/tensorflow/build \ + -DFLATBUFFERS_ROOT=$BASEDIR/flatbuffers-1.12.0/install +make +``` +The delegate library can then be found in `build/armnn/delegate`. + +# Test the Arm NN delegate using the [TFLite Model Benchmark Tool](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/benchmark) + +The TFLite Model Benchmark Tool has a useful command line interface to test delegates. We can use this to demonstrate the use of the Arm NN delegate and its options. + +Some examples of this can be viewed in this [YouTube demonstration](https://www.youtube.com/watch?v=NResQ1kbm-M&t=920s). + +## Download the TFLite Model Benchmark Tool + +Binary builds of the benchmarking tool for various platforms are available [here](https://www.tensorflow.org/lite/performance/measurement#native_benchmark_binary). 
In this example I will target an aarch64 Linux environment. I will also download a sample uint8 tflite model from the [Arm ML Model Zoo](https://github.com/ARM-software/ML-zoo).
+
+```bash
+mkdir $BASEDIR/benchmarking
+cd $BASEDIR/benchmarking
+# Get the benchmarking binary.
+wget https://storage.googleapis.com/tensorflow-nightly-public/prod/tensorflow/release/lite/tools/nightly/latest/linux_aarch64_benchmark_model -O benchmark_model
+# Make it executable.
+chmod +x benchmark_model
+# And download a sample model from the model zoo.
+wget https://github.com/ARM-software/ML-zoo/blob/master/models/image_classification/mobilenet_v2_1.0_224/tflite_uint8/mobilenet_v2_1.0_224_quantized_1_default_1.tflite?raw=true -O mobilenet_v2_1.0_224_quantized_1_default_1.tflite
+```
+
+## Execute the benchmarking tool with the Arm NN delegate
+You are already at $BASEDIR/benchmarking from the previous stage.
+```bash
+LD_LIBRARY_PATH=../armnn/build ./benchmark_model --graph=mobilenet_v2_1.0_224_quantized_1_default_1.tflite --external_delegate_path="../armnn/build/delegate/libarmnnDelegate.so" --external_delegate_options="backends:CpuAcc;logging-severity:info"
+```
+The "external_delegate_options" here are specific to the Arm NN delegate. They are used to specify a target Arm NN backend or to enable/disable various options in Arm NN. A full description can be found in the parameters of the function tflite_plugin_create_delegate.
+
+# Integrate the Arm NN TfLite Delegate into your project
+
+The delegate can be integrated into your C++ project by creating a TfLite Interpreter and
+instructing it to use the Arm NN delegate for the graph execution. This should look similar
+to the following code snippet.
+```cpp
+// Create TfLite Interpreter
+std::unique_ptr<Interpreter> armnnDelegateInterpreter;
+InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+                  (&armnnDelegateInterpreter);
+
+// Create the Arm NN Delegate
+armnnDelegate::DelegateOptions delegateOptions(backends);
+std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
+    theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+                     armnnDelegate::TfLiteArmnnDelegateDelete);
+
+// Instruct the Interpreter to use the armnnDelegate
+armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get());
+```
+
+For further information on using TfLite Delegates, please visit the [tensorflow website](https://www.tensorflow.org/lite/guide).
+
+For more details of the kind of options you can pass to the Arm NN delegate please check the parameters of the function tflite_plugin_create_delegate.
diff --git a/arch/arm/ARMnn/delegate/CMakeLists.txt b/arch/arm/ARMnn/delegate/CMakeLists.txt
new file mode 100644
index 0000000000..bae1d31e71
--- /dev/null
+++ b/arch/arm/ARMnn/delegate/CMakeLists.txt
@@ -0,0 +1,308 @@
+#
+# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT +# + +cmake_minimum_required (VERSION 3.7.0) +project(armnnDelegate) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -Wall -Wextra -Werror -Wold-style-cast -Wno-missing-braces -Wconversion -Wsign-conversion") + +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake/Modules/") + +set(armnnDelegate_sources) +list(APPEND armnnDelegate_sources + include/armnn_delegate.hpp + include/DelegateOptions.hpp + include/Version.hpp + src/armnn_delegate.cpp + src/armnn_external_delegate.cpp + src/DelegateOptions.cpp + src/Activation.hpp + src/ArgMinMax.hpp + src/BatchSpace.hpp + src/Comparison.hpp + src/Convolution.hpp + src/Control.hpp + src/DelegateUtils.hpp + src/ElementwiseBinary.hpp + src/ElementwiseUnary.hpp + src/Fill.hpp + src/FullyConnected.hpp + src/Gather.hpp + src/LogicalBinary.hpp + src/Lstm.hpp + src/MultiLayerFacade.hpp + src/Normalization.hpp + src/Pack.hpp + src/Pad.hpp + src/Pooling.hpp + src/Prelu.hpp + src/Quantization.hpp + src/Redefine.hpp + src/Reduce.hpp + src/Resize.hpp + src/Round.hpp + src/Shape.hpp + src/SharedFunctions.hpp + src/SharedFunctions.cpp + src/Slice.hpp + src/Softmax.hpp + src/SpaceDepth.hpp + src/Split.hpp + src/Unpack.hpp + src/Transpose.hpp) + +add_library(armnnDelegate SHARED ${armnnDelegate_sources}) + +target_include_directories(armnnDelegate + PUBLIC + $ + $ + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/src) + +include(GNUInstallDirs) + +## Add Armnn as a Dependency +if(NOT ARMNN_SUB_PROJECT) + find_package(Armnn REQUIRED CONFIG HINTS ${Armnn_DIR}) +endif() +target_link_libraries(armnnDelegate PUBLIC Armnn::Armnn) + +## Add TfLite dependency +find_package(TfLiteSrc REQUIRED MODULE) +find_package(TfLite REQUIRED MODULE) + +target_link_libraries(armnnDelegate PUBLIC ${TfLite_LIB}) + +# lpthread and ldl are not required for Android +if(NOT "${CMAKE_SYSTEM_NAME}" STREQUAL Android) + target_link_libraries(armnnDelegate PUBLIC -lpthread) + target_link_libraries(armnnDelegate PUBLIC -ldl) +endif() + +# Various tflite header files are not warning clean +# We can't change compilation flags on header files directly, so we need to add them to an interface library first +add_library(tflite_headers INTERFACE) +target_include_directories(tflite_headers INTERFACE $ + $) + +target_compile_options(tflite_headers INTERFACE -Wno-conversion + -Wno-sign-conversion + -Wno-unused-parameter + -Wno-unused-function) + +target_link_libraries(armnnDelegate PUBLIC tflite_headers) + +## Add Flatbuffers dependency +find_package(Flatbuffers REQUIRED MODULE) + +target_link_libraries(armnnDelegate PRIVATE + ${Flatbuffers_LIB}) + +# include/flatbuffers/flatbuffers.h is not warning clean +# We can't change compilation flags on header files directly, so we need to add them to an interface library first +add_library(flatbuffer_headers INTERFACE) +target_include_directories(flatbuffer_headers INTERFACE $ + $) +target_compile_options(flatbuffer_headers INTERFACE -Wno-sign-conversion) + +target_link_libraries(armnnDelegate PUBLIC flatbuffer_headers) + +# Add libraries from armnn third-party libraries +# Third-party header files are not warning clean +# We can't change compilation flags on header files directly, so we need to add them to an interface library first +add_library(thirdparty_headers INTERFACE) +target_include_directories(thirdparty_headers INTERFACE $ + $) + +target_compile_options(thirdparty_headers INTERFACE -Wno-old-style-cast) +target_link_libraries(armnnDelegate PUBLIC thirdparty_headers) + +add_library(profiling_library_headers 
INTERFACE) +target_include_directories(profiling_library_headers INTERFACE $ + $) +target_link_libraries(armnnDelegate PUBLIC profiling_library_headers) +target_link_libraries(armnnDelegate PUBLIC Armnn::armnnUtils) + +set_target_properties(armnnDelegate PROPERTIES VERSION ${DELEGATE_LIB_VERSION} SOVERSION ${DELEGATE_LIB_SOVERSION}) + +option(BUILD_UNIT_TESTS "Build unit tests" ON) +if(BUILD_UNIT_TESTS) + set(armnnDelegate_unittest_sources) + list(APPEND armnnDelegate_unittest_sources + src/test/ActivationTest.cpp + src/test/ActivationTestHelper.hpp + src/test/ArgMinMaxTest.cpp + src/test/ArgMinMaxTestHelper.hpp + src/test/ArmnnDelegateTest.cpp + src/test/BatchSpaceTest.cpp + src/test/BatchSpaceTestHelper.hpp + src/test/CastTest.cpp + src/test/CastTestHelper.hpp + src/test/ComparisonTest.cpp + src/test/ComparisonTestHelper.hpp + src/test/ControlTest.cpp + src/test/ControlTestHelper.hpp + src/test/Convolution2dTest.cpp + src/test/Convolution3dTest.cpp + src/test/ConvolutionTestHelper.hpp + src/test/DelegateOptionsTest.cpp + src/test/DelegateOptionsTestHelper.hpp + src/test/DepthwiseConvolution2dTest.cpp + src/test/ElementwiseBinaryTest.cpp + src/test/ElementwiseBinaryTestHelper.hpp + src/test/ElementwiseUnaryTest.cpp + src/test/ElementwiseUnaryTestHelper.hpp + src/test/FillTest.cpp + src/test/FillTestHelper.hpp + src/test/FullyConnectedTest.cpp + src/test/FullyConnectedTestHelper.hpp + src/test/GatherTest.cpp + src/test/GatherTestHelper.hpp + src/test/LogicalTest.cpp + src/test/LogicalTestHelper.hpp + src/test/LstmTest.cpp + src/test/LstmTestHelper.hpp + src/test/MirrorPadTest.cpp + src/test/NormalizationTest.cpp + src/test/NormalizationTestHelper.hpp + src/test/PackTest.cpp + src/test/PackTestHelper.hpp + src/test/PadTest.cpp + src/test/PadTestHelper.hpp + src/test/Pooling2dTest.cpp + src/test/Pooling2dTestHelper.hpp + src/test/PreluTest.cpp + src/test/PreluTestHelper.hpp + src/test/QuantizationTest.cpp + src/test/QuantizationTestHelper.hpp + src/test/RedefineTestHelper.hpp + src/test/ReduceTest.cpp + src/test/ReduceTestHelper.hpp + src/test/ReshapeTest.cpp + src/test/ResizeTest.cpp + src/test/ResizeTestHelper.hpp + src/test/RoundTest.cpp + src/test/RoundTestHelper.hpp + src/test/SoftmaxTest.cpp + src/test/SoftmaxTestHelper.hpp + src/test/SpaceDepthTest.cpp + src/test/SpaceDepthTestHelper.hpp + src/test/ShapeTest.cpp + src/test/ShapeTestHelper.hpp + src/test/SliceTest.cpp + src/test/SliceTestHelper.hpp + src/test/SplitTest.cpp + src/test/SplitTestHelper.hpp + src/test/TestUtils.hpp + src/test/TestUtils.cpp + src/test/TransposeTest.cpp + src/test/TransposeTestHelper.hpp + src/test/UnidirectionalSequenceLstmTest.cpp + src/test/UnidirectionalSequenceLstmTestHelper.hpp + src/test/UnpackTest.cpp + src/test/UnpackTestHelper.hpp) + + # There's a known Android NDK bug which causes a subset of NeonLayerTests to + # fail. We'll exclude these tests in NeonLayerTests_NDK_Bug.cpp if we're doing + # a debug build and NDK is less than r21. + # https://github.com/android/ndk/issues/1135 + + # Default to always including these tests. + set(INCLUDE_NDK_BUG_TESTS "ON") + # Reconsider if we in a debug build. + string( TOLOWER ${CMAKE_BUILD_TYPE} BUILD_TYPE_LOWERCASE ) + if ( NOT BUILD_TYPE_LOWERCASE STREQUAL "release" ) + message("CMAKE:: BUILD TYPE IS ${CMAKE_BUILD_TYPE}") + # And NDK_VERSION has been set. + if ( DEFINED NDK_VERSION ) + message("CMAKE:: NDK DEFINED") + # And the version is less than r21. 
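+    # (NDK_VERSION is not set anywhere in this file; it is expected to be defined by the caller,
+    #  e.g. a hypothetical Android configure line: cmake .. -DCMAKE_BUILD_TYPE=Debug -DNDK_VERSION=r20 ...)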
+ if ( ${NDK_VERSION} STRLESS "r21" ) + message("CMAKE:: BUG TESTS OFF") + set(INCLUDE_NDK_BUG_TESTS "OFF") + endif() + endif() + endif() + + if ( INCLUDE_NDK_BUG_TESTS STREQUAL "ON" ) + list(APPEND armnnDelegate_unittest_sources + src/test/NeonDelegateTests_NDK_Issue.cpp + ) + else() + + endif() + + add_executable(DelegateUnitTests ${armnnDelegate_unittest_sources}) + + # Add half library from armnn third-party libraries + target_link_libraries(DelegateUnitTests PRIVATE thirdparty_headers) + + target_link_libraries(DelegateUnitTests PRIVATE armnnDelegate) + target_link_libraries(DelegateUnitTests PRIVATE Armnn::armnnUtils) + + target_link_libraries(DelegateUnitTests PRIVATE tflite_headers) + target_link_libraries(DelegateUnitTests PRIVATE flatbuffer_headers) + target_link_libraries(DelegateUnitTests PRIVATE profiling_library_headers) + +endif() + +#################################################### +## Export targets +set(armnn_delegate_export_targets) +list(APPEND armnn_delegate_export_targets + armnnDelegate + tflite_headers + flatbuffer_headers + profiling_library_headers + thirdparty_headers) + +install( + TARGETS ${armnn_delegate_export_targets} + EXPORT armnn-delegate-targets + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) + +## Set export alias +set_target_properties(armnnDelegate + PROPERTIES + EXPORT_NAME ArmnnDelegate) + +## Export target scrips +install( + EXPORT armnn-delegate-targets + FILE ArmnnDelegateTargets.cmake + NAMESPACE ArmnnDelegate:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +## Create ArmnnDelegateConfig.cmake +include(CMakePackageConfigHelpers) +set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}) +message(STATUS "CMAKE_CURRENT_LIST_DIR ${CMAKE_CURRENT_LIST_DIR}" ) +message(STATUS "CMAKE_CURRENT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}" ) +SET(Armnn_DIR "${Armnn_DIR}") + +configure_package_config_file( + ${CMAKE_CURRENT_LIST_DIR}/cmake/Modules/ArmnnDelegateConfig.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/ArmnnDelegateConfig.cmake + INSTALL_DESTINATION ${INSTALL_CONFIGDIR} + PATH_VARS Armnn_DIR) + +## Install ArmNN Delegate config file +install( + FILES + ${CMAKE_CURRENT_BINARY_DIR}/ArmnnDelegateConfig.cmake + DESTINATION ${INSTALL_CONFIGDIR}) + +## Export from build tree +export( + EXPORT armnn-delegate-targets + FILE ${CMAKE_CURRENT_BINARY_DIR}/ArmnnDelegateTargets.cmake + NAMESPACE ArmnnDelegate::) +add_library(ArmnnDelegate::ArmnnDelegate ALIAS armnnDelegate) + + +#################################################### diff --git a/arch/arm/ARMnn/delegate/DelegateQuickStartGuide.md b/arch/arm/ARMnn/delegate/DelegateQuickStartGuide.md new file mode 100644 index 0000000000..ce58624677 --- /dev/null +++ b/arch/arm/ARMnn/delegate/DelegateQuickStartGuide.md @@ -0,0 +1,108 @@ +# TfLite Delegate Quick Start Guide +If you have downloaded the Arm NN Github binaries or built the TfLite delegate yourself, then this tutorial will show you how you can +integrate it into TfLite to run models using python. + +Here is an example python script showing how to do this. In this script we are making use of the +[external adaptor](https://www.tensorflow.org/lite/performance/implementing_delegate#option_2_leverage_external_delegate) +tool of TfLite that allows you to load delegates at runtime. +```python +import numpy as np +import tflite_runtime.interpreter as tflite + +# Load TFLite model and allocate tensors. 
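+# NOTE: the library path below is a placeholder - point it at the libarmnnDelegate.so you
+# built or downloaded, and adjust the model path to a .tflite file on your machine.
+# "backends" is a priority-ordered list: operators not supported by the first backend
+# fall back to the next one in the list.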
+# (if you are using the complete tensorflow package you can find load_delegate in tf.experimental.load_delegate) +armnn_delegate = tflite.load_delegate( library="/libarmnnDelegate.so", + options={"backends": "CpuAcc,GpuAcc,CpuRef", "logging-severity":"info"}) +# Delegates/Executes all operations supported by Arm NN to/with Arm NN +interpreter = tflite.Interpreter(model_path="/delegate/python/test/test_data/mock_model.tflite", + experimental_delegates=[armnn_delegate]) +interpreter.allocate_tensors() + +# Get input and output tensors. +input_details = interpreter.get_input_details() +output_details = interpreter.get_output_details() + +# Test model on random input data. +input_shape = input_details[0]['shape'] +input_data = np.array(np.random.random_sample(input_shape), dtype=np.uint8) +interpreter.set_tensor(input_details[0]['index'], input_data) + +interpreter.invoke() + +# Print out result +output_data = interpreter.get_tensor(output_details[0]['index']) +print(output_data) +``` + +# Prepare the environment +Pre-requisites: + * Dynamically build Arm NN Delegate library or download the Arm NN binaries + * python3 (Depends on TfLite version) + * virtualenv + * numpy (Depends on TfLite version) + * tflite_runtime (>=2.5, depends on Arm NN Delegate) + +If you haven't built the delegate yet then take a look at the [build guide](./BuildGuideNative.md). Otherwise, +you can download the binaries [here](https://github.com/ARM-software/armnn/releases/) + +We recommend creating a virtual environment for this tutorial. For the following code to work python3 is needed. Please +also check the documentation of the TfLite version you want to use. There might be additional prerequisites for the python +version. We will use Tensorflow Lite 2.5.0 for this guide. +```bash +# Install python3 (We ended up with python3.5.3) and virtualenv +sudo apt-get install python3-pip +sudo pip3 install virtualenv + +# create a virtual environment +cd your/tutorial/dir +# creates a directory myenv at the current location +virtualenv -p python3 myenv +# activate the environment +source myenv/bin/activate +``` + +Now that the environment is active we can install additional packages we need for our example script. As you can see +in the python script at the start of this page, this tutorial uses the `tflite_runtime` rather than the whole tensorflow +package. The `tflite_runtime` is a package that wraps the TfLite Interpreter. Therefore it can only be used to run inferences of +TfLite models. But since Arm NN is only an inference engine itself this is a perfect match. The +`tflite_runtime` is also much smaller than the whole tensorflow package and better suited to run models on +mobile and embedded devices. + +The TfLite [website](https://www.tensorflow.org/lite/guide/python) shows you two methods to download the `tflite_runtime` package. +In our experience, the use of the pip command works for most systems including debian. However, if you're using an older version of Tensorflow, +you may need to build the pip package from source. You can find more information [here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/tools/pip_package/README.md). +But in our case, with Tensorflow Lite 2.5.0, we can install through: + +``` +pip3 install --extra-index-url https://google-coral.github.io/py-repo/ tflite_runtime +``` + +Your virtual environment is now all setup. Copy the final python script into a python file e.g. +`ExternalDelegatePythonTutorial.py`. 
Modify the python script above and replace `` and +`` with the directories you have set up. If you've been using the [native build guide](./BuildGuideNative.md) +this will be `$BASEDIR/armnn/build` and `$BASEDIR/armnn`. + +Finally, execute the script: +```bash +python ExternalDelegatePythonTutorial.py +``` +The output should look similar to this: +```bash +Info: Arm NN v28.0.0 + +Info: Initialization time: 0.56 ms + +INFO: TfLiteArmnnDelegate: Created TfLite Arm NN delegate. +[[ 12 123 16 12 11 14 20 16 20 12]] +Info: Shutdown time: 0.28 ms +``` + +For more details of the kind of options you can pass to the Arm NN delegate please check the parameters of function tflite_plugin_create_delegate. + +You can also test the functionality of the external delegate adaptor by running some unit tests: +```bash +pip install pytest +cd armnn/delegate/python/test +# You can deselect tests that require backends that your hardware doesn't support using markers e.g. -m "not GpuAccTest" +pytest --delegate-dir="/libarmnnDelegate.so" -m "not GpuAccTest" +``` diff --git a/arch/arm/ARMnn/delegate/README.md b/arch/arm/ARMnn/delegate/README.md new file mode 100644 index 0000000000..c6bd51cabb --- /dev/null +++ b/arch/arm/ARMnn/delegate/README.md @@ -0,0 +1,6 @@ +# The Arm NN TensorFlow Lite delegate + +'armnnDelegate' is a library for accelerating certain TensorFlow Lite operators on Arm hardware by providing +the TensorFlow Lite interpreter with an alternative implementation of the operators via its delegation mechanism. + +For more information, like a list of supported operators, please visit our [doxygen documentation](https://arm-software.github.io/armnn/latest/delegate.xhtml). diff --git a/arch/arm/ARMnn/delegate/cmake/Modules/ArmnnDelegateConfig.cmake.in b/arch/arm/ARMnn/delegate/cmake/Modules/ArmnnDelegateConfig.cmake.in new file mode 100644 index 0000000000..c878c46ad3 --- /dev/null +++ b/arch/arm/ARMnn/delegate/cmake/Modules/ArmnnDelegateConfig.cmake.in @@ -0,0 +1,23 @@ +# +# Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +# SPDX-License-Identifier: MIT +# + +get_filename_component(ARMNN_DELEGATE_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" DIRECTORY) +set(ARMNN_DELEGATE_CONFIG_FILE ${CMAKE_CURRENT_LIST_FILE}) +MESSAGE(STATUS "Found ArmnnDelegate: ${ARMNN_DELEGATE_CONFIG_FILE}") + +include(CMakeFindDependencyMacro) + +list(APPEND CMAKE_MODULE_PATH ${ARMNN_DELEGATE_CMAKE_DIR}) + +@PACKAGE_INIT@ +set_and_check(Armnn_DIR "@Armnn_DIR@") +find_dependency(Armnn REQUIRED CONFIG HINTS ${Armnn_DIR}) + +if(NOT TARGET ArmnnDelegate::ArmnnDelegate) + MESSAGE(STATUS "ArmnnDelegate Import: ${ARMNN_DELEGATE_CMAKE_DIR}/ArmnnDelegateTargets.cmake") + include("${ARMNN_DELEGATE_CMAKE_DIR}/ArmnnDelegateTargets.cmake") +endif() + +set(ARMNN_DELEGATE_LIBRARIES ArmnnDelegate::ArmnnDelegate) \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/cmake/Modules/FindFlatbuffers.cmake b/arch/arm/ARMnn/delegate/cmake/Modules/FindFlatbuffers.cmake new file mode 100644 index 0000000000..13d6f917b3 --- /dev/null +++ b/arch/arm/ARMnn/delegate/cmake/Modules/FindFlatbuffers.cmake @@ -0,0 +1,32 @@ +# +# Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+# SPDX-License-Identifier: MIT +# + +include(FindPackageHandleStandardArgs) +unset(FLATBUFFERS_FOUND) + +find_path(Flatbuffers_INCLUDE_DIR + flatbuffers/flatbuffers.h + HINTS + ${FLATBUFFERS_ROOT}/include + /usr/local/include + /usr/include) + +find_library(Flatbuffers_LIB + NAMES + libflatbuffers.a + flatbuffers + HINTS + ${FLATBUFFERS_ROOT}/lib + /usr/local/lib + /usr/lib) + +## Set FLATBUFFERS_FOUND +find_package_handle_standard_args(Flatbuffers DEFAULT_MSG Flatbuffers_INCLUDE_DIR Flatbuffers_LIB) + +## Set external variables for usage in CMakeLists.txt +if(FLATBUFFERS_FOUND) + set(Flatbuffers_LIB ${Flatbuffers_LIB}) + set(Flatbuffers_INCLUDE_DIR ${Flatbuffers_INCLUDE_DIR}) +endif() \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/cmake/Modules/FindTfLite.cmake b/arch/arm/ARMnn/delegate/cmake/Modules/FindTfLite.cmake new file mode 100644 index 0000000000..907c3847cb --- /dev/null +++ b/arch/arm/ARMnn/delegate/cmake/Modules/FindTfLite.cmake @@ -0,0 +1,53 @@ +# +# Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +# SPDX-License-Identifier: MIT +# + +include(FindPackageHandleStandardArgs) +unset(TFLITE_FOUND) + +# +# NOTE: this module is used to find the tensorflow lite binary libraries only +# the FindTfLiteSrc.cmake module is used to find the tensorflow lite include directory. +# This is to allow components like the Tensorflow lite parser that have a source dependency +# on tensorflow lite headers but no need to link to the binary libraries to use only the sources +# and not have an artificial dependency on the libraries. +# + +# First look for the static version of tensorflow lite +find_library(TfLite_LIB NAMES "libtensorflow-lite.a" HINTS ${TFLITE_LIB_ROOT} ${TFLITE_LIB_ROOT}/tensorflow/lite) +# If not found then, look for the dynamic library of tensorflow lite +find_library(TfLite_LIB NAMES "libtensorflow_lite_all.so" "libtensorflowlite.so" HINTS ${TFLITE_LIB_ROOT} ${TFLITE_LIB_ROOT}/tensorflow/lite) + +# If the static library was found, gather all of its dependencies +if (TfLite_LIB MATCHES .a$) + message("-- Static tensorflow lite library found, using for ArmNN build") + find_library(TfLite_abseilstrings_LIB "libabsl_strings.a" + PATH ${TFLITE_LIB_ROOT}/_deps/abseil-cpp-build/absl/strings) + find_library(TfLite_farmhash_LIB "libfarmhash.a" + PATH ${TFLITE_LIB_ROOT}/_deps/farmhash-build) + find_library(TfLite_fftsg_LIB "libfft2d_fftsg.a" + PATH ${TFLITE_LIB_ROOT}/_deps/fft2d-build) + find_library(TfLite_fftsg2d_LIB "libfft2d_fftsg2d.a" + PATH ${TFLITE_LIB_ROOT}/_deps/fft2d-build) + find_library(TfLite_ruy_LIB "libruy.a" PATH + ${TFLITE_LIB_ROOT}/_deps/ruy-build) + find_library(TfLite_flatbuffers_LIB "libflatbuffers.a" + PATH ${TFLITE_LIB_ROOT}/_deps/flatbuffers-build) + + ## Set TFLITE_FOUND if all libraries are satisfied for static lib + find_package_handle_standard_args(TfLite DEFAULT_MSG TfLite_LIB TfLite_abseilstrings_LIB TfLite_ruy_LIB TfLite_fftsg_LIB TfLite_fftsg2d_LIB TfLite_farmhash_LIB TfLite_flatbuffers_LIB) + # Set external variables for usage in CMakeLists.txt + if (TFLITE_FOUND) + set(TfLite_LIB ${TfLite_LIB} ${TfLite_abseilstrings_LIB} ${TfLite_ruy_LIB} ${TfLite_fftsg_LIB} ${TfLite_fftsg2d_LIB} ${TfLite_farmhash_LIB} ${TfLite_flatbuffers_LIB}) + endif () +elseif (TfLite_LIB MATCHES .so$) + message("-- Dynamic tensorflow lite library found, using for ArmNN build") + find_package_handle_standard_args(TfLite DEFAULT_MSG TfLite_LIB) + ## Set external variables for usage in CMakeLists.txt + if (TFLITE_FOUND) + set(TfLite_LIB ${TfLite_LIB}) + 
endif () +else() + message(FATAL_ERROR "Could not find a tensorflow lite library to use") +endif() diff --git a/arch/arm/ARMnn/delegate/cmake/Modules/FindTfLiteSrc.cmake b/arch/arm/ARMnn/delegate/cmake/Modules/FindTfLiteSrc.cmake new file mode 100644 index 0000000000..91833c18e6 --- /dev/null +++ b/arch/arm/ARMnn/delegate/cmake/Modules/FindTfLiteSrc.cmake @@ -0,0 +1,28 @@ +# +# Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +# SPDX-License-Identifier: MIT +# + +include(FindPackageHandleStandardArgs) +unset(TFLITE_SRC_FOUND) + +find_path(TfLite_INCLUDE_DIR + NAMES + tensorflow/lite + third_party + HINTS + ${TENSORFLOW_ROOT}) + +find_path(TfLite_Schema_INCLUDE_PATH + schema_generated.h + HINTS + ${TENSORFLOW_ROOT}/tensorflow/lite/schema) + +## Set TFLITE_FOUND +find_package_handle_standard_args(TfLiteSrc DEFAULT_MSG TfLite_INCLUDE_DIR TfLite_Schema_INCLUDE_PATH) + +## Set external variables for usage in CMakeLists.txt +if(TFLITE_SRC_FOUND) + set(TfLite_INCLUDE_DIR ${TfLite_INCLUDE_DIR}) + set(TfLite_Schema_INCLUDE_PATH ${TfLite_Schema_INCLUDE_PATH}) +endif() diff --git a/arch/arm/ARMnn/delegate/include/DelegateOptions.hpp b/arch/arm/ARMnn/delegate/include/DelegateOptions.hpp new file mode 100644 index 0000000000..5bc2e59070 --- /dev/null +++ b/arch/arm/ARMnn/delegate/include/DelegateOptions.hpp @@ -0,0 +1,287 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include +#include + +#include +#include +#include + +namespace armnnDelegate +{ + +class DelegateOptions +{ +public: + DelegateOptions(armnn::Compute computeDevice, + const std::vector& backendOptions = {}, + armnn::Optional logSeverityLevel = armnn::EmptyOptional()); + + DelegateOptions(const std::vector& backends, + const std::vector& backendOptions = {}, + armnn::Optional logSeverityLevel = armnn::EmptyOptional()); + + DelegateOptions(armnn::Compute computeDevice, + const armnn::OptimizerOptions& optimizerOptions, + const armnn::Optional& logSeverityLevel = armnn::EmptyOptional(), + const armnn::Optional& func = armnn::EmptyOptional()); + + DelegateOptions(const std::vector& backends, + const armnn::OptimizerOptions& optimizerOptions, + const armnn::Optional& logSeverityLevel = armnn::EmptyOptional(), + const armnn::Optional& func = armnn::EmptyOptional()); + + + /** + * This constructor processes delegate options in form of command line arguments. + * It works in conjunction with the TfLite external delegate plugin. + * + * Available options: + * + * Option key: "backends" \n + * Possible values: ["EthosNPU"/"GpuAcc"/"CpuAcc"/"CpuRef"] \n + * Descriptions: A comma separated list without whitespaces of + * backends which should be used for execution. Falls + * back to next backend in list if previous doesn't + * provide support for operation. e.g. "GpuAcc,CpuAcc" + * + * Option key: "dynamic-backends-path" \n + * Possible values: [filenameString] \n + * Descriptions: This is the directory that will be searched for any dynamic backends. + * + * Option key: "logging-severity" \n + * Possible values: ["trace"/"debug"/"info"/"warning"/"error"/"fatal"] \n + * Description: Sets the logging severity level for ArmNN. Logging + * is turned off if this option is not provided. + * + * Option key: "gpu-tuning-level" \n + * Possible values: ["0"/"1"/"2"/"3"] \n + * Description: 0=UseOnly(default), 1=RapidTuning, 2=NormalTuning, + * 3=ExhaustiveTuning. Requires option gpu-tuning-file. 
+ * 1,2 and 3 will create a tuning-file, 0 will apply the + * tunings from an existing file + * + * Option key: "gpu-mlgo-tuning-file" \n + * Possible values: [filenameString] \n + * Description: File name for the MLGO tuning file + * + * Option key: "gpu-tuning-file" \n + * Possible values: [filenameString] \n + * Description: File name for the tuning file. + * + * Option key: "gpu-enable-profiling" \n + * Possible values: ["true"/"false"] \n + * Description: Enables GPU profiling + * + * Option key: "gpu-kernel-profiling-enabled" \n + * Possible values: ["true"/"false"] \n + * Description: Enables GPU kernel profiling + * + * Option key: "save-cached-network" \n + * Possible values: ["true"/"false"] \n + * Description: Enables saving of the cached network to a file, + * specified with the cached-network-filepath option + * + * Option key: "cached-network-filepath" \n + * Possible values: [filenameString] \n + * Description: If non-empty, the given file will be used to load/save the cached network. + * If save-cached-network is given then the cached network will be saved to the given file. + * To save the cached network a file must already exist. + * If save-cached-network is not given then the cached network will be loaded from the given file. + * This will remove initial compilation time of kernels and speed up the first execution. + * + * Option key: "enable-fast-math" \n + * Possible values: ["true"/"false"] \n + * Description: Enables fast_math options in backends that support it + * + * Option key: "number-of-threads" \n + * Possible values: ["1"-"64"] \n + * Description: Assign the number of threads used by the CpuAcc backend. + * Default is set to 0 (Backend will decide number of threads to use). + * + * Option key: "reduce-fp32-to-fp16" \n + * Possible values: ["true"/"false"] \n + * Description: Reduce Fp32 data to Fp16 for faster processing + * + * Option key: "reduce-fp32-to-bf16" \n + * Possible values: ["true"/"false"] \n + * Description: Reduce Fp32 data to Bf16 for faster processing + * + * Option key: "debug-data" \n + * Possible values: ["true"/"false"] \n + * Description: Add debug data for easier troubleshooting + * + * Option key: "memory-import" \n + * Possible values: ["true"/"false"] \n + * Description: Enable memory import + * + * Option key: "enable-internal-profiling" \n + * Possible values: ["true"/"false"] \n + * Description: Enable the internal profiling feature. + * + * Option key: "internal-profiling-detail" \n + * Possible values: [1/2] \n + * Description: Set the detail on the internal profiling. 1 = DetailsWithEvents, 2 = DetailsOnly. + * + * Option key: "enable-external-profiling" \n + * Possible values: ["true"/"false"] \n + * Description: Enable the external profiling feature. + * + * Option key: "timeline-profiling" \n + * Possible values: ["true"/"false"] \n + * Description: Indicates whether external timeline profiling is enabled or not. + * + * Option key: "outgoing-capture-file" \n + * Possible values: [filenameString] \n + * Description: Path to a file in which outgoing timeline profiling messages will be stored. + * + * Option key: "incoming-capture-file" \n + * Possible values: [filenameString] \n + * Description: Path to a file in which incoming timeline profiling messages will be stored. + * + * Option key: "file-only-external-profiling" \n + * Possible values: ["true"/"false"] \n + * Description: Enable profiling output to file only. 
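+ *
+ * Example (the key/value strings here are illustrative only):
+ *     options_keys   = { "backends", "logging-severity" }
+ *     options_values = { "GpuAcc,CpuAcc,CpuRef", "info" }
+ *     num_options    = 2
+ * This would run supported operators on GpuAcc, falling back to CpuAcc and then CpuRef,
+ * and enable info-level ArmNN logging.
+ *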
+ * + * Option key: "counter-capture-period" \n + * Possible values: Integer, Default is 10000u + * Description: Value in microseconds of the profiling capture period. \n + * + * Option key: "profiling-file-format" \n + * Possible values: String of ["binary"] \n + * Description: The format of the file used for outputting profiling data. Currently on "binary" is supported. + * + * Option key: "serialize-to-dot" \n + * Possible values: [filenameString] \n + * Description: Serialize the optimized network to the file specified in "dot" format. + * + * @param[in] option_keys Delegate option names + * @param[in] options_values Delegate option values + * @param[in] num_options Number of delegate options + * @param[in,out] report_error Error callback function + * + */ + DelegateOptions(char const* const* options_keys, + char const* const* options_values, + size_t num_options, + void (*report_error)(const char*)); + + const std::vector& GetBackends() const { return m_Backends; } + + void SetBackends(const std::vector& backends) { m_Backends = backends; } + + void SetDynamicBackendsPath(const std::string& dynamicBackendsPath) + { + m_RuntimeOptions.m_DynamicBackendsPath = dynamicBackendsPath; + } + const std::string& GetDynamicBackendsPath() const + { + return m_RuntimeOptions.m_DynamicBackendsPath; + } + + void SetGpuProfilingState(bool gpuProfilingState) + { + m_RuntimeOptions.m_EnableGpuProfiling = gpuProfilingState; + } + bool GetGpuProfilingState() + { + return m_RuntimeOptions.m_EnableGpuProfiling; + } + + const std::vector& GetBackendOptions() const + { + return m_RuntimeOptions.m_BackendOptions; + } + + /// Appends a backend option to the list of backend options + void AddBackendOption(const armnn::BackendOptions& option) + { + m_RuntimeOptions.m_BackendOptions.push_back(option); + } + + /// Sets the severity level for logging within ArmNN that will be used on creation of the delegate + void SetLoggingSeverity(const armnn::LogSeverity& level) { m_LoggingSeverity = level; } + void SetLoggingSeverity(const std::string& level) { m_LoggingSeverity = armnn::StringToLogLevel(level); } + + /// Returns the severity level for logging within ArmNN + armnn::LogSeverity GetLoggingSeverity() { return m_LoggingSeverity.value(); } + + bool IsLoggingEnabled() { return m_LoggingSeverity.has_value(); } + + const armnn::OptimizerOptions& GetOptimizerOptions() const { return m_OptimizerOptions; } + + void SetOptimizerOptions(const armnn::OptimizerOptions& optimizerOptions) { m_OptimizerOptions = optimizerOptions; } + + const armnn::Optional& GetDebugCallbackFunction() const + { return m_DebugCallbackFunc; } + + void SetInternalProfilingParams(bool internalProfilingState, + const armnn::ProfilingDetailsMethod& internalProfilingDetail) + { m_InternalProfilingEnabled = internalProfilingState; m_InternalProfilingDetail = internalProfilingDetail; } + + bool GetInternalProfilingState() const { return m_InternalProfilingEnabled; } + const armnn::ProfilingDetailsMethod& GetInternalProfilingDetail() const { return m_InternalProfilingDetail; } + + void SetExternalProfilingParams( + const armnn::IRuntime::CreationOptions::ExternalProfilingOptions& externalProfilingParams) + { m_ProfilingOptions = externalProfilingParams; } + + const armnn::IRuntime::CreationOptions::ExternalProfilingOptions& GetExternalProfilingParams() const + { return m_ProfilingOptions; } + + void SetSerializeToDot(const std::string& serializeToDotFile) { m_SerializeToDot = serializeToDotFile; } + const std::string& GetSerializeToDot() const { return 
m_SerializeToDot; } + + /// @Note: This might overwrite options that were set with other setter functions of DelegateOptions + void SetRuntimeOptions(const armnn::IRuntime::CreationOptions& runtimeOptions) + { + m_RuntimeOptions = runtimeOptions; + } + + const armnn::IRuntime::CreationOptions& GetRuntimeOptions() + { + return m_RuntimeOptions; + } + +private: + /// Which backend to run Delegate on. + /// Examples of possible values are: CpuRef, CpuAcc, GpuAcc. + /// CpuRef as default. + std::vector m_Backends = { armnn::Compute::CpuRef }; + + /// Creation options for the ArmNN runtime + /// Contains options for global settings that are valid for the whole lifetime of ArmNN + /// i.e. BackendOptions, DynamicBackendPath, ExternalProfilingOptions and more + armnn::IRuntime::CreationOptions m_RuntimeOptions; + + /// Options for the optimization step for the network + armnn::OptimizerOptions m_OptimizerOptions; + + /// External profiling options. + armnn::IRuntime::CreationOptions::ExternalProfilingOptions m_ProfilingOptions; + + /// Internal profiling options. + /// Indicates whether internal profiling is enabled or not. + bool m_InternalProfilingEnabled = false; + /// Sets the level of detail output by the profiling. Options are DetailsWithEvents = 1 and DetailsOnly = 2 + armnn::ProfilingDetailsMethod m_InternalProfilingDetail = armnn::ProfilingDetailsMethod::DetailsWithEvents; + + /// Severity level for logging within ArmNN that will be used on creation of the delegate + armnn::Optional m_LoggingSeverity; + + /// A callback function to debug layers performing custom computations on intermediate tensors. + /// If a function is not registered, and debug is enabled in OptimizerOptions, + /// debug will print information of the intermediate tensors. + armnn::Optional m_DebugCallbackFunc; + + /// If not empty then the optimized model will be serialized to a file with this file name in "dot" format. + std::string m_SerializeToDot = ""; +}; + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/include/Version.hpp b/arch/arm/ARMnn/delegate/include/Version.hpp new file mode 100644 index 0000000000..88cf8ed4b4 --- /dev/null +++ b/arch/arm/ARMnn/delegate/include/Version.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +namespace armnnDelegate +{ + +/// Macro utils +#define STRINGIFY_VALUE(s) STRINGIFY_MACRO(s) +#define STRINGIFY_MACRO(s) #s + +// ArmNN Delegate version components +#define DELEGATE_MAJOR_VERSION 25 +#define DELEGATE_MINOR_VERSION 1 +#define DELEGATE_PATCH_VERSION 0 + +/// DELEGATE_VERSION: "X.Y.Z" +/// where: +/// X = Major version number +/// Y = Minor version number +/// Z = Patch version number +#define DELEGATE_VERSION STRINGIFY_VALUE(DELEGATE_MAJOR_VERSION) "." \ + STRINGIFY_VALUE(DELEGATE_MINOR_VERSION) "." \ + STRINGIFY_VALUE(DELEGATE_PATCH_VERSION) + +} //namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/include/armnn_delegate.hpp b/arch/arm/ARMnn/delegate/include/armnn_delegate.hpp new file mode 100644 index 0000000000..8aaf255a9d --- /dev/null +++ b/arch/arm/ARMnn/delegate/include/armnn_delegate.hpp @@ -0,0 +1,128 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "DelegateOptions.hpp" + +#include +#include +#include +#include +#include + +#if TF_MAJOR_VERSION > 2 || (TF_MAJOR_VERSION == 2 && TF_MINOR_VERSION > 3) +#define ARMNN_POST_TFLITE_2_3 +#endif + +#if TF_MAJOR_VERSION > 2 || (TF_MAJOR_VERSION == 2 && TF_MINOR_VERSION > 5) +#define ARMNN_POST_TFLITE_2_5 +#endif + +namespace armnnDelegate +{ + +struct DelegateData +{ + DelegateData(const std::vector& backends) + : m_Backends(backends) + , m_Network(nullptr, nullptr) + {} + + const std::vector m_Backends; + armnn::INetworkPtr m_Network; + std::vector m_OutputSlotForNode; +}; + +// Forward decleration for functions initializing the ArmNN Delegate +DelegateOptions TfLiteArmnnDelegateOptionsDefault(); + +TfLiteDelegate* TfLiteArmnnDelegateCreate(armnnDelegate::DelegateOptions options); + +void TfLiteArmnnDelegateDelete(TfLiteDelegate* tfLiteDelegate); + +TfLiteStatus DoPrepare(TfLiteContext* context, TfLiteDelegate* delegate); + +/// ArmNN Delegate +class Delegate +{ + friend class ArmnnSubgraph; +public: + explicit Delegate(armnnDelegate::DelegateOptions options); + + TfLiteIntArray* IdentifyOperatorsToDelegate(TfLiteContext* context); + + TfLiteDelegate* GetDelegate(); + + /// Retrieve version in X.Y.Z form + static const std::string GetVersion(); + +private: + TfLiteDelegate m_Delegate = { + reinterpret_cast(this), // .data_ + DoPrepare, // .Prepare + nullptr, // .CopyFromBufferHandle + nullptr, // .CopyToBufferHandle + nullptr, // .FreeBufferHandle + kTfLiteDelegateFlagsNone, // .flags + }; + + /// ArmNN Runtime pointer + armnn::IRuntimePtr m_Runtime; + /// ArmNN Delegate Options + armnnDelegate::DelegateOptions m_Options; +}; + +/// ArmnnSubgraph class where parsing the nodes to ArmNN format and creating the ArmNN Graph +class ArmnnSubgraph +{ +public: + static ArmnnSubgraph* Create(TfLiteContext* tfLiteContext, + const TfLiteDelegateParams* parameters, + const Delegate* delegate); + + TfLiteStatus Prepare(TfLiteContext* tfLiteContext); + + TfLiteStatus Invoke(TfLiteContext* tfLiteContext, TfLiteNode* tfLiteNode); + + static TfLiteStatus VisitNode(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteRegistration* tfLiteRegistration, + TfLiteNode* tfLiteNode, + int nodeIndex); + +private: + ArmnnSubgraph(armnn::NetworkId networkId, + armnn::IRuntime* runtime, + std::vector& inputBindings, + std::vector& outputBindings) + : m_NetworkId(networkId), m_Runtime(runtime), m_InputBindings(inputBindings), m_OutputBindings(outputBindings) + {} + + static TfLiteStatus AddInputLayer(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + const TfLiteIntArray* inputs, + std::vector& inputBindings); + + static TfLiteStatus AddOutputLayer(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + const TfLiteIntArray* outputs, + std::vector& outputBindings); + + + /// The Network Id + armnn::NetworkId m_NetworkId; + /// ArmNN Rumtime + armnn::IRuntime* m_Runtime; + + // Binding information for inputs and outputs + std::vector m_InputBindings; + std::vector m_OutputBindings; + +}; + +} // armnnDelegate namespace + + diff --git a/arch/arm/ARMnn/delegate/python/test/conftest.py b/arch/arm/ARMnn/delegate/python/test/conftest.py new file mode 100644 index 0000000000..780a0492e1 --- /dev/null +++ b/arch/arm/ARMnn/delegate/python/test/conftest.py @@ -0,0 +1,30 @@ +# Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+# SPDX-License-Identifier: MIT +import pytest +import os + + +@pytest.fixture(scope="module") +def test_data_folder(request): + """ + This fixture returns path to the folder with the shared test resources + """ + return str(os.path.join(request.fspath.dirname, "test_data")) + + +def pytest_addoption(parser): + """ + Adds the program option 'delegate-dir' to pytest + """ + parser.addoption("--delegate-dir", + action="append", + help="Directory of the armnn tflite delegate library", + required=True) + + +def pytest_generate_tests(metafunc): + """ + Makes the program option 'delegate-dir' available to all tests as a function fixture + """ + if "delegate_dir" in metafunc.fixturenames: + metafunc.parametrize("delegate_dir", metafunc.config.getoption("delegate_dir")) \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/python/test/pytest.ini b/arch/arm/ARMnn/delegate/python/test/pytest.ini new file mode 100644 index 0000000000..719af3ce05 --- /dev/null +++ b/arch/arm/ARMnn/delegate/python/test/pytest.ini @@ -0,0 +1,9 @@ +# Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +# SPDX-License-Identifier: MIT + +[pytest] +addopts = --strict-markers +markers = + CpuRefTest: marks tests that require the CpuRef backend + CpuAccTest: marks tests that require the CpuAcc backend + GpuAccTest: marks tests that require the GpuAcc backend \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/python/test/test_data/conv2d.tflite b/arch/arm/ARMnn/delegate/python/test/test_data/conv2d.tflite new file mode 100644 index 0000000000000000000000000000000000000000..ab564591b0e54c727b1187a67be7d02466104f61 GIT binary patch literal 728 zcmZ{ize>YU6vn^WXoCdlkRgLZh7N)bqDygUMCnv<@d1o9l|W2NOvEvxkKqIO2qHd# z4X4!%SEzI)qJkY4!Z+;i{w^PMCCfUDu?7}!Ao2QEBx;nM@!q)U9#pW7B~kH}sV z#XM~QBhn|p7Hf3)G^@Q&b6Vcz`FWBhw{ex6gl9$mP-JzL7WvUQyqqRwToq+_QH+y} z-TkEskGbj}k^^!`9PQcsoA!k~5^2zqCP|RhnXI&HHd)QqVMWcx)l&U{?2&affNf@} zugjgSuO5@xY^L*F@=K&^(Ra~X@1UrwX(!PmcgM636T v@=UgRX%6XFUr+IGtf{#K@Em>2{hslgNN2!1SzVEQ(hTfB=a_%a)&Twh`nE$b literal 0 HcmV?d00001 diff --git a/arch/arm/ARMnn/delegate/python/test/test_data/fallback_model.tflite b/arch/arm/ARMnn/delegate/python/test/test_data/fallback_model.tflite new file mode 100644 index 0000000000000000000000000000000000000000..84f94018a661ebb5bce2f0cbf39f47de251a1e21 GIT binary patch literal 704 zcmah{u}Z{16dW}g!5~+pNTINWg<74bh1kf!VRfR#m4XB|%0WzykhAt9#BcCh`~piq z!$OQR$>xx7=)&yVxBKS3x3e1om|TpHfo(+au!#UYg!F(lqDMr;sk5uJ`@{~hLfkl; z>Hr($`<&}>`1xH`2V4$|hb$WuQz$(b2iYvRnB(e%+=&5|N6^TJ%_Hc1&9uy)%; zKwtIuh(2*btP{^HJSIZogZ?X_UNzK{T9NhKdO{l#%^jN?`tFY0+*Pb?F3EXu! 
z?>t+^QLWfoymaEF&9Zo(`_qgiZU=bla9H*q&JiP|hhAISE%aF2P4D%0-9C9zLUB@6 Y+BWYXJ%Fe4#@3FT^uN6Ne<`=d4{7)`({oJ|zO;ll~hatyH8DBL*Yw^U>XUAz~w)cT!S@Il@*JtlJAO3gafB*m9`@e_xzIyJ=nKQEk zW9I+?q~AaDwKKnV<~Pp#_LRsH%zpr|9ykM(odN34oOunvZv*%h0G|i` zoXS4~@VfwJC;KL!>*KJ+B)*VDeB-nJ3qW}hKoo!zfEIud02Kfdz$}0<0Q~^E0GtQ# zGJxj*dktnXdq{3OaM<1wbObfGt4#{|(Ud_8S-8cngrH z{y6o@Z+v!r=FIN_@(%#8Xlx8$#1_(6Bn)7$fW~2;`Lp?F&z=M7PQ7>9{xUG$see8P z;8gw&fKz+c0R{Vj%mRfkzk22j6mYT;m}_zh@B?7%-veb|0P2Vyz~?|)A~4S)P+kSv zedY`9vu8l}|8P3iDNfhr)NiMCr+NT$Kf`l?{2~ArPKx1jI7}Lwa+ZjB`^|BjbPC9x z3AO-V0QKJjaO&%y_XjTjIh~I8tC~-`PNkvranGqdO#{4N%;9wHPv>yDZa*L6G=GZc z0G!%S0JgOYvFVsjq(C2e|w!o$7$D^i%k`^=l|z7ZxY^Xd=q?oE&ke^ozP8LThZOyr#=4>8FBUtlw_~emh;i z({=j!`u*}AJzd}bv3|e-`D~Y-IkUX@|GiKD^!UZk&c+`-_hQfK+MFi5@E3Yd_x`D# zUp}9wd-oL20r*1z|Kfc9^XUZoEdBq}|D#{he|jeVXS;t{|1a5n_UCpt{~db&wU_`G z{lf0w{3pGq=ktH|_o<&wV-$_e6$vqKpZQwnU$;{Wv^hzTeEpPsIXG!b-CP zsx>9rDRC|mQ~RbFBH9$C6&YMjvtR)Mq3x89qOl4j;U&-}dSi%OQ@L}|7S6=snF&OJ zYAN}YB#t7%b8~b%y7~auPk4K3AcN2Nk!qaJ1;s$;+X3UU5RFEDwdqD2FD;7s%dl9L z;Dw}8I3dksX!vJyAcQ*vxdiubN0lsoVq9k#fKtX!kPuG8;?DbOEbfA$Yf?uOV+t*% zJ`#xIRa~eoFvR9*6!G=ef^b+G)uwc*;Fzk(!k2vef|K{CA#3J%NJp%mPok|?cbsr<-0C0R*6%CVherF+eG7g5#v=3ZzV z$!Y9qwcaBpp>PpRYdD-6S_F1h3)O9l^;f5K6F43rtW>idR7vJ~WMCsRLYhD!z#lSc z`oi5S(g1bGJM-OMi&0e_dHq%rp~bYhnQ_kufxJvNe(J!rr6nsZ(P{Wt^E5v!XW#9O z8GVEa_JK?CUxLOJ}W0H zhM6A9l&jsez$dS64Eq6OLV*~`C8KhlG$l-C$bebdXD+=(c= zA(QBu$5=~jIOjAaPyrp)4Jt5|G=Ww_R0WoJ`B)&Yq70G_83U1xrS5yO!H3!v9(+iO zm4K~%BBht)7!z@(=}nc+S}>u(Av*zE7X^dS{ddP~5i!bGUP$P#j)eV6P(3Fa_5_4M z`)tZNH@z(FS4?Zc&f(*arMx<|MemEuo-;TIsZY6xfQ8|dRs_?rLPZ@#fjwfphfU-% zsbV`WFBY3Puw_$xmd0?+hOnMc`bKM5WXTH2dIPE0j?e1sw)wc)wfVHB7DFip#sOqr z=#d7L#D;_*4NJVMFr{^<(Ha_ZT%xH^WHqCY6b_}lDs$O(#x8~IE+XezmTv^H{@ zGUrQ75o4klUi}YI3XYYRH)IJVJ_zNEO1HjwG0jQQ!{>yrI~6?_p8O@m3g=}N&cxiC zEYw)47Mc47zMHpflBK{Q&qevk2bSlh7%{Co!Fl5w4s+9~>Aev6qvj_I!qjD=gk!($ z-i2tE#4_me{MR!g!u5^ub)B6YdiN(5tJ~_fpjOFD*Rn83VU^iX&bLCIAF2K4$zP?W zo^Gdaj?Dzd6{1eB@+`{8#8PDl?8(AHx1Eo@%n7O*JVs5L^{n*Jx!1aBzLr6TYTvb- z_xp0$m+`7pq|7MI4!i4K!Kz7{CCyuS$XsMK+)LD0S_7gz)`rt%^kY`GvB-i~h&R<@ z7Rnl9k1wWkkE{fS6batcz<2^|LR_f#t#paixT+l|bIB@iznUB1SK%?QH90M)CnZ{a zDIp&f8R{Jui+MitF5Tv-z%^w<`3)K`=RM_0esk;eOlZ$IFfkd}fnm zaFlyv(yIgu67y~MRS|WxHMM#;(=X_<&^kB>)(pLV-_^o_jlt@0%CLx$jU{Q~6L>w{ zLFt3Hq9zA))For%B&0IMiH+_TSBQbAQb55riv58F!Z|6N(#yCZLrE$)n%D=Ks&iue zON)9+ilj@9E%w2;Rg^c-)Fw;1iDc4s42H2on-9UZ4UL@nasWL0600T}bemfJe6hqM zlK8|)eXOD>?r6$co6TL&qbj$a0zt*57+hI}D-Y72hFq@Lq-Z){KDWUfUc7~VQ}Pe| z*fRgx*!Uw?A!*&nygOsGyUrIjo-0oprx66KZk|_@yQFR;2V>0p7N!d3iyqt@_s6#+ z;VO#!ia7A61W*+toC1Ac^L}mNME(KMl2N`Oju1>>lt=lzk+-3;@X1Qig(dtwV}IvH zRW377U-6auR)_e*HyIkuVk{s#o9wpBd8Z%{=@UWSPiG-atZ|ARhGnAKW;{tgp28BW94ywh~ zSDfR|p?4gTYe$nv>UZiKm-~jmi}+g7f{T)1(*0}VNpzH`m?HZ~vm9>J(yZ^5mVL8h z2+|d1Mat^)6)tbFSKxuAjivX=+gs9Otz=TEl{~U@LF1!t>qPUhZC?qsJOR({e?R1> z`kSn4Vw5Y<5_>|gY1q1jp^C<{Qoxty_gmiGf_y$fvn%;@_AJ7I%}=zB*xp@8g=THg=Z4$8 z2s0Se#0=c=7Qt(k4X&;kc&4$doO09WQgJ6952P}Y|;&&8W!A}D0nsk`e1 z8x_8c&SmNk9FQh(X$T}G^^tGf3Np=}h(dXtE0r*JwALLa`n47BIbGRVyh<93V=+tb z4KA2uWBB>BNmw_1^W(k^($Vhq7RKkD-dy4FU-B*yxa5ve>4{Z+7JK2S0{!) 
zl+SL>GUtYux-;&|ynLGBWc?9xBhG{3FTZ47&Vq&XD9wIFbURH+7j(g;b)pY*SJe(f z>DVc^wpl>F;tvdheTT(x_BhlWLt4X6CtXxbBhRS^8rP&h~pjr>n8g4NzXcTJ9uyi|U8Ul=WN7*pkrenpBg=mtRfU_{PGCp?Z5MeXE(dw6?m) ze}0d5&GwX|^S<*<@r^y%BK-(I5{iHH(W@-j&2NGg9YpQ*7uOez-2p4&94hi#C-*?g z^5-D~s$7!KmseM^V@53;ztr%|J>1%PAoJod)%PkIShq2i#O=g({CD5zXs52pxi#k2 ziR@3-XG}f$Vl2eIG?U3U@;!dJVj%k1h*}pai|*t}wQzk>Tz5y_Ir%EWZIAvHl`s9E z=f)A?+pDVVdD;)LNAx*fh@o;fCHQkGw(BDx#6S3kdH2TeDymPNEAxBb&jW8o5SoRL zG1C(KBw=&t-Qu5M1)BbsFuoP?;`03^E#+-+xG4TE^7{DDjLIQE4DDO!uhwNoJo%Mj z>bV4I_SSIRed#5+gqVb53J=$GQ8(z!dsk>1}S=^i?!DTDmEOW$Qf zRI!t5o=lgGD(dRL>;FMGDV$_9T?|gH;;MXeB%OtmM?hC!3xeh(Q$~j|x3WcG9xO}5 z*1ExcBtExK)E%88=Z1Wub#x3{MCQ!I(9q-BtzCObBn|k-sLmzPOKS@+)z*`VHOt9&81gTXdmG$BJlxr z=&D@mi5Ql>Ch7L2iFR!1Z5doty}3ueIyG#q8G~h02F={Y3c~dWjAs0NdhC-vYFa{Pc=Q#bksAk8PdK*KN3W77t z7(!@J27Oc|L2}zirz|9)q-4E7UqDi{I0J>!a`!7xX&fim!^EDjPkQWG6Fk+=j6i5d zU-Ygsk@ZIZ7(=ziX6(}KwkulMI?q^~bZZk%NP{O2j=wBh>az8))AVSHW>}h(O$%4t zAFq#v^Sf{6o6z+PDdCl0B3MS5bD2;q2}xf5z@HZ3OfC&Qqq(8dQ&xIUf=7r=Blzw4tL2H z?=C;st-g(#uQpoU>+2q+&h1Kyn&i!)8(o5g8$PI7DRc{~Ht@WL<6Y&AE;|POdF6IA zs!ezInT>0Un~q&q_k04H9xI6Bc*HKx-QTpOHtjyv)SFY*JexPS7Xd}8mia6wxyq#3 zCmBh)&QF;iE=1{`e&k46-nvcJb4QD&f#E@1Ohj0o#80BB{+Gv)65%slM>H|)tV)|hD+7ug&ujo^Yzt+&-{)V&Pd3w)W1V9W-O{c z#oQ5fXf+a-Mg4L~pNl&e#t5|gT>boOY$L2Z&iGC%rioVtn`A!6Epf^o(jQ?JG$&HeK#0%fv)t6~mMIxaLf?XO7G zb;S?t7&d!55bjcJ536(RNk8=BE2Di+#_H_>*cHA1#$Lg4Y_@S=z1oVb1xJ`|+)2tt zz&88DTly`yA-l7qTos~MMmq+^)(w$;hfLk1L-lLCOZ7ePtJ|u^{lK2wFWq)pF7V_t z5gq|`3zG+Xb)THKS*x$k2V0xl9ArA{Rp*H|%hF#JBVDsaXlQMrG#p;tz^9qV?`8c3 z!rUf1?hQQb+2tRCL&6YZEtl!$h@}(f{5Jh#=W0ua3+1lp4{+|1g5dxouHVbFfOs8>BNa>BmYT!BewFZAOjVh8y`03#T@uKwYjks;j!&tV#hZf)OsYZ*`jzUhz z@uF|)B7w_MFMOi0Wl=^1iKBCYBbkFt*BR^DvG-g=p?0MC4ME^)((zh|&g-LwBqrB2 zSIiEdpEajSn0WU`b}~I)n<{7Q{=W4gSOvA5cvD+X^|htr0bM2cT&%9Sfa&OMG%F$z z$x-f1>iMK}X~B(Zp~<1eY33)MjiFh#h(zb_lVWf|_LcolP={&j@DgADtrbCKdW(aE z4k(D^N4cEOwRyFUS=c7#*mlf3!uWiB>R>W)fP;O3ZdWZ)SJx}6WP4YLGPKO#yqzDt zK4u=;p@_(_H+vWKnNNw#X?{OLGlbL`b7AybT;a^OQmU<2o@`LmM!rk=qgZZ-vTZ3% zE&3%t3$)&H=NLmMuln`HGlDrM_%@rLbU zOH3L7xd$X0@?wqxn(F%fuygmnRc57wH}wV)HKrcp6(qF0Tf`>Ch?@AKcXxAq1hEV+ z;%Iilm?%hN9`MqW?iwtNYG}0v(Sw#|Bpiz!POnJ(bOPisXG*V&Uc{%y#8v0_BIS2O zmeAGith?2I#D4RVPl~HDJb@vq%jzJCeAF4)(Lnz6H9b-43!InAPv?`17@MX5kz=4~2C$r6I2VMwl z7V(T}$CGZd7tBZNne9cw`+BW}_6?<^3cH>5V2JOOv9hH~7i8YNK=*+5WZ!1_q%qTjhbIY1uI&;J5@2vC5iE-d zB|)_=W90K5%&XtsU5px=Z%x%__&Wb$Wt7dvf%Z8x;dlpO4}BmWJ6N+cXl^Db=vhIb zj>fG8k-x+##up=kROd4s#XL)I zTYf@`4c%bl-Vp4{nc?!6z|sNG6%DV1-$DIY0w0!%P>;80FI@$D!0KUg--lZLRG~;8C2&xGh z+;lAzr?7r_Crvzu0Z-jp&Y7q6@Qg=>lDCyIoak^1jT4j1EV|u+0P`Ru(ZTP_lj9(9 zV>+T9^H4a;AI;i}GuSj76?+w{wDW%FIrLh*TIwjgrnEKJe_(kf#oV5{ zYoX-E39yaDLZHAy+oWwY}88@D@Tni^rf%$qts+%tgi#Bkt+HU3@YL?g0K0d zY_hyOrepGLzIr9O)pG*r82H9Lu4wiHgVTptgwL$8xo=3eqcOw{x-Bbf&Q@n>1MBm0 zAI@><9aBaYh}@`8)<$4F_cf9^cY6UMHToHOyk(m%W~b0neY1XH)s76v6lex>in@Sg zxJJiN%pgxpjJs2fiBjDi&&K3%gU)QMfgR{(-mcJ%mS~6fj{7BE8+%5<=nqB=g^?te zjJ(v2A=D1&MVy6})~q3KtgzcSX^84og;xw&lugi+jyOYdrqZ=6!x1Vf!#t|TyJkdW zHrH3=JGs{nC*~pTW2^H!Z^Aa`5-0I5eX#0z!M>5F8)#*=N`*Q&`>+gK3`j*L-xByC z1~wB~jR())(b8;tTd}3;WVSDbPd78mK2n~jTHtNoOvz?Z;`D(qv%Ikpm%g_5nhLG` z*^<)b@gWl-aM9zLYt(6zOsafzW6%V1jjAvcDC;oO#4#!m zIM)an86M1)NTbk!MxGz5c!#`h-hw38oPZdSRZL$|#>h|M;8MNYW|Y!WNJ2i_2er;> zLbOS(4nc(m9ATE1I7RUXHLQBj=rlGEliCghJ)H(YVYpejZf3}D8a8PrNz-GaWcJ)B ze8OCGl17^tHUg~AM3Y!be~e;NduK(}l}T1oausVTa~XUlRqEKP9VQ4QhuXT&3vVkerKr*(#2e zYP;+oDi-)J#k8nB0+^Be7&{SN+_#Fu>!O)b;%LeF!}s0^X)utEo;-hBN|RocCvVKi zP2cH(N=*LXmy7-^Dh^6qqs)+o+pQz2E{O}$g{fX~uM zGd{T}BA#;**GX5T4M+BNg!Gg@i#xiUDlB^Jf1zL6lt_j&-j^}QeoUsn_gV}qh05zQ 
z94}J0o0W?LVRjXGYW1Mz%mP{ue5`k#f=gMG&SZrQn`X9XDq3={C~=~@rqJFe(ZV>n zqAFXdIEdXibk!~FSVD{_ViQdezrirgYcOG$ftm#qgC03JgES+yZP9n2vpbnACN)@s zpdVYRayeLw38m}oZup^r6ol&#eO^|TgRh~tM<%Vm1kwf;l zxW~GkBs0&tXCEs2k$ooG$er-;av4k<+|_#w$~C&2-ziT|kd|LwHNr12P=xLe#)l=< zaZM~smQ2}eXBR}nS(+_*+;6Y7$ATkon|?H>HmJrgyoQGR!r{sVWacIH*I{c%3^a03 z=S#L|&SBDvW$Qfh%TD5X&I?^IJK8!1w@f75(f zdTxNTPnIuw^`o;W#(J1Wc$$7g9`P>8(0r)Z*|*K4hNDQ3{09Z_`XjToh%0zRvow0I zmzw0m_=*d!^Db~geU~Q&o$hmSxfvzJpqQK)`gx5)CL*EPv!is&yzYWGv89jWc;TIu zDZ~6C;ldw7@6YPllQL&8Bc+7e&G_-5hV#D2`Ee1CV&a$salKv4tU*IsJZ3J;i?89^ z%miNQ4{xnv983qhzX^6AVQbF0q@mfgt<~W|Hk4^tcY<)sUIqxS>Z^0rY@{@XGWwj8 zM+NPaak0E+Sh6Tr*5rMW?5=C0p_BoN7g^Ra^C98fxI5}bbjZ8bkvJInA`{%t!!Sph?7KaT zrFr0KkU!hmzSOtcunwD;e&>i%EnJK{h=#=u94uE4GVaN^dhdLj8GB#V&;K*Y5J6Df zti;SeIl4?$)jv5K6u%r*=N3O4`hK2&gL`Ml8|A5XX-_}p#T{!jO2j=bCz?Ua@wm)&6R5PPi-HVgq(bcg~gissezZ$|V z7$3o3y9F_4=g&z~d9&fp?k#$pvMnOnMZ#4cV|H*aO~2{{La>}Lu|G2|3WRTke10@VVZ6A`ahg@H5aZ`^=v+r4}}bd954a9>k+;6O%I|Sme#% zlNabn2M;;^Uq#WEOp(Qe%uJZ=iXkRGnwp-OW=g}A7tS;7pYMl!fa<$$kjo?Ph+lJ# zasJ#2?AM23uY(7>wUVCOJut?un$|t{V$bLhiwn^tKzQoZ2)gbZ{p61&pLnLkJJ?U} z9nh;|^H#`SLyE?+kk+(g@=C~?|8tPf-1i*^)K^*j21B%>VQuNpUFTv3euN(AR}86y zja!1>9XdDh#;1|0pd0Ot>eF&EI z(d|Cr@p;Gy4S!H7K_@Pp(2RJ5x>DlP0!sabAf3PgJ|x7TrlpL3#MK*ZsZ^218W_*M ztWRr`Zi~Sf@W|}Nm}Ct}ib|MiH^;pUAG$m}dmd3`x6kI(!`_+EsI|X0NUjq-6BzQS zN?vGRWj@d*YTPhX)x=UF3v+deDcZy`Yz(f9Ti9;n#%Q|k>gurPZ5nUZE5mxNt~?f& z@u<<`4jWA@CM4Bz@nw`}Jquojbk;fo;}R1^T@i8u0Te_pKQ;^tS7oK#`C~Fk$#fD$ zb&mSxI-8xs&~6gfvhiEFE*_;ctsaTrkg>vHcxV!80L>BF4$q93?_Vs#Jg8pK*5szl zy3+N8TqQWHw1Uri>)@dfGie3|10aw0AaB7K>?XCi|9U_As(ZA*_|{NB8M>62u=4~c zRX<)$dy8~Wr07idv9C-bpA(#gkz_`h&2;^$DbA_(I>|$~H15~MZtO62Xoxj4F*=CC zqh7cqc{7I4#M~Ulu~;%v4`2GgFeDphk1~P=sg^Q3Juu50-%KxfV9IwWwz(n<_m}4r zO0PG13MK8h;$s-E0W?YYkVh|%P`_|X42T5x&IE86Px{?lEb zX@ooXe(O(vn9zPZzHndkz);wC(cY!-BnGGJyB!PMq8vk-eCR?{Pg~}OY}RGoyHQ_b zoavTUz7y35%IOhKW+9e4FmvGCP-tAYVAi8nr3JGpQn`SUjL_vA?D+r@dCbdtC#&p+ zuFRGd=crprDIuEo#h{O>iY7jlta3-X=eR1FRB29ZwIQ|uH6d4wCh9`9Zsy{7lh}Zb zmq!H_8cbi-&$v;IcxSw;&E#9ebSe0{mk60msxm3Bb2fM$^g+L;nm*ynqn)KI(yjxy zGvc0L@WLs9nVHo0#8UgohY0&_df8vcPswRT(3eO2i_8Y(+LhJBFSu6#-RU## z)yx0H8F~(or`)f9<|F+J_SKm)O`z|;#lPzMm4k~~p#MMqEx;QFBn03>{qXOeJ^MX? zQ}uiQ`Ps84K-o=zBXtYlO{Iad-v>BZ2Y~z#$cKUQ=Kwy|3c#hh36%ZjPo6!407NN0 zkbmJHpFR67z~|!q!?R~!2igz;Qq?B_Uuy`^!vpG@K-r%H#$W+mbf9lM`3wFPF!aBd ze|7fFe<%OS_}zbg3fGVive@5tNE7Qv~6U#I-wQFni^w7HFo|Nv_6<9>Ga)sb0FB)Z&lZnCC(7EvX&vV>ATns$E;sxo0AH4Nw^elotxOy*Cssdn_R3;=R%FK%{4Y2 zBsMrCMYn-%qK?xeDXx={<&6?<`#LXKsyIbwS-W%SsAz#NBI8-AoG}6eaWg)w!YR9X z&vTTwNRi7rk~ESkTl;4;z5xxNoaQJQAGK~B%wfr?oi(fOKoi=2tO--xYl@IfDyD60 zWll*7{-39N4pQGy%^DxP`-V}h1y z2w%{tNKOd_CI#1%!yeQac-8`DO1)Q!8>*O$6MQg(e@h z$FQAZ$n}=GOv{kQDn4^Y|7qKaPsy*St<+i6&7_T7AEabx`tf!tKbJkbFsBO0wdZb5 zqh_vg1#p^Yd4;lC80tlXguSF}RjVGWGMO^mebu`qH$$p}6nx43Siro=@ROEeu9%E< z68+e=J!Di!Ax);We;k`Da0H?rbf2@zx6g`~=ueK0N8fE|w{P=gA5jlGQPF0K&C4~dJKD$Q z7%g05d$JZWZj+VqH&^f*v}Ht=ynGN5Yt7|*0vn@=+?=n^isy^7o^Wd|`RAqMqA_Yl z`XKiW#G-=tMzlieVQGcDye+U6)v~=YqlcNTj`5qQ_>;Gc!t0DFrNcW%fzPe`yYB37 z8pcw1RxQ_T?)Aw2(s?d?IGuj~arA0&wih)16Ohb){zA8 zlBtiVT(T^|oi>4kf(ySs$ngkD^sGcpuS#m8yYBO@Xnw!udP2Gid{i6V)Uo%bDnrg? 
z(v7=W@>AA3?aK>&@?=*0H|4W^1eDThb=c*g{0=33d>OGWqJ&F{;TTtw>ECIdM8q;M z#5!Uq<<4T?QJ%9I^b-ra<8SF3y4i54G;h1a%D73 z&a8a8^D=Rh7#`ts!1Dtn9S5+B>j13mtuv3?*E;(Z0syv{y zicmsTJjkwD=S~#ChS7s+vr(`rKS*UtyHR@!+H@&^QF)8Y^t{V=%P0JcA}niG8Oslr ztM2JdT3kMd&^VDTKf>Y)i->8huGVo1H^tSZ+V*W34SskW0kI$_6D602h{KL+!C3f#U&*BaNLwCTPR?={b~@V(qoD_do|xU5~@@=~|dfzUqqn*^!4r4^*NWA_6v zX3uo8%oq4U;02PBPaes}b-5&0-Y*&$4M&Nkn;w<&$_9RN_)5@XVAN5WN$Vtfw8@@@ z82wYKw-)h46n}GrScA!$hL~fulA-8Sa2O~T)UK@SSMA%yeBHu>fCdBgA>)h(Of-~k z(vLJ^j5>qGK4C5fO9kzcRy8}2;%d{EdnY70cf^ARzIoMz<6@~=P;NRgz=Ml&X*Q|d z)dnrD{FqTPeOZK$F)xmNFzs1~BS2|7Iu-qy=VVWHggp3ahw%L(XC&~2ws z+RniZw{^crDsW7@e$4dn^4`U^O7`pK8Ak)^S(_riMfTZDK2XWJm~u&X2hG*m^71kV zd%PNw>B`A~^>1kJHPY8?>q%dCU`*Aafo!_JmM-wSr|kOIU9!14h<)3br1;E@_!hO- ziq5CsL;dJ3<@|)pu2tDQSG0pb43ZwPbU)!~w}#I9p!xPCbIAzzOs*WtX%Ip@Ye^^TIGJ9*1n zS}h=k3Zh)xWFu56+AE;%Jt0F|R5q5$JHUqJewHC#h_={bm!*X;9rPdB9*)unLCW=~ zxA&l6gMF)y_@4#BfjVq%?WgmMx8ZuD{pfp(ApfJJBA!PN3ztaz4?NdrXCS7ror4m7 zl%pEiT4fCJgMB^qC4??rNHa#Ka>f?nY@ZUGI`VE{5bq{2p#rov))M&DmQjw;FW_gD zkrnPRO-Ph5>_>GXK=2*x%R~((t;X%~ErDj0Oar0RqIgrm`A%WY8F!gLG((t0OdkvU zw!FvBCXXwdb}mL zjgxV~#~9dfXgEyQHvLAS6r~W<*GsHHw{@BRh`K z9!sYZ#u=y9k+PYDODyfyr66UfS-b?Ry{;CIHil%Ic9BI%VakF8TFSNpSpbntSv@03 z7JMyClz@e7|Mdg?edBRqQ8bC<3JqE7I5S;r+r&7ix;zmULlS0JgHENHJrHkLTk{lL zB~gn^t+KQZ-@0Xz#kWs+qWp}TN@YT45E5%Vz9#2Nh=*|vRK-ZpC?~rc>0rGI;eFg9 z1ePK8suoLQC_v`AX`%sPYo`u)WO(7YHqDt-TxY5Ibz!x#uCl__L7bxmm7Xk+ZIYyG zJF6+v(3dRxD(}SO&W?q*iK3Mi37qJD4eYHe*>fYwJ)6h49tRb%?;NP!sXk8S11$Ez z`Y}FakJ;nwP3Q-;LQ)ZXbkD*_?zpD;i;FUyIP3A;=P59A4`>;cd}Cn(5wnTbz-YXF zhDm@%JSe7Odxt8!4l%+GG*E4=>)tjUGbp!_Rtk@SFBQjF!$(UGE`{#+0zE~R?$Obj z5C?FUD>c!<)b8UY{BfC6D0)QVQ~&3 z&@prYqW@rlClXAC#uz#+!Y^a4OvfbZNLzi=e1yMuGv}txGxMd`ONZN4L@1vwMjLl} zma8w#ZYNbKmkV$AF7U=Qa@yrh)(rXXT9_K>*<*>`K_`WArJUhOYo2g6HEcQKwIzdU za(`KA-Xkvh>V{$eh6sPt!438-@!cZHBo%v)USb>f_qE?kP{G{N17j5-pCyj%?t3~* z6&K+o+f|PFgr{ z8AssqNNV2SfLE}>+aJrodkFhdN8%w2zc~8oMCi^82<_MpGl}az%)s;BRsj?H7Gcs` zHcg7oKKCf@Tf8ue9!Vua)7-bPk0rvD^F4qc3EHcV2hlQwU4XZ z$N%v-Gz~6d5AGTlmJXJMiQwZ@=NJKFv=6-%<-+DvqK4gBTI6OnggSbAsOy1+Xqewz zyt@trJgNPow0yqaAm`uQT^x|+PP_!oQ|H3njVdDW7rHBmExB3|dor0De1{fDHk9r_ z9a&6omG{9c8~pq0Zjd7`RR-FI2}*ub4E(n;7_lVAIrXU~cUE`J%9#948#R;Tq)ny(_^ppI&H1qI6)(NsnnlpYb%oh-tbtfPvt(0n6TP@2o>Dx9gOqF zNLMs7dkguW6h9m{^4}pdGY-cM=3a0zI1dJk>#w)Fu-B;j~Ckb4%= z(v*1t_$5KcvgWBZQt*g@J{t`na43v=84;ceYIRjdZ6DtrZig*ED9y+yow_mKCtb1B z6IIkONMLhGr<=1|gqWV0UePUWlVQ+W#Er7sdEC-uWF=ZH3YlGViyH_TD#$3<6?K$8 z>X_Xqnz55>dQ{wM>z-h$d7JwPMMwFeZu>FTvvCl9lK(`VEdd)F!&f~#So1A^l4UhM zrt;Doo?@YMzbV@Pq$1LPuzpaST$P=CH%VLaU$7rQhqqjOZ_lT8Xw}9`KRA)#1)kb> zQQr4PY|%$=@59Q|QRIF2(8HsJ?K&5vNTkGg|Dj(**_=_8w!z`s;jV;K8klQTW(^sU zRC4d$EBYCnb96RA%^^5!Rqya1`O|H$!zGqe$BAZ-dxqy+PTLmpiZ?9Bn42o3hsWjI z9O~gycp&oYFqVWXb|l=x+8cbfE5`Ek-bMni3~u_|rYS*;(|xrokLuNgI76Ktp%{9c zq)oqt=#?-GoaA|HMLk}C4bpx2mVJiwz!nnqajsIKeMVba0oi^Gqg%}L~; zHxRvdyAE+_9lOLy=WQBWzU$u+uX&O_j(T{JEjO)^(@z($9X(T;>VrSG$XOxoyQOON zr^WMa*D#Nig-8|cw&?;X7%!7SV&HAdu7nn z61-_jD>VU8Vz`GvZ$5Bd=j==fXO)~4>k?6ar`%4nm;t(OX|-`$pwU+>C>Y|BG#n$^^-oYLao4TKfkNwXZk}t@v~#|Dx0WK3ZmS4-CLuf0)g( zzoWQ1nkF0tlD z@2x8*ea4kL<2P#@^Va6=jE==2`4;)TKe{??$Y(KoF^q+rig7#3Kla<0=rC-tFWR}i z6iXmV4$ucXG(XHnUdu3h9voy{y5rk_&`4M?lB8Z&0Co(PeqRA&+dCFey2?T zSLMIe!>A8i#eGpk!wZBI>ki3|M_WX%WF|k{*w{AQv+Yjqo)95*;E}LcSdky?nP_y~ z5rbbh6!X!o)W(4lOP^j8SKjreK}p<|@Pv{?B=Sdm>`-N!rG&cuP`+}P;wJgz`9}_& zMz*tXu+Z#-}lHkghcrzUm(@VhR+f-Bl%$wjStkj#Rkdb;}Ub;l|Ide= zEln6p6Ap^Be6}t!o3js-X_L@iH*5lP_QWD`^`*2)<*f8hn;vjS`mN3hWA$3u3@o*w7mF!pH8w}b``hsTB$%?q>2J4Si4J7Xb>~J&O`-!q7Q0~@V#P+O@zuBMpz-Gg*Kf9Nn%aA?qgFV?kFDy*Jn6w zCalr88hn}oV(EAo5b+c`h3X{nx 
z8eJ5WPlgh6HLP1`n3gL0Wt7rZk`eT_Fzd>&hf20?dSwqJoRTKzR7fqAnPCi@Xv`z@ z-Q8%`rGf=MH16ADr(o9$1*kf`X(tfI$MpQ6TMMqqT77oLMD?a&b){}$t(IjRgT!Kt zoT!bg;ac&L8c4oR-7Kyt>$9Thfyq6U7hiM)+!qKgir;`QwH zAIzIM*Ps-__Xu@-CF14HqaO2SIWQT$4iJIbQiPlM&XI{WR<{K#l}5#+_Y1L+>89d3 zy2_9$Ea|7>dGBL#I%g~|`#!F>3{Q=>{w#I<+Lb$rRc$O_BX6}Dk*M1WTMuS)l_^(J zCIpKgc`aDa=)3#jsWwF~80sR<5CQnb3zKbSqRmFB$=SB@meo96#HwQ#GzS`uS!0G|Y6$#?Fj(XGa>1T2i-Koo~)3=fguD=A3gX<|2!sSR{)%=bSTncyhY;+}O9H)M{xo zni-829*;2|?=I{$Y*?-yW3h$-113;-tLm*P7RX}pt>^ju{|{HbzkCyUxbya-$c^yr z-CMW+cx6LP0MY(}{`rfw@8)c# zfBp5<7RFv$GCm?Zq-IoMO@f>nD4zRro9eGlNY)}?pomAAM@l zvi51u9lCVEC;MoK#*+mykf%HVwPd!|^Bk68Yq>Q+aNc zrlm&JvAaF>*6;|F#f`_JxJ>AJD>VYd*CttZ$DGXT^Q&zEzCF zvRy1mG1Gcs(@HaI0AmxbVXM!ssbrByc+_(y+xo(-v2RUpN^eKHd_8U@AiVKBQ`)oU zU$}!6uhtPc-GB5Retn1Xs;FK9Qp_z8>kQ7A39=*;kXw zRvqtIO$H8L{5BSggn&Ek!P(M3;%S?~*6>&i+8R~=7J4nQG5O%rzQIKcf7@MHTKwKT zlR2H&EtB3;-;L-25c+XwzVdoVrU2fu2gaU_?6;tuBPRmwsOTOq)hnS_D zIzCC-a3TsxAEiKP@f_3dZNsk_WEM6!D4}{aAn1|eFHT>|AB=1J3|eF6b1wMk)Wh6g zG{%(WtmJR=?;=|G@;G$0qbGu7dyKv-kG|Pwyh>>F+r6`+tKMpo7Y*jTRwq~9IlMRt zY^KNDQuBB5<$Vf0{Q4{0Pm-4A%<>t(z1;M74>VU<@>6_U`YXdC$Y9zfadL_C@Xmj< zY}jZuA#y&$tvmnzwoUPy#{GG2tfWhhttoI*^h=D?xgkhW?a3+~$#H1NZrQLTIR}4QO$zCXJx{t2Q zg9*ys9a%Cao3N5T8SdCl*)o`1_g>*11ISK2mjCkk!7RMHv6Z!M*r{fYENtPAsWPCv zzfcQ1Ih&YxL{)0nJgfpBM4;vCQ(^y_y8SkNqx7= zoEI+!(Iw}N`blu2R?6CQqF-!vUJVW%u95TOllX$waX63*JEw#T_b**$7ycd2^$l*Xg|W~;_+#VyY4uNEddsdIrNf3r`q7rt&WI`4M^5B`P?-){$tYlAi4`VGOb zc>5OZ+LFoQE-h^w?q2`k2;o#^y83vQ2fnPvrWYK9c9kYV{L1cA4z~!AU*6&rdZx@q z2l0RIdc8<)+acwFfDlR|WJ>AMYLKJOdZ^5`5ubrWBZ~myk$KiVMHwDQ}9M;AB;*G${bG&5cGu zWstqIKiAqV{v?hJDwCRfzPlvr<`uC5*$EY!MA6-uQqxZbB+mj6Q*;RVu!|xI4o$h|s`vZ=&jO_ntQcG&5$)6XC^p0uGRB!*j)s?Y|gZ;GXpA>;u!}@}C6A*Ox^xRedtt6lhD$wy@fu zi*=kzojDoR@E*ad99nA{IUSV1wnMg+)>v)cg1K;S(A-&1!ONam1H_~aCY{HMu^2w&7yy*k@Y>+N?)5wMPZo6I>OYP}KkITuItLh5% zj=ZGdx(1yPbAFc7u=U$=e?bnz=T)jvt*WYnB2Qw@%8e|0?5YiODJtDVj^*pVBI#Kv zZX#%$L(4u+94%C$`TI-x5b@A+ziHZ69o69_I` zgx_fqSAyh`@PNFVwwZAU^+u_=fZua;SssSI`%oHJzZ=^7UUbVuy*`Ta7q(n*l%ufG z{L|(2#V!Q~M12!3bH=~71jyBNS<6e2T*X)&$$ZzUxn-^LYyRS_HGeem<@>ke43=KZ z00(-jpi(7Qb|9tr43Jd%|UV1$#+Xa56rQP#bnz=Ai10R z0b_T_913^&JPICoAb{CTIWI6ELa3;0iE^bXgr5A-Hv>+^Td|1*E} zjSti}-dF!0{MC=kyXf8fb> zu=bnzZ#>k#jj>zrVD10yE%^TnxcZS&P7^Dn5(Nv}@4v#1_@CT0zwx5`#)t0P*!2JW zTzuH_H!tk;e(&Y8!++qgjI8hc1Bdm$Tw6Z5b^Z5#Z#S$o8%)bl$qpYP!EFUiVOwKE zRB=|MhIcnOd}}=rVn?|gd05XEsVeOeBH#in8XZPG!7Ncrq_M?hmW0)62-r3l&MCT> z)K6#%k_=niXb9s&OYS7hY4f3!(rNMu_bkYKY(~&l^kug+rR&wJZFMUeEmYEC);wi* zNg0);KSdCd$&*(C$0jOn$|ngFLi_U6w@{_ahu0 zAC+j-6;ix#I+9iu_@W(5uKIZT3^%~lpqg4DsESFz^(LQ_R_GITV#rD(|98pQO1+eH z#YwrA#NN(jF76z$w9~lZUYQ7f)LOjOH%9DW!ElreZc5VvqAurb7Ji~arTp%)PCvF< zk3Q5xQI#g1ncE@EQPu1Qspf|Cs*Ir|Q^o3;@s5#>)=b5fpj3_kijv}7t6q-s_M^b$@vhBEyS&R(~N0*H4Smgtj8KHd>XoxI^a+s!nHztyP}e@K-ss2< z^|o#;_nKNzIei~>r7Me!K%f*kkOZL~Vz9oF=Zr76<97PW32Zr9!!Yl;NIIO?>#n?? 
zKJ|Ia5IKn$T3LN-Y>3$UsD4Yz)y8GWeIGL=PtRnqPm5iBPeSNpRiUxI`d;E1oyxKK zy2;l<>3mc#MW*{&WoMKFQl{f0gw7*zbM$I{?GsbPwbtR7&1lx-oO1T=OAF2)yqW$- zVzgmyMjO!QzrTC9kcuyOez^OfydbEFf~Txi8Gc*hqP|(U6FYVPB%Rg(!h5FlrdMPGKiRP1?a z&r;9jZ3B3!EsVAk0iY$%ggu9-9r<;t1`%uC8NT2TX4L{OTeNkpjO$gY&-DjNJT2?;+L2%fIiPy9$Pi%5hX34}x$DcJ4zCVT`Gl&nu-p}j z55cPPl+Psb)x0fj_#QM-^A4|Ka8EyVNAzrd-m8q$(t|)p7AE`nRXOHdl+8s@Kj4iE z%(eMY%P3FtrW1=JY1pwFjWw520qLvw!0y@J*zPWH_v419FE_gE|2hDs3sf!Do3H~BC(&iI6RZBg3L$MqUx=wITn<`WNJvTL#mi-|v=^q)dHOyICC0H6R3jDnHa5b((Am^}e zlS@Srg7}rB5JfCBB?M~Ko2_@>rW%U~QBVjN=*gvJJ&*U6WT1u_j0Q_uF(ls-=X3c@{piU?9qKhu{^_HtF%=`h)dAy^!;U-2ZWN0vf09d}{oeo!y$(YhjLna?b5WxRn< zeY7j-siSf)oXqS-O)g9?BohVoa!I(c1@CD>f()J^veaA#d;H=fKQOaenBE7;8}EN8 z3C9;G;_qt&c7#Zm2nw!Fq9yOFwM0raW-di${t^frTiWr5eD_&~5fb_vs{a#WUv7wi zr5{_|qJ7PYeZ2P&mwH26Eh@5dgTIi@Ifvc3kX~$rWAB*EOoiBo#-i_J?8oC$j9Ij_ zYYnj#N^Ly>PT;0WUBQ&hr3IfkZWi@SnAam+;zQ#;Fvit`T`0$s0D4xSCHA{LQbLWF zX8CwKAS**7W$^);{B%mg$5?p(>{?G*yt@;U?LbBe|JcvS6}UWpxNmmna%;Le->rrV z_eh~K)9vFiE44k+ys65m;P+0WCF8p8DpEhlE5Y-!5mJGZK07aJKPbH; zit4kgqq93c<75Xk{2ej2=_Rd8=eHwl8rw{1}ua{makiP^%`x!)K()P&S=vpGwHTK z&Kz%R9NvRO*oYihtmo>yvztgzT;6sn8|D{OAl^-^cRMIXhp(x50SBcYEKvy$BGFdX%PAt0!<|@p4 z%2ZHW^@a9^AtU;9@DLgoVg)#*ki_&p@{#FC`>vkbE31B?xX;e%$017J7}VDyPxBip z#b)I~Rjj=Bha)%CXiu}>Fi^Tyo422G^*2(Zp}LkWVqhNH^LK)zvsk@f01~O_RTcTA zL|1OvJ0x8JpObwW(;_8HlM_~FURB|tqp#J5+s!#$4T|3BnV!j9eJ7lD9{G02KdnW` zXNyxoz5Uukq;D=2@%Z|Xag&EerNJTQPY~H9E*Grzb!mY@!ypD{q5d-N&XuE z2>&SHImpPiW#q>(wF%kM=U%76PL`t^nggx$?eAP zoW|D>Xq(|5dKD!ThR_%2a+gpe>*>T=Z&=c8dG)=ht;QA|wM1DYdb%;bC?6lv^A}an zHw`MBX4Vt+$(g-&TWfR03(W?sf#bD9ZO(@omxgv~NKzfX3Bi`6tWDOw9ORL1LdTy9 z!tx?1*{p`mC#1NSbJ$B2YGzEEm5UlGJ4Y#EY?MsL>QVm9tSJsc znqb*#E@8pZkp&bM7ZT7QS@keN8z3<}4SiG{;= zf@^tHQUqJMQ9Dn^%x8jcFALYVt(Z!ov!``beeqhSj8WB217%_I<9 zSo)-P>kFK2yE8so_=`J#K75hPl-Q?*6uoHLRP!(XVgT! z&v(Lm?rhhYRJfqsXV)|b+w7F0t54Yjf+1c4);1(=MZ8gvFj7 z31&k7i>G0ZV1_7bpvfaIwEccc5l-wMdpNmS*A=c4x@ws(fJ4YdM}M zp^$EQq-I3yeZ@~g{KgUL`z$y-&ije`T`f|#)Vl_gMr?ip$Nalb{t9|#ef6Y|?04ZO zeQBP1;D`R5ng;yrufF~dV+%A~D=B6XkU9*e2WDH*y|1LF6qlNjyu~-`LSjxw0;b_; zs$#OjF2SvwwDsa)F=+B>_a<$Pv)Nye5TS&`ZfWF>)RmmGuLreiOF?Hq>s^eES?_b7-=Z$PeonoI3kZP2nJm}>P%;l#aoIM?6h8{gQ2po>AK9`2z zMCieTb$jdR)R|D)Hs+Ya8igy-*Sg+tmNDLGm0AgEn&JkT8Kt_VF5vS#>D>37-*cJ~ z6W!nsRpi&b=?+f;358Pt?QZ;0nI)1-g|1c>x+7{4{g2+hYjXB(y{D&z6nJ#~hoM}C zqY;{U)|eq6$@~{mmO$}`Rqebu!cVUnqYHP)24}P~&$g;tEWNlf0`yyT|GvFK z9|wM1sOWOaY+LEfV|bekHxL-J{T-%7sv5SB_e!Z%gF`s7MLO7bhw}vaxr2jiQ-Rtt z<)Hg^ODa%(Xw%7<#Y7PaqLP7?cPT@uxG296QGo^+T|6cQ zP`0c2^Mox%9X3^kO;saFz#}`URN3!r0^=sMo3bexn0pZqUZ?F zxHuPZFl(-(gD5r-YVNBGo*sN;#e`#SUZ`v0q{36DQl%XhwENL@EHdL8V zSd@Yxm1Lf{U;0Y9e@~CLEY4M6;k{)e&5e7~bRB5$P6js-8tsREXRQR63 z#jaSi%19Y%%1kNPkdk7KtxwiDapKjq;*mK_Zf&1+>Veiv?FSY^FtBIpa4;=T!)%6W z-P+qui#kXYNDNpo!m(6dqwEj`PFhK^k+TlU4<*)rDm&rY0yi{khJChEyJ@ntAy->& zRPpsoYwj1-LXhv2Pn!h3G9M5$lkX2)00u)p_(JP!qpPJKY{UAWd6xxr{g&6|@$1R} zXh?=c<1!MOZuzLj24ZdhaFY+5h>q-cgNoE(%%P)cjSuiZQf?xsb{QmgK z{x43ak)?q1i%v&G>OIGPiBxUK(dXOsU_y_au2$f`?Te==?jtY zS^b2+T8oO-BQ_tHxo10gG z?eaw6v-HRvIy483RZ;`)gyd(`Xbe(>{WrIB9eE1HypcjIdumCYp$FurwKT20U5->X z?=%SHEE^WzbR;>{6G_h_9Ild{Fv|-#rcJ6XGvXhgL0a`E&Y^c07#C1A?TVU!OPObv zkYn%sj-_pXUjK`s_pf`0GjqU+KTRAJ?|I8@x8t|on5e{2JsVam)8o|i*z>xZ$x!?L z*5`E4YS2Bc2g&vl|JqjudlmY^Z9hzBE@eil^b0hd>q9w4^U}28nD}MxbP}jK-4U@y zdT1OP6^Zw4)$mFYU$$9$p|rJz=FL=eIme%WAJC(Vhy^J43X_PwcMa? 
zDW=MfO2Lb>xGauX9-+z)S-23+8&F$}T@w}n4M*$2u0~R1n`wqggC^iR%*vrW9uR1Q zVUjB;CR(-yx9H`KBW?FDW$Y+i1)Y%31#ALt>FqX)cC4_5Q&G#+HaxX<_^TcD<1-!7-Ww>?m7HTt)`zD1gQDKfzPu!sSZ&f z8Qz~+QP+tkxYb~0YaVG#A&Vxva&Fo;;5eNda46z0)ea>&N;lo~x4hY+yov5-A}zbv zrNmjJNl8x$m{GZZ4XzVzi^?&5}Wa#U8mnwtK{?|03FRqm-Y@X07JeNnU6NyW+_;TSs{`~LN;gd=HO#wXdUOtGOK zhMO-0Iq0A?yG&E<8c+lvTCJVT8>XYa_Gz9j^m81v6&^3v#zQ;2JP?283PfOcN3P8& zrR=kyP%B0ANx^CbMo;ZfE$Q&skRk%e5!k5&g|<_ixNL^ZomOGLpJzl)(_PygGS1 zFi3xNbofGX%!>JaS&bkA#r^xtvqhI|!$TKc{I}JSHw6o(VSi8rCxE0JjCziB!I8~J zn+-`S0}o$H9HK7wp)N7(qyXt&Ifdz(=Z#JTvkurECfOX9X~m z+I@lXwxk1rq3lur?9^CFOs6UGdMB~i%lCB0>-)9JydXN*D0VS#Bo{QOYUN(cHhUIe?s9L%_fpPX*e&6 zE;M!<0UvGe35{2v!&RU|Q)#}ht*15JhS$XuIYwi=R2+oaPPazQF*W6|o!=IA3C2Wb zGcJ<)cv7aC4zVp3U!>9qb_!+deuKQ@sk(k8%r{W!;3PUH(*!YriEwDiU48t#-3@1j zj9pE|zpR;YF=^Sro_>%}KQnqhZ=M~W;~J2TPY&dE+L?7+P1gzr8R4|5mf@oLV!_g} zIwr{lfZ7G`9%F2nK3NOrYu89C!Ci$`qevBEX@|@}6E+h5(AQe7t4R%j{fcEHup8C73f#FoV4{#WHq z;+6-*7;%k#C0l96Im;R<&5)_B^Y=i2z1Ga;1jLw4;~>LV4?OqTDIb&d7j*VY_LkGm2!$aQt2h@Q zc_VprJ556YsmCZfyW(t9^c909IA;|{-CP%jpcoTJlwQanmnAqVA>B}P+{}gSw3V7e zKNm%U>X=Fx5yz-vQp=@`xR+|_rbZMu$n6z07Rwej+|7C}7F0$OLQa)aTzP0)BwEU@ zpd-29LtO$f5wSyQfz_tjmLZ%X1koJ#Dt0fHo2N2`%z<3v7vtMQcx^k!8Avj9(Uxu- zT&7CmJcgPhMJRndUyG}#T!k%7r^v%>SV#=z12L4`qUVgn{G=s87BLQ#v`l#q&FL7X zD>h)#uyN)>U?6qo(F4S);?;bocC8TyrUoUC&>8YNNrA@mJ$D&3`e~R?uU@8U(36*I zP+FVRH?$3?8Z0;m7CWSkSWgl`0Hger)%?(auX6AXPb3 z)Do*4gl)>2YKj&rLFSQZA5LDG($RA{Pp8=`dEH)j#p9KVr4pRnw#N*wht}QM5Zy*G zs;nBO+a4@f>Cv3Qh%4fj>ajqbm}b^061zjSh35gT+j0(9uxw`2^fsS#*CvMZa;ryA zHI#8gK~qt!K8!=$*n>>5R4CbTF{`&P&(Sf>qhU{L|HRjg2t^uo%^cj@R|~{VMuNa` zX;E#b76#q%6vgS)IQiK;I78F^kkoF|s)9zVF%2RRBkHRHr#>yP`vFzKxXU$4!iN4b z(IH5b*Y%zvm1MlW3uJSy8`}4+?NqYJawfUuvP(}F>E6cfR*!HYFga;J6Vp!ux<*0e zpy_@6Xu+zHG$c~#RCbXxPWg#SFj`%{&$xDZSc6R2-C&I z+*6r_Z?JlXGmy~8mdm+z^52gBUOvbbo4ZAY$pVBPT^Mq%*jHzRtnP_-um>K59jJdi ze%*^icRE-7KHB;8bm}OuNpu-xk(;>A$SvXe8#f*rEDrE}$qs!{nPrN0dmW6(A(m80 zl+PEhmX_ym8@bYs*Ty3G^+gy^W%LGzNzAL8&3$R`9(s3v-IZ^;rDmTv-a8WtGHhk2 zK};R2KIbSzO|Puzy}F3Hk(j!#_;Xdonh*rT#O5CPmAQaUcEkaq`#UUlo|qRqstgR}dssSFTbuhxAQx)KF_x(1X$G zPjydfTAQjH5B?8^N3%bx{Cv3Tn(nGJyj6sUJ^;l_fPerI9ej7(-(-lg=!QXoR$|I9 zH{vfi`4WNjkpzD07mS}!yD+`EF2!(mhCJcnIc2AAifCG9m=w$7>Y-P2d{dR>uM zN3zXb^+Y*F=HU8q=s>{j8Y427$^LxbAQi3IJy7H`l$Ra)n2c%xE0A+HYTQ%2d9J-5 zNNHnXHpJN^^y!>|tq>Mbbmk16H2JCI%M}}c{h?iRokal8kGYtZRjSx6@5t0cql5N%9 zlVLyi(wXGER}=M1XElH4B+ZffW{z&&EB6I;?OM@j`6Ovx`_2hyvW5LxqmKthy+^G^ z)YfK&9wQ-dInYf-NRM0%XLsEp7M+THO3}0waNAr16Nmj_cMeOQ?gJc2gl1McBT9L; zYzlHzwM1lfT-OJP!8y-QxJI&P>Y=hh;Y1ZSS66rukyAr`Hz5B=pvGPE;cg_a^Y|(j zy^ilbL}NHB*Nb@o2vi>5-I&DPi3FS-^{JR##Kt|6TvFSSRm`qgl^)Cw4r)MR>^p&a zoY@42#L_Y4JJO(b_fz6FF zm=WkAXvbXBT7yDGgj4JFqeE2*ooEMH#mknZQqR?Ph6718-su>v{aV??|Fo4{ei;Y{ zH;ajaJ7v^Fm{{a^C>p9Gh^kP5WO2C~TI+6moixy&!$Qh@K_i5OG`4}DXAX|qoI2AX zmKHWke6TiBb8;brFwZktG^ANb<}lFUm>V@PN{sPIJWFAqT8&Z|$Hv=96H5V%QZ`qb z;{iQ$t8Qr(*Rg%Pakb~4fr%)-y3ypPrK0JT9)#D|*lq)~tQJcQO>J%VZdV<) zWu`(t>(c>;?{zRX{80A_euU@l25i5Nq~Y>hTdwlsC#g55y!GT5AY z$Xl0OL}z)!=a0;QV24)qMP)JbI{m0-cRoTYwbiRp1ho7+dAQ>WO>Y|W2HiNNPe;Hu zKK7KM!sjf%+~0Se|9mlB-dqTUx5?Z*538|<vZw^)tLa&BQIcKO^X~8w79*-()r9Dn}$|g8$7~qB^1#-Er~0@4ny<5DVi&8>|zX z(~$mSPyR8{eyVz_7VJ|T5v$_{TT1ge3@5w({b9Ij`T7IntN(`B59wQO6;g;UN*L~L zFa^Q2N5_FPozv>uxnCft$&CHXtz?V}T@P57FEZiskNtonGdf0550*LpIB8%2*U8m^vE-3+6vX#Z@#ja`YVpW8+-haa`VEeBz+oeO%Ec0M+ki(8)?ua=^ejXCn9JIP!ao7UD+U;!`3|dBgn~#~Y3j z_Q;ScENqoDQTE8DYM6r_xgtMKn}um*X9qK;P;M*L=60@8OPF{@)EmO;cZ9vnXt<(v zjVec~fT+P9!X_udMhPp8HM#I1KUpWZ=|q=wgG$HikSzh->P{=8aifuYgTprTqrM zx{EuisNaX2)D)APE=S~P@eMg6sI(j31Odi2N6OkIlreY{N*@q5R!bSkU 
zI;C4f{q~XAcR<|n?gm7;rP@JO=4kJkfRQ+!s`oS=S3`Luv&LD0np4Tz%;yXK$bOKk zLKHd$*$AM;lGS15A%TvAtKmr8PDfs1TJM?1GsC=N4@)XH1b8*g#!;>TLn{z&Zcb`LL zQ7^k0V}iNNP?acqS!$B{BM=B=z6-f`on~dwt@RFBV`oH6#^ivUl>9zNr(L=32A4Ga zKx9p0Ff)C0n2oIC1Hm)~rrKS0qKaZ8&@4g^UBJrkBt)bF*%vNi?x?m+g^r%C+dQ7q za^B0Znkn`pw7sVbR&^GR>!M_Xxw59VW6r7U_VB@Yl9NmcmTaasIx~lz$j$^6IxPi% z&ajCZf=P$kX+wY_k>HW)T?Yn>cgc#G@RBuA1OFN|vHoe`CR6&rIS0lokt+xlH3J&LkM;)JgUH~z7MS?{nm|j-RhQ|xRzSi z@pKfEKhxL|uZDDL18rgX5t<6|+z$m?NYPTfk*iA0kMu!)1%#3ed_ct15e9Av9=@CN z%J#jf=AmQoaklGbJ#V7woZB5eE*NJyztUI+t~nF5nX2OuD2K!qR&)W>eHs6}4PyeK7%qt(EAu zA-F;tnTdOM%Zyt8fu^%f$U>%7wi0c`R++vf(<&Y2vuJ?HSd}4REm!nVy36MWol}u{ zuVzl}A>O<@S&*diVnzK+Iz@73TXM;m%1bkW1DWm?I2Fnb+^jANfYkPuG<%y1S zB<;ly?0SVkA|OVGHD4wo2xy#XEaj2YAqiVA&3P5@ThG9}54{63ZmU{ZPP3cpTQrFo zj2I@5N$yJjxUWQ`M@EZ>K8WAM1+>nGibe_zkAu6#g4%avF6eK?WrU(iepa=`8~u3b z)#>!GmNLrYXs6g|X0&AtSibm#Q9g_|NA6nd&>-?Z#h8O~7x+S^4EvH3t_cn*g%-9! zw9T>X44@a!UL6^3zF5pwv>>19&LC0$e&n|j^e=Pjs{ZC4*(d+mPaoHx{-FY?C`#$v zK<`Ufyfucp9MY#0HlZin=4W6P$U4Yg2wW!-Z%d_kta1+}*h383ExJRPwCP+rsa%{O zW=+@cQjbaxStCBQX%>RYoehoNxT&>w>jbgBY^nhujp%T*fUPw$0+*S94z9vSFS}oJ z7$^#sT@Fqxc$cpb$^9`OYpX*mx9>kEj&0GVQ1}auE9(DL?TD%rLQ;6!;2_E8#nznk zeB%t2SL3YbOB=^hYA~CTDz_>2|TP0muC50vTWsm%R^)f>DAaKYL zc>EKzCD`1L-0tk2#-{SXHZo%(q6mlE-D>fynhqGnB`Lok+FzS?zVfzyK^5z7bY+}+TZ?b>Te z=m@RbZDzlB^5ag-;9F*r7>XkFkH%l;MK zinPy8j}Mz3bT5IVn0|_o@2L~@5#NzGxwS~6egK&)nU*5@PTPut1xZA*Ye$DH7omm< zmzNA2RKQfZdB$dVOS)5^52^SntqA9Hx<&d&fY>5FaVLG`EU=E-3@HPf$oqbP;S)l?qY zed2`GJM!X-z15s%emCi1wsP)*?HmJ+Jp7Aca;I3sc4(N~D1AHkF>FQ(^Q? z?Xi-1%^p9jk&T(*wBM>S();JKQp7R!xiCf1VWy$6=b;)RJy&syDe;+aCrXw^gq8h7 z#0QlmPZ-$vub~vH&CmQyZa#$$qxm0my?%wMnV;Fi1hI@EP}uiW)OgmRh^Y4{4Qn&i z66@6HLBz9RwMq2!c$x({;Gp2i<2YiK@G}!?yJa>TuChtUYd;k+BMhm3Sn>I~K-6oj zhb4SNxwfybs4=4wyPq_cH*8KyC8q+rwyKk7Qkp}cE+B{U8DWjBzP}9C6~ zW-6@4_JJdc@apNEA%K+0`dMxx!mIFEssddVE$oIQF+Z~2pa%oIVUmjIwtPur=n*_|B+h*J zJ&6_t_qu9njRl$pPd(OnP;Ka>EO5cvv|EJ!BDa)At(SPa@usqT&l%ACyl#0(;7Z}4UnzEUBqtT!M!9_R@y(e)@vn4nNcS_hEA))f zq!nWFY{+oqMEAarLIys5qF3)NIPi`WJSn+;X82ygsSBJ8f_A)2v>#c!INfKy_V}-M zqkh^#Fs`+(f4Q|+pIiY;3c}emAp1lzQxr*iSxXSgg9jIwYBg%=#x)$b&@We>Ys$m{ zh@1@muePo%wvFqGDoGPN-Gk;M2@KRl`jNEA(lkMqDpumyj$&)GHc}!filj-A6t@{J zGu(&Ea3_c2P7W7wr$~z0MNzgRLqZZKbyJ`Pg0yJ+*`odGUq9RTLn`X zaY=le;0Y4MEwK1fX=};7X0eCD(^J>en@Er#OEvZoxn|V+Bis7Jef4t7%GY0EQaMqj zVU+qYUp&-daaB!Jq1%cpI(E?($e^V&Y3fv@9FfuHtlDo%%T@eqd568C%De6fq(1Q5 zycYe{%@o2y!itlMQy_du zTC%L3NlRQ-@OdN1d*ge$C10w%`<@6%IH`9P@yT2Il*(Wf&BxwS^+;v*h%uLSHUm!g z8=~A?)-Dp*eF+0WVj!U<4IXz&VjZ&mTU)am^)eDmijKU2ID&{nWAIAmwV{O_uQNj4 zmbS#5gog?fiddeAaR{Yv!NJUnb}30Uf#h&&VFvTX_^haEmzR}c?67d0U@E9Pl~uAH zDb3MGVh7J7GPV&hdxXko((_#TPQc8rmr(O%C5n2|6L!gBS-PcJ3B1_QX&LW$3hge= zb2Efe5p8_&9`*AZtv5ZqW>|_$#$(6D;i`9?&IGXpx1Hs4hP$1QI1LGDFuS#aOSH~L zaXry29*%TB$!CkB<#7#cl#qwI1Ob>ULOL}ezn%p5t0 z5Jm>Wh$vexq?~9==Zs2(v4dFD7w*hh)OKQWfm~kYkh3k(Y&?nFSl-s=lgVsEV@WFK zn8jpGD;KP>Czj{-ZqQ-HA{Le_B7|B|V$9m-nVRdb&vHjCOE+yI1~pE5@hdI6Y)m5v z3*>6i&B!nw{cxIi#cl}F9CA*s)-Ut@v|{qA>r#8gzH}|lp6xEMed7iLeta#O(hg5e zi6vO+GT%1sIU(mJxac(yfsB+{y}B>2j${*Vek`n9i4!9_5wa2&8U&3&t!~OLRgkhK zp>LWBNF@>VyGl%ut!ytat=2oLf;DutlzFL`UvXu+=Z_YlD9cOCRojx;%autqpE2^BUEt);k$G*(d#TmE)lX%1GIyt(`IO7$=;( zJ9>3(Faoc7t6W6N}~eLMg-= z=YoJULMcb9<4rKFu5fWo!g0xhYTA}hn!QUhGm^SB)vlH{swNsMq0-Q2H7r;c;_bsj z`;qqW;AI6qeYs>y>bKmyaBXbxb~)SIK?tm>Jjr&5lu>LqRcv<|MV0eQw#ntmH9{c0 zPs0~ta}`{p+rY9WTD1NP(V!(l7Y%4NjOi`WfTO%uGl6t+s~@Kn{4gq@t5*bRQ%85X zfZ0Jb@vO`=*;z^@MWS?E=9EdtWN)*MM=@{Cs><P39L*cf_K4vFyA;2OePIFgNo#~m%y_Za1`$n&uUht{^s3EaZMSfwxNdbEt%SJy zW1G(xxahN2dY|0Q1$p>qrL<94PvJyyj9ajoQ>mrT6ZbyQpx5a2uCcDU`**e3?$i~$ 
ztg9a1CS7MSumjEUwpesIgI!YY*jKNbH`~p<8_i58K-p$JRWv{3uCXw7T`2V2UfuNL zpB=e5J*mHYODb!WUtmbp+N_V~9}JikJNX#7d=NOnax?EQ?p*RFjBE1gw-X@qH`5p= z9EW9l>qX~k&A7B1Z4|F(Iy0Va zqs^q>miV!-VbI9Lm>IT19cD8~P1=etu3RX@c%!`GVOKGoz;$Vyv@u4h`AJ<$$BL%k zPDCy)8g%&L3rg=+MBYtn(fNFh(vGQv*@BoMFo7I0h-m{!XGO}|{bWw*ce^%ZwN$P3 zhu(}{l`R>L+DRE5z%=2^_7}AtS3@x8K{4wcK!N)A4{T1uI_DdeYy!huRF4r1O|t zE5-@>Q=2BdN4M7(+vRkt6qOnB%~16~^?)pdxXwSt5o@iJH{I(_P94Of8%J6~m_JOZ zKG|*%e-7Ao1Dnxa;&)1u%H9j@JMcZr-C&n{OD`lWshy*CGq?6tAzmr$Y_1kM`C@yG zLJNkR+@Zxkcdj?0Smy+f#j$!N(K~VXJd{;XEi^mbq*r6=Qg_S`3RtC{{qcX~f?W9xz%{=F2;~oe{u=PmUk5z(4}lDVa~7!I1K0iqY>*aY z;Gdvj0LTR(UxV`ikYON@Lk~IYKs^aV6(|*4LbiX_SM*OV-Kw$@k z7$W-Z&qHoIY<~d=#0NFx_Wu9~93340nLl6x5AYZmxYuw0_*onP*gtFwHM}3t`6?7|t-jbvX|NVm|W;%z;VJ2A)G1?zjI24#=xN!vV3v`yQ_G zsr3v%cmA7e|3A2%K3z?C*SXXAfgYhxP#!tM`YJd%!wUNVAq!6D4%)Eae);z4Ji +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus ValidateActivationOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + const armnn::TensorInfo& inputInfo, + const armnn::TensorInfo& outputInfo, + armnn::ActivationDescriptor& activationDesc) +{ + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsActivationSupported, + delegateData.m_Backends, + isSupported, + inputInfo, + outputInfo, + activationDesc); + }; + + validateFunc(outputInfo, isSupported); + return isSupported ? kTfLiteOk : kTfLiteError; +} + +TfLiteStatus VisitActivationOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + armnn::ActivationDescriptor activationDesc; + switch(operatorCode) + { + case kTfLiteBuiltinRelu: + { + activationDesc.m_Function = armnn::ActivationFunction::ReLu; + break; + } + case kTfLiteBuiltinRelu6: + { + activationDesc.m_Function = armnn::ActivationFunction::BoundedReLu; + activationDesc.m_A = 6.0f; + break; + } + case kTfLiteBuiltinLogistic: + { + activationDesc.m_Function = armnn::ActivationFunction::Sigmoid; + break; + } + case kTfLiteBuiltinTanh: + { + activationDesc.m_Function = armnn::ActivationFunction::TanH; + activationDesc.m_A = 1.0f; + activationDesc.m_B = 1.0f; + break; + } + case kTfLiteBuiltinElu: + { + activationDesc.m_Function = armnn::ActivationFunction::Elu; + activationDesc.m_A = 1.0f; + break; + } + case kTfLiteBuiltinHardSwish: + { + activationDesc.m_Function = armnn::ActivationFunction::HardSwish; + break; + } + default: + { + return kTfLiteError; + } + } + if (!delegateData.m_Network) + { + return ValidateActivationOperator(delegateData, + tfLiteContext, + inputTensorInfo, + outputTensorInfo, + activationDesc); + } + 
armnn::IConnectableLayer* activationLayer = delegateData.m_Network->AddActivationLayer(activationDesc); + ARMNN_ASSERT(activationLayer != nullptr); + + armnn::IOutputSlot& outputSlot = activationLayer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + // Connect + return Connect(activationLayer, tfLiteNode, delegateData); +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/ArgMinMax.hpp b/arch/arm/ARMnn/delegate/src/ArgMinMax.hpp new file mode 100644 index 0000000000..54994dfef7 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/ArgMinMax.hpp @@ -0,0 +1,123 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus VisitArgMinMaxOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t argMinMaxOperatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 2, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, argMinMaxOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, argMinMaxOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + // Get const axis value from model and set it to descriptor. + const TfLiteTensor& tfLiteAxisTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if (!IsValid(tfLiteContext, tfLiteAxisTensor, argMinMaxOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + armnn::ArgMinMaxDescriptor desc; + // Get the axis value from the input tensor + switch (tfLiteAxisTensor.type) + { + case kTfLiteInt32: + case kTfLiteInt64: + desc.m_Axis = tflite::GetTensorData(&tfLiteAxisTensor)[0]; + break; + default: + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Axis value data type is not supported in operator #%d node #%d: ", + argMinMaxOperatorCode, nodeIndex); + return kTfLiteError; + } + + // If output_type is int32 then set Signed32 else Signed64. Default type is Signed64. 
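+ // The builtin operator code selects between ArgMax and ArgMin below; in both branches the requested
+ // output_type must be int32 or int64, otherwise the operator is rejected rather than converted.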
+ if (argMinMaxOperatorCode == kTfLiteBuiltinArgMax) + { + desc.m_Function = armnn::ArgMinMaxFunction::Max; + auto* argMaxParameters = reinterpret_cast(tfLiteNode->builtin_data); + if (argMaxParameters->output_type != kTfLiteInt32 && argMaxParameters->output_type != kTfLiteInt64) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: output_type data type is not supported in operator #%d node #%d: ", + argMinMaxOperatorCode, nodeIndex); + return kTfLiteError; + } + } + else + { + desc.m_Function = armnn::ArgMinMaxFunction::Min; + auto* argMinParameters = reinterpret_cast(tfLiteNode->builtin_data); + if (argMinParameters->output_type != kTfLiteInt32 && argMinParameters->output_type != kTfLiteInt64) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: output_type data type is not supported in operator #%d node #%d: ", + argMinMaxOperatorCode, nodeIndex); + return kTfLiteError; + } + } + + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsArgMinMaxSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outInfo, + desc); + }; + + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? kTfLiteOk : kTfLiteError; + } + + // Add an ArgMinMax layer + armnn::IConnectableLayer* layer = delegateData.m_Network->AddArgMinMaxLayer(desc); + ARMNN_ASSERT(layer != nullptr); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + // Connect + return Connect(layer, tfLiteNode, delegateData); +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/BatchSpace.hpp b/arch/arm/ARMnn/delegate/src/BatchSpace.hpp new file mode 100644 index 0000000000..318806feef --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/BatchSpace.hpp @@ -0,0 +1,198 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus VisitBatchToSpaceNdOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 3, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteBlockShapeTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if (!IsValid(tfLiteContext, tfLiteBlockShapeTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteCropsTensor = tfLiteTensors[tfLiteNode->inputs->data[2]]; + if (!IsValid(tfLiteContext, tfLiteCropsTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& blockShapeTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteBlockShapeTensor); + const armnn::TensorInfo& cropsTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteCropsTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + std::vector blockShape(blockShapeTensorInfo.GetNumElements()); + ::memcpy(blockShape.data(), tfLiteBlockShapeTensor.data.data, blockShapeTensorInfo.GetNumBytes()); + + std::vector cropsVector(cropsTensorInfo.GetNumElements()); + std::memcpy(cropsVector.data(), tfLiteCropsTensor.data.data, cropsTensorInfo.GetNumBytes()); + + size_t step = 2; + std::vector> crops; + for (unsigned int i = 0; i < cropsTensorInfo.GetNumElements() / step; ++i) + { + crops.emplace_back(cropsVector[i * step], cropsVector[i * step + 1]); + } + + armnn::BatchToSpaceNdDescriptor descriptor; + descriptor.m_BlockShape = blockShape; + descriptor.m_Crops = crops; + descriptor.m_DataLayout = armnn::DataLayout::NHWC; + + // Check if supported + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsBatchToSpaceNdSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outputTensorInfo, + descriptor); + }; + + // If the m_Network is a nullptr, this signals that a prerequisite TfLite callback is required to clarify the + // support for the operator + // If supported, VisitBatchToSpaceNdOperator will be called again to add the layer to the network as seen below + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? 
kTfLiteOk : kTfLiteError; + } + + // Add a BatchToSpace layer + armnn::IConnectableLayer* layer = delegateData.m_Network->AddBatchToSpaceNdLayer(descriptor); + ARMNN_ASSERT(layer != nullptr); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + // Connect + return Connect(layer, tfLiteNode, delegateData); +} + +TfLiteStatus VisitSpaceToBatchNdOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 3, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteBlockShapeTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if (!IsValid(tfLiteContext, tfLiteBlockShapeTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLitePadListTensor = tfLiteTensors[tfLiteNode->inputs->data[2]]; + if (!IsValid(tfLiteContext, tfLitePadListTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& blockShapeTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteBlockShapeTensor); + const armnn::TensorInfo& padListTensorInfo = GetTensorInfoForTfLiteTensor(tfLitePadListTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + std::vector blockShape(blockShapeTensorInfo.GetNumElements()); + std::memcpy(blockShape.data(), tfLiteBlockShapeTensor.data.data, blockShapeTensorInfo.GetNumBytes()); + + std::vector padListVector(padListTensorInfo.GetNumElements()); + std::memcpy(padListVector.data(), tfLitePadListTensor.data.data, padListTensorInfo.GetNumBytes()); + + size_t step = 2; + std::vector> padList; + for (unsigned int i = 0; i < padListTensorInfo.GetNumElements() / step; ++i) + { + padList.emplace_back(padListVector[i * step], padListVector[i * step + 1]); + } + + armnn::SpaceToBatchNdDescriptor descriptor; + descriptor.m_BlockShape = blockShape; + descriptor.m_PadList = padList; + descriptor.m_DataLayout = armnn::DataLayout::NHWC; + + // Check if supported + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsSpaceToBatchNdSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outputTensorInfo, + descriptor); + }; + + // If the m_Network is a nullptr, this signals that a prerequisite TfLite callback is required to clarify the + // support for the operator + // If supported, VisitSpaceToBatchNdOperator will be called again to add the layer to the network as seen below + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? 
kTfLiteOk : kTfLiteError; + } + + // Add a SpaceToBatch layer + armnn::IConnectableLayer* layer = delegateData.m_Network->AddSpaceToBatchNdLayer(descriptor); + ARMNN_ASSERT(layer != nullptr); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + // Connect + return Connect(layer, tfLiteNode, delegateData); +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/Comparison.hpp b/arch/arm/ARMnn/delegate/src/Comparison.hpp new file mode 100644 index 0000000000..2e6a7db4b6 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/Comparison.hpp @@ -0,0 +1,130 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "DelegateUtils.hpp" +#include + +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus VisitComparisonOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t tfLiteComparisonOperatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 2, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor0 = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (IsDynamicTensor(tfLiteInputTensor0)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ", + tfLiteComparisonOperatorCode, nodeIndex); + + return kTfLiteError; + } + + const TfLiteTensor& tfLiteInputTensor1 = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if (IsDynamicTensor(tfLiteInputTensor1)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ", + tfLiteComparisonOperatorCode, nodeIndex); + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (IsDynamicTensor(tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic output tensors are not supported in operator #%d node #%d: ", + tfLiteComparisonOperatorCode, nodeIndex); + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo0 = GetTensorInfoForTfLiteTensor(tfLiteInputTensor0); + const armnn::TensorInfo& inputTensorInfo1 = GetTensorInfoForTfLiteTensor(tfLiteInputTensor1); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + armnn::ComparisonOperation comparisonOperation = armnn::ComparisonOperation::Equal; + switch(tfLiteComparisonOperatorCode) + { + case kTfLiteBuiltinEqual: + comparisonOperation = armnn::ComparisonOperation::Equal; + break; + case kTfLiteBuiltinGreater: + comparisonOperation = armnn::ComparisonOperation::Greater; + break; + case kTfLiteBuiltinGreaterEqual: + comparisonOperation = armnn::ComparisonOperation::GreaterOrEqual; + break; + case kTfLiteBuiltinLess: + comparisonOperation = armnn::ComparisonOperation::Less; + break; + case kTfLiteBuiltinLessEqual: + comparisonOperation = armnn::ComparisonOperation::LessOrEqual; + break; + case kTfLiteBuiltinNotEqual: + comparisonOperation = armnn::ComparisonOperation::NotEqual; + break; + default: + return kTfLiteError; + } + + armnn::ComparisonDescriptor descriptor(comparisonOperation); + bool isSupported = false; + + auto validateFunc = [&](const armnn::TensorInfo& 
outputTensorInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsComparisonSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo0, + inputTensorInfo1, + outputTensorInfo, + descriptor); + }; + + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? kTfLiteOk : kTfLiteError; + } + + armnn::IConnectableLayer* comparisonLayer = delegateData.m_Network->AddComparisonLayer(descriptor); + ARMNN_ASSERT(comparisonLayer != nullptr); + + armnn::IOutputSlot& outputSlot = comparisonLayer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + auto reshapeLayer = BroadcastTensor(inputTensorInfo0, + inputTensorInfo1, + comparisonLayer, + tfLiteContext, + tfLiteNode, + delegateData); + if (!reshapeLayer) + { + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/Control.hpp b/arch/arm/ARMnn/delegate/src/Control.hpp new file mode 100644 index 0000000000..f20b9f0cc3 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/Control.hpp @@ -0,0 +1,311 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace armnnDelegate +{ + +void SetupConcatViewOrigin(const armnn::TensorInfo& inputTensorInfo, + armnn::OriginsDescriptor& concatDescriptor, + const unsigned int concatAxis, + unsigned int inputIndex, + unsigned int& mergeDimOrigin) +{ + const uint32_t inputRank = concatDescriptor.GetNumDimensions(); + + // double check dimensions of the tensors + if (inputTensorInfo.GetNumDimensions() != inputRank) + { + throw armnn::ParseException("The number of dimensions for input tensors " + "of the concatenation operator should be: " + std::to_string(inputRank)); + } + + for (unsigned int j = 0; j < concatAxis; ++j) + { + concatDescriptor.SetViewOriginCoord(inputIndex, j, 0); + } + + concatDescriptor.SetViewOriginCoord(inputIndex, concatAxis, mergeDimOrigin); + mergeDimOrigin += inputTensorInfo.GetShape()[concatAxis]; + + for (unsigned int j = concatAxis + 1; j < inputRank; ++j) + { + concatDescriptor.SetViewOriginCoord(inputIndex, j, 0); + } +} + +TfLiteStatus VisitConcatenationOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t tfLiteConcatOperatorCode) +{ + unsigned int numInputs = tfLiteNode->inputs->size; + if (numInputs < 2) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, "TfLiteArmnnDelegate: Minimum number of inputs (%d != %d) in node #%d", + 2, numInputs, nodeIndex); + return kTfLiteError; + } + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + + std::vector inputTensorInfos; + for (unsigned int i = 0; i < numInputs; ++i) + { + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[i]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, tfLiteConcatOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + armnn::TensorInfo inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + inputTensorInfos.emplace_back(inputTensorInfo); + } + + // Convert input tensors to const armnn::TensorInfo* type for FORWARD_LAYER_SUPPORT_FUNC. 
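+ // The support query takes the inputs as const armnn::TensorInfo pointers, so collect the address of each
+ // TensorInfo gathered above; inputTensorInfos must stay alive while these pointers are in use.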
+ std::vector inputConstTensorInfos; + std::transform(inputTensorInfos.begin(), + inputTensorInfos.end(), + std::back_inserter(inputConstTensorInfos), + [](armnn::TensorInfo& t)->const armnn::TensorInfo*{ return &t; }); + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, tfLiteConcatOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + // Setup OriginsDescriptor, axis and view origin + unsigned int numConcatView = static_cast(numInputs); + uint32_t inputRank = tfLiteTensors[tfLiteNode->inputs->data[0]].dims->size; + + auto* concatenationParameters = reinterpret_cast(tfLiteNode->builtin_data); + const unsigned int concatDimInput = static_cast( + (static_cast(inputRank) + concatenationParameters->axis) % static_cast(inputRank)); + + armnn::OriginsDescriptor concatDescriptor(static_cast(numConcatView), inputRank); + concatDescriptor.SetConcatAxis(concatDimInput); + + unsigned int mergeDimOrigin = 0; + for (unsigned int viewIndex = 0; viewIndex < numConcatView; ++viewIndex) + { + armnn::TensorInfo inputTensorInfo = GetTensorInfoForTfLiteTensor( + tfLiteTensors[tfLiteNode->inputs->data[viewIndex]]); + + // Sets up concatDescriptor view origin + SetupConcatViewOrigin(inputTensorInfo, concatDescriptor, concatDimInput, viewIndex, mergeDimOrigin); + } + + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + // Check if supported + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsConcatSupported, + delegateData.m_Backends, + isSupported, + inputConstTensorInfos, + outputTensorInfo, + concatDescriptor); + }; + + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? kTfLiteOk : kTfLiteError; + } + + // Setup layer and connect. 
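+ // Add the concat layer, let ProcessInputs() register any constant TfLite inputs as ArmNN constant layers,
+ // set the output tensor info, wire the node up with Connect(), and append a fused activation if requested.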
+ armnn::IConnectableLayer* concatenationLayer = delegateData.m_Network->AddConcatLayer(concatDescriptor); + ARMNN_ASSERT(concatenationLayer != nullptr); + + // Connect the Constant Inputs + auto inputsTensorsProcess = ProcessInputs(concatenationLayer, + delegateData, + tfLiteContext, + tfLiteNode); + if (inputsTensorsProcess == kTfLiteError) + { + return inputsTensorsProcess; + } + + armnn::IOutputSlot& outputSlot = concatenationLayer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + Connect(concatenationLayer, tfLiteNode, delegateData); + + if (!concatenationParameters) + { + // No Activation + return kTfLiteOk; + } + + // Check activation + TfLiteFusedActivation activationType = concatenationParameters->activation; + return FusedActivation(tfLiteContext, tfLiteNode, activationType, concatenationLayer, 0, delegateData); +} + +TfLiteStatus VisitMeanOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t tfLiteMeanOperatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 2, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if(!IsValid(&tfLiteInputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid input tensor in operator #%d node #%d: ", + tfLiteMeanOperatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteInputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ", + tfLiteMeanOperatorCode, nodeIndex); + return kTfLiteError; + } + + const TfLiteTensor& tfLiteAxisTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if(!IsValid(&tfLiteAxisTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid axis tensor in operator #%d node #%d: ", + tfLiteMeanOperatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteAxisTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic axis tensors are not supported in operator #%d node #%d: ", + tfLiteMeanOperatorCode, nodeIndex); + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if(!IsValid(&tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid output tensor in operator #%d node #%d: ", + tfLiteAxisTensor, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic output tensors are not supported in operator #%d node #%d: ", + tfLiteMeanOperatorCode, nodeIndex); + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& axisTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteAxisTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + auto* axisTensorData = tflite::GetTensorData(&tfLiteAxisTensor); + + std::vector axis; + // Add axis data to vector to be converter to unsigned int and assigned to descriptor axis. 
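+ // Negative axis values are allowed by TfLite; the (i + rank) % rank transform further down wraps them
+ // into [0, rank), e.g. axis -1 on a rank-4 input becomes 3, and the std::set removes any duplicates.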
+ for (unsigned int i = 0; i < axisTensorInfo.GetNumElements(); ++i) + { + axis.emplace_back(axisTensorData[i]); + } + + // Convert the axis to unsigned int and remove duplicates. + unsigned int rank = inputTensorInfo.GetNumDimensions(); + std::set uniqueAxis; + std::transform(axis.begin(), + axis.end(), + std::inserter(uniqueAxis, uniqueAxis.begin()), + [rank](int i)->unsigned int{ return (i + rank) % rank; }); + + // Setup MeanDescriptor and assign axis and keepDims + armnn::MeanDescriptor desc; + desc.m_Axis.assign(uniqueAxis.begin(), uniqueAxis.end()); + desc.m_KeepDims = inputTensorInfo.GetNumDimensions() == outputTensorInfo.GetNumDimensions() ? true : false; + + // Check if supported + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsMeanSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outputTensorInfo, + desc); + }; + + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? kTfLiteOk : kTfLiteError; + } + + // Setup layer and connect. + armnn::IConnectableLayer* meanLayer = delegateData.m_Network->AddMeanLayer(desc); + ARMNN_ASSERT(meanLayer != nullptr); + + armnn::IOutputSlot& outputSlot = meanLayer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + return Connect(meanLayer, tfLiteNode, delegateData); +} + +TfLiteStatus VisitControlOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + armnn::IgnoreUnused(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + operatorCode); + + switch(operatorCode) + { + case kTfLiteBuiltinConcatenation: + return VisitConcatenationOperator(delegateData, tfLiteContext, tfLiteNode, nodeIndex, operatorCode); + case kTfLiteBuiltinMean: + return VisitMeanOperator(delegateData, tfLiteContext, tfLiteNode, nodeIndex, operatorCode); + default: + return kTfLiteError; + } +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/Convolution.hpp b/arch/arm/ARMnn/delegate/src/Convolution.hpp new file mode 100644 index 0000000000..a7d6c1de26 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/Convolution.hpp @@ -0,0 +1,760 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "DelegateUtils.hpp" + +#include +#include +#include +#include +#include "tensorflow/lite/kernels/internal/tensor.h" + +namespace armnnDelegate +{ + +TfLiteStatus VisitConv2dOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + auto numInputs = tfLiteNode->inputs->size; + if (numInputs < 2) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, "TfLiteArmnnDelegate: Minimum number of inputs (%d != %d) in node #%d", + 2, numInputs, nodeIndex); + return kTfLiteError; + } + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + armnn::Convolution2dDescriptor descriptor; + const auto params = reinterpret_cast(tfLiteNode->builtin_data); + + bool biasEnabled = tfLiteNode->inputs->size > 2; + descriptor.m_BiasEnabled = biasEnabled; + descriptor.m_StrideX = NonNegative(params->stride_width, nodeIndex); + descriptor.m_StrideY = NonNegative(params->stride_height, nodeIndex); + descriptor.m_DataLayout = armnn::DataLayout::NHWC; + descriptor.m_DilationX = NonNegative(params->dilation_width_factor, nodeIndex); + descriptor.m_DilationY = NonNegative(params->dilation_height_factor, nodeIndex); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if(!IsValid(&tfLiteTensors[tfLiteNode->inputs->data[0]])) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid input tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteInputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if(!IsValid(&tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid output tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic output tensors are not supported in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + + const TfLiteTensor& tfLiteFilterTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if(!IsValid(&tfLiteFilterTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid filter tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteFilterTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic filter tensors are not supported in node #%d: ", + nodeIndex); + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + armnn::TensorInfo filterTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteFilterTensor); + + armnn::TensorInfo biasTensorInfo; + if(biasEnabled) + { + const TfLiteTensor& tfLiteBiasTensor = tfLiteTensors[tfLiteNode->inputs->data[2]]; + if(!IsValid(&tfLiteBiasTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid bias tensor in operator #%d node #%d: ", + 
operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteBiasTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic bias tensors are not supported in node #%d: ", + nodeIndex); + return kTfLiteError; + } + biasTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteBiasTensor); + } + else + { + biasTensorInfo = armnn::TensorInfo(armnn::TensorShape({1}), GetDataType(tfLiteInputTensor)); + } + + armnn::Optional optionalBiasInfo(biasTensorInfo); + + // TfLite uses NHWC tensors + const unsigned int inputHeight = inputTensorInfo.GetShape()[1]; + const unsigned int inputWidth = inputTensorInfo.GetShape()[2]; + + const unsigned int filterHeight = filterTensorInfo.GetShape()[1]; + const unsigned int filterWidth = filterTensorInfo.GetShape()[2]; + + // Calculate padding + CalcPadding(inputHeight, filterHeight, descriptor.m_StrideY, descriptor.m_DilationY, + descriptor.m_PadTop, descriptor.m_PadBottom, params->padding); + CalcPadding(inputWidth, filterWidth, descriptor.m_StrideX, descriptor.m_DilationX, + descriptor.m_PadLeft, descriptor.m_PadRight, params->padding); + + if (!delegateData.m_Network) + { + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsConvolution2dSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outputTensorInfo, + descriptor, + filterTensorInfo, + optionalBiasInfo); + return isSupported ? kTfLiteOk : kTfLiteError; + } + + armnn::IConnectableLayer* layer = nullptr; + + // Set up filter and biases + auto filter = + CreateConstTensor(&tfLiteContext->tensors[tfLiteNode->inputs->data[1]], + filterTensorInfo, + armnn::Optional()); + + if(biasEnabled) + { + auto biases = + CreateConstTensor(&tfLiteContext->tensors[tfLiteNode->inputs->data[2]], + biasTensorInfo, + armnn::Optional()); + layer = delegateData.m_Network->AddConvolution2dLayer(descriptor, + filter, + armnn::Optional(biases)); + } + else + { + layer = delegateData.m_Network->AddConvolution2dLayer(descriptor, + filter, + armnn::EmptyOptional()); + } + + ARMNN_ASSERT(layer != nullptr); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + Connect(layer, tfLiteNode, delegateData); + + auto* tfLiteNodeParameters = reinterpret_cast(tfLiteNode->builtin_data); + if (!tfLiteNodeParameters) + { + // No Activation + return kTfLiteOk; + } + // Check activation + TfLiteFusedActivation activationType = tfLiteNodeParameters->activation; + return FusedActivation(tfLiteContext, tfLiteNode, activationType, layer, 0, delegateData); + +} + +// Conv3d is only correctly supported for external delegates from TF Lite v2.6, as there was a breaking bug in v2.5. +#if defined(ARMNN_POST_TFLITE_2_5) +TfLiteStatus VisitConv3dOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + auto numInputs = tfLiteNode->inputs->size; + if (numInputs < 2) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, "TfLiteArmnnDelegate: Minimum number of inputs (%d != %d) in node #%d", + 2, numInputs, nodeIndex); + return kTfLiteError; + } + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + armnn::Convolution3dDescriptor descriptor; + const auto params = reinterpret_cast(tfLiteNode->builtin_data); + + bool biasEnabled = tfLiteNode->inputs->size == 3 ? 
true : false; + descriptor.m_BiasEnabled = biasEnabled; + descriptor.m_DataLayout = armnn::DataLayout::NDHWC; + descriptor.m_StrideX = NonNegative(params->stride_width, nodeIndex); + descriptor.m_StrideY = NonNegative(params->stride_height, nodeIndex); + descriptor.m_StrideZ = NonNegative(params->stride_depth, nodeIndex); + descriptor.m_DilationX = NonNegative(params->dilation_width_factor, nodeIndex); + descriptor.m_DilationY = NonNegative(params->dilation_height_factor, nodeIndex); + descriptor.m_DilationZ = NonNegative(params->dilation_depth_factor, nodeIndex); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteFilterTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if (!IsValid(tfLiteContext, tfLiteFilterTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + armnn::TensorInfo filterTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteFilterTensor); + + armnn::TensorInfo biasTensorInfo; + if(biasEnabled) + { + const TfLiteTensor& tfLiteBiasTensor = tfLiteTensors[tfLiteNode->inputs->data[2]]; + if (!IsValid(tfLiteContext, tfLiteBiasTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + biasTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteBiasTensor); + } + else + { + biasTensorInfo = armnn::TensorInfo(armnn::TensorShape({1}), GetDataType(tfLiteInputTensor)); + } + + armnn::Optional optionalBiasInfo(biasTensorInfo); + + // TfLite uses NDHWC tensors + const unsigned int inputDepth = inputTensorInfo.GetShape()[1]; + const unsigned int inputHeight = inputTensorInfo.GetShape()[2]; + const unsigned int inputWidth = inputTensorInfo.GetShape()[3]; + + // Assuming the filter is DHWIO : Depth, Height, Width, OutputChannels, InputChannels + const unsigned int filterDepth = filterTensorInfo.GetShape()[0]; + const unsigned int filterHeight = filterTensorInfo.GetShape()[1]; + const unsigned int filterWidth = filterTensorInfo.GetShape()[2]; + + // Calculate padding + CalcPadding(inputDepth, filterDepth, descriptor.m_StrideZ, descriptor.m_DilationZ, + descriptor.m_PadFront, descriptor.m_PadBack, params->padding); + CalcPadding(inputHeight, filterHeight, descriptor.m_StrideY, descriptor.m_DilationY, + descriptor.m_PadTop, descriptor.m_PadBottom, params->padding); + CalcPadding(inputWidth, filterWidth, descriptor.m_StrideX, descriptor.m_DilationX, + descriptor.m_PadLeft, descriptor.m_PadRight, params->padding); + + // If the m_Network is a nullptr, this signals that a prerequisite TfLite callback is required to clarify the + // support for the operator + // If supported, VisitConvolutionOperator will be called again to add the layer to the network as seen below. + if (!delegateData.m_Network) + { + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsConvolution3dSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outputTensorInfo, + descriptor, + filterTensorInfo, + optionalBiasInfo); + return isSupported ? 
kTfLiteOk : kTfLiteError; + } + + armnn::IConnectableLayer* layer = delegateData.m_Network->AddConvolution3dLayer(descriptor); + ARMNN_ASSERT(layer != nullptr); + + // Add a constant layer for weights and biases if inputs are constant, + // which are connected to the Convolution3d layer as inputs. + if (tflite::IsConstantTensor(&tfLiteFilterTensor)) + { + auto filter = CreateConstTensor(&tfLiteFilterTensor, + filterTensorInfo, + armnn::Optional()); + + armnn::IConnectableLayer* weightsLayer = delegateData.m_Network->AddConstantLayer(filter); + ARMNN_ASSERT(weightsLayer != nullptr); + + weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1u)); + weightsLayer->GetOutputSlot(0).SetTensorInfo(filterTensorInfo); + } + + if(biasEnabled) + { + const TfLiteTensor& tfLiteBiasTensor = tfLiteTensors[tfLiteNode->inputs->data[2]]; + if(tflite::IsConstantTensor(&tfLiteBiasTensor)) + { + auto biases = CreateConstTensor(&tfLiteBiasTensor, + biasTensorInfo, + armnn::Optional()); + + armnn::IConnectableLayer* biasLayer = delegateData.m_Network->AddConstantLayer(biases); + ARMNN_ASSERT(biasLayer != nullptr); + + biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2u)); + biasLayer->GetOutputSlot(0).SetTensorInfo(biasTensorInfo); + } + } + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + Connect(layer, tfLiteNode, delegateData); + + auto* tfLiteNodeParameters = reinterpret_cast(tfLiteNode->builtin_data); + if (!tfLiteNodeParameters) + { + // No Activation + return kTfLiteOk; + } + + // Check activation + TfLiteFusedActivation activationType = tfLiteNodeParameters->activation; + return FusedActivation(tfLiteContext, tfLiteNode, activationType, layer, 0, delegateData); +} +#endif + +TfLiteStatus VisitDepthwiseConv2dOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + auto numInputs = tfLiteNode->inputs->size; + if (numInputs < 2) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, "TfLiteArmnnDelegate: Minimum number of inputs (%d != %d) in node #%d", + 2, numInputs, nodeIndex); + return kTfLiteError; + } + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + bool biasEnabled = tfLiteNode->inputs->size > 2; + + armnn::DepthwiseConvolution2dDescriptor descriptor; + const auto params = reinterpret_cast(tfLiteNode->builtin_data); + + descriptor.m_BiasEnabled = biasEnabled; + descriptor.m_StrideX = NonNegative(params->stride_width, nodeIndex); + descriptor.m_StrideY = NonNegative(params->stride_height, nodeIndex); + descriptor.m_DataLayout = armnn::DataLayout::NHWC; + descriptor.m_DilationX = NonNegative(params->dilation_width_factor, nodeIndex); + descriptor.m_DilationY = NonNegative(params->dilation_height_factor, nodeIndex); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if(!IsValid(&tfLiteInputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid input tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteInputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + 
if(!IsValid(&tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid output tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic output tensors are not supported in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + + const TfLiteTensor& tfLiteFilterTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if(!IsValid(&tfLiteFilterTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid filter tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteFilterTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic filter tensors are not supported in node #%d: ", + nodeIndex); + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + armnn::TensorInfo filterTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteFilterTensor); + + // Assuming input is NHWC + unsigned int inputHeight = inputTensorInfo.GetShape()[1]; + unsigned int inputWidth = inputTensorInfo.GetShape()[2]; + + // TensorflowLite weights come in the format [1, H, W, I * M] + unsigned int filterHeight = filterTensorInfo.GetShape()[1]; + unsigned int filterWidth = filterTensorInfo.GetShape()[2]; + + // Calculate padding + CalcPadding(inputHeight, filterHeight, descriptor.m_StrideY, descriptor.m_DilationY, + descriptor.m_PadTop, descriptor.m_PadBottom, params->padding); + CalcPadding(inputWidth, filterWidth, descriptor.m_StrideX, descriptor.m_DilationX, + descriptor.m_PadLeft, descriptor.m_PadRight, params->padding); + + armnn::TensorInfo biasTensorInfo; + if(biasEnabled) + { + const TfLiteTensor& tfLiteBiasTensor = tfLiteTensors[tfLiteNode->inputs->data[2]]; + if(!IsValid(&tfLiteBiasTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid bias tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteBiasTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic bias tensors are not supported in node #%d: ", + nodeIndex); + return kTfLiteError; + } + biasTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteBiasTensor); + } + else + { + biasTensorInfo = armnn::TensorInfo(armnn::TensorShape({1}), GetDataType(tfLiteInputTensor)); + } + + // For depthwise the weights layout is the same as for tflite [1, H, W, I*M]. No permutation required. + auto filter = CreateConstTensor(&tfLiteFilterTensor, filterTensorInfo); + + if (!delegateData.m_Network) + { + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsDepthwiseConvolutionSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outputTensorInfo, + descriptor, + filter.GetInfo(), + armnn::Optional(biasTensorInfo)); + return isSupported ? 
kTfLiteOk : kTfLiteError; + } + + armnn::IConnectableLayer* layer = nullptr; + + if(biasEnabled) + { + auto biases = + CreateConstTensor(&tfLiteContext->tensors[tfLiteNode->inputs->data[2]], + biasTensorInfo); + layer = delegateData.m_Network->AddDepthwiseConvolution2dLayer(descriptor, + filter, + armnn::Optional(biases)); + } + else + { + layer = delegateData.m_Network->AddDepthwiseConvolution2dLayer(descriptor, + filter, + armnn::EmptyOptional()); + } + + ARMNN_ASSERT(layer != nullptr); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + Connect(layer, tfLiteNode, delegateData); + auto* tfLiteNodeParameters = reinterpret_cast(tfLiteNode->builtin_data); + if (!tfLiteNodeParameters) + { + // No Activation + return kTfLiteOk; + } + // Check activation + TfLiteFusedActivation activationType = tfLiteNodeParameters->activation; + return FusedActivation(tfLiteContext, tfLiteNode, activationType, layer, 0, delegateData); +} + +TfLiteStatus VisitTransposeConv2dOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 3, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + armnn::TransposeConvolution2dDescriptor descriptor; + auto* parameters = reinterpret_cast(tfLiteNode->builtin_data); + descriptor.m_BiasEnabled = false; + descriptor.m_StrideX = NonNegative(parameters->stride_width, nodeIndex); + descriptor.m_StrideY = NonNegative(parameters->stride_height, nodeIndex); + descriptor.m_DataLayout = armnn::DataLayout::NHWC; + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteOutputShapeTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if(!IsValid(&tfLiteOutputShapeTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid input tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteOutputShapeTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + + armnn::TensorInfo tensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputShapeTensor); + std::vector outputShape(tensorInfo.GetNumElements()); + if (tensorInfo.GetDataType() == armnn::DataType::Signed32) + { + for(unsigned int i=0; i < tensorInfo.GetNumElements(); i++) + { + outputShape[i] = ::tflite::GetTensorData(&tfLiteOutputShapeTensor)[i]; + } + } + + if (tensorInfo.GetDataType() == armnn::DataType::QAsymmU8) + { + for(unsigned int i=0; i < tensorInfo.GetNumElements(); i++) + { + outputShape[i] = ::tflite::GetTensorData(&tfLiteOutputShapeTensor)[i]; + } + } + // Change from signed to unsigned int to store in TransposeConvolution2dDescriptor. 
+ for (int dimension : outputShape) + { + descriptor.m_OutputShape.push_back(static_cast(dimension)); + } + descriptor.m_OutputShapeEnabled = true; + + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[2]]; + if(!IsValid(&tfLiteInputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid input tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteInputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if(!IsValid(&tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid output tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic output tensors are not supported in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + + const TfLiteTensor& tfLiteFilterTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if(!IsValid(&tfLiteFilterTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid filter tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + if (IsDynamicTensor(tfLiteFilterTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic filter tensors are not supported in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + armnn::TensorInfo filterTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteFilterTensor); + + // TfLite uses NHWC tensors + const unsigned int inputHeight = inputTensorInfo.GetShape()[1]; + const unsigned int inputWidth = inputTensorInfo.GetShape()[2]; + + const unsigned int filterHeight = filterTensorInfo.GetShape()[1]; + const unsigned int filterWidth = filterTensorInfo.GetShape()[2]; + + // Calculate padding + CalcPadding(inputHeight, + filterHeight, + descriptor.m_StrideY, + 1, // dilation y + descriptor.m_PadTop, + descriptor.m_PadBottom, + parameters->padding); + CalcPadding(inputWidth, + filterWidth, + descriptor.m_StrideX, + 1, // dilation x + descriptor.m_PadLeft, + descriptor.m_PadRight, + parameters->padding); + + // Set up filter + auto filterTensor = CreateConstTensor(&tfLiteFilterTensor, + filterTensorInfo, + armnn::Optional()); + if (!delegateData.m_Network) + { + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsTransposeConvolution2dSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outputTensorInfo, + descriptor, + filterTensorInfo, + armnn::EmptyOptional()); + return isSupported ? 
kTfLiteOk : kTfLiteError; + } + + armnn::IConnectableLayer* layer = delegateData.m_Network->AddTransposeConvolution2dLayer(descriptor, + filterTensor, + armnn::EmptyOptional()); + ARMNN_ASSERT(layer != nullptr); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + // Connect + if (delegateData.m_OutputSlotForNode[static_cast(tfLiteNode->inputs->data[2])] != nullptr) + { + delegateData.m_OutputSlotForNode[static_cast(tfLiteNode->inputs->data[2])]-> + Connect(layer->GetInputSlot(0)); + } + + // Prepare output slots + for (unsigned int outputIndex = 0; outputIndex < layer->GetNumOutputSlots(); ++outputIndex) + { + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(outputIndex); + delegateData.m_OutputSlotForNode[static_cast(tfLiteNode->outputs->data[outputIndex])] = + &outputSlot; + } + return kTfLiteOk; +} + +TfLiteStatus VisitConvolutionOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + switch(operatorCode) + { + case kTfLiteBuiltinConv2d: + return VisitConv2dOperator(delegateData, tfLiteContext, tfLiteNode, nodeIndex, operatorCode); +// Conv3d is only correctly supported for external delegates from TF Lite v2.6, as there was a breaking bug in v2.5. +#if defined(ARMNN_POST_TFLITE_2_5) + case kTfLiteBuiltinConv3d: + return VisitConv3dOperator(delegateData, tfLiteContext, tfLiteNode, nodeIndex, operatorCode); +#endif + case kTfLiteBuiltinDepthwiseConv2d: + return VisitDepthwiseConv2dOperator(delegateData, tfLiteContext, tfLiteNode, nodeIndex, operatorCode); + case kTfLiteBuiltinTransposeConv: + return VisitTransposeConv2dOperator(delegateData, tfLiteContext, tfLiteNode, nodeIndex, operatorCode); + default: + return kTfLiteError; + } +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/DelegateOptions.cpp b/arch/arm/ARMnn/delegate/src/DelegateOptions.cpp new file mode 100644 index 0000000000..d477d9839d --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/DelegateOptions.cpp @@ -0,0 +1,238 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include +#include +#include + +namespace armnnDelegate +{ + +DelegateOptions::DelegateOptions(armnn::Compute computeDevice, + const std::vector& backendOptions, + const armnn::Optional logSeverityLevel) + : m_Backends({computeDevice}), m_RuntimeOptions(), m_LoggingSeverity(logSeverityLevel) +{ + m_RuntimeOptions.m_BackendOptions = backendOptions; +} + +DelegateOptions::DelegateOptions(const std::vector& backends, + const std::vector& backendOptions, + const armnn::Optional logSeverityLevel) + : m_Backends(backends), m_RuntimeOptions(), m_LoggingSeverity(logSeverityLevel) +{ + m_RuntimeOptions.m_BackendOptions = backendOptions; +} + +DelegateOptions::DelegateOptions(armnn::Compute computeDevice, + const armnn::OptimizerOptions& optimizerOptions, + const armnn::Optional& logSeverityLevel, + const armnn::Optional& func) + : m_Backends({computeDevice}), + m_RuntimeOptions(), + m_OptimizerOptions(optimizerOptions), + m_LoggingSeverity(logSeverityLevel), + m_DebugCallbackFunc(func) +{ +} + +DelegateOptions::DelegateOptions(const std::vector& backends, + const armnn::OptimizerOptions& optimizerOptions, + const armnn::Optional& logSeverityLevel, + const armnn::Optional& func) + : m_Backends(backends), + m_RuntimeOptions(), + m_OptimizerOptions(optimizerOptions), + m_LoggingSeverity(logSeverityLevel), + m_DebugCallbackFunc(func) +{ +} + +DelegateOptions::DelegateOptions(char const* const* options_keys, + char const* const* options_values, + size_t num_options, + void (*report_error)(const char*)) +{ + armnn::IRuntime::CreationOptions runtimeOptions; + armnn::OptimizerOptions optimizerOptions; + bool internalProfilingState = false; + armnn::ProfilingDetailsMethod internalProfilingDetail = armnn::ProfilingDetailsMethod::DetailsWithEvents; + armnn::IRuntime::CreationOptions::ExternalProfilingOptions extProfilingParams; + for (size_t i = 0; i < num_options; ++i) + { + // Process backends + if (std::string(options_keys[i]) == std::string("backends")) + { + // The backend option is a comma separated string of backendIDs that needs to be split + std::vector backends; + char* dup = strdup(options_values[i]); + char* pch = std::strtok(dup, ","); + while (pch != NULL) + { + backends.push_back(pch); + pch = strtok (NULL, ","); + } + this->SetBackends(backends); + } + // Process dynamic-backends-path + else if (std::string(options_keys[i]) == std::string("dynamic-backends-path")) + { + runtimeOptions.m_DynamicBackendsPath = std::string(options_values[i]); + } + // Process logging level + else if (std::string(options_keys[i]) == std::string("logging-severity")) + { + this->SetLoggingSeverity(options_values[i]); + } + // Process GPU backend options + else if (std::string(options_keys[i]) == std::string("gpu-tuning-level")) + { + armnn::BackendOptions option("GpuAcc", {{"TuningLevel", atoi(options_values[i])}}); + runtimeOptions.m_BackendOptions.push_back(option); + } + else if (std::string(options_keys[i]) == std::string("gpu-mlgo-tuning-file")) + { + armnn::BackendOptions option("GpuAcc", {{"MLGOTuningFilePath", std::string(options_values[i])}}); + optimizerOptions.m_ModelOptions.push_back(option); + } + else if (std::string(options_keys[i]) == std::string("gpu-tuning-file")) + { + armnn::BackendOptions option("GpuAcc", {{"TuningFile", std::string(options_values[i])}}); + runtimeOptions.m_BackendOptions.push_back(option); + } + else if (std::string(options_keys[i]) == std::string("gpu-enable-profiling")) + { + runtimeOptions.m_EnableGpuProfiling = (*options_values[i] != 
'0'); + } + else if (std::string(options_keys[i]) == std::string("gpu-kernel-profiling-enabled")) + { + armnn::BackendOptions option("GpuAcc", {{"KernelProfilingEnabled", + armnn::stringUtils::StringToBool(options_values[i])}}); + runtimeOptions.m_BackendOptions.push_back(option); + } + else if (std::string(options_keys[i]) == std::string("save-cached-network")) + { + armnn::BackendOptions option("GpuAcc", {{"SaveCachedNetwork", + armnn::stringUtils::StringToBool(options_values[i])}}); + optimizerOptions.m_ModelOptions.push_back(option); + } + else if (std::string(options_keys[i]) == std::string("cached-network-filepath")) + { + armnn::BackendOptions option("GpuAcc", {{"CachedNetworkFilePath", std::string(options_values[i])}}); + optimizerOptions.m_ModelOptions.push_back(option); + } + // Process GPU & CPU backend options + else if (std::string(options_keys[i]) == std::string("enable-fast-math")) + { + armnn::BackendOptions modelOptionGpu("GpuAcc", {{"FastMathEnabled", + armnn::stringUtils::StringToBool(options_values[i])}}); + optimizerOptions.m_ModelOptions.push_back(modelOptionGpu); + + armnn::BackendOptions modelOptionCpu("CpuAcc", {{"FastMathEnabled", + armnn::stringUtils::StringToBool(options_values[i])}}); + optimizerOptions.m_ModelOptions.push_back(modelOptionCpu); + } + // Process CPU backend options + else if (std::string(options_keys[i]) == std::string("number-of-threads")) + { + unsigned int numberOfThreads = armnn::numeric_cast(atoi(options_values[i])); + armnn::BackendOptions modelOption("CpuAcc", {{"NumberOfThreads", numberOfThreads}}); + optimizerOptions.m_ModelOptions.push_back(modelOption); + } + // Process reduce-fp32-to-fp16 option + else if (std::string(options_keys[i]) == std::string("reduce-fp32-to-fp16")) + { + optimizerOptions.m_ReduceFp32ToFp16 = armnn::stringUtils::StringToBool(options_values[i]); + } + // Process reduce-fp32-to-bf16 option + else if (std::string(options_keys[i]) == std::string("reduce-fp32-to-bf16")) + { + optimizerOptions.m_ReduceFp32ToBf16 = armnn::stringUtils::StringToBool(options_values[i]); + } + // Process debug-data + else if (std::string(options_keys[i]) == std::string("debug-data")) + { + optimizerOptions.m_Debug = armnn::stringUtils::StringToBool(options_values[i]); + } + // Process memory-import + else if (std::string(options_keys[i]) == std::string("memory-import")) + { + optimizerOptions.m_ImportEnabled = armnn::stringUtils::StringToBool(options_values[i]); + } + // Process enable-internal-profiling + else if (std::string(options_keys[i]) == std::string("enable-internal-profiling")) + { + internalProfilingState = *options_values[i] != '0'; + optimizerOptions.m_ProfilingEnabled = internalProfilingState; + } + // Process internal-profiling-detail + else if (std::string(options_keys[i]) == std::string("internal-profiling-detail")) + { + uint32_t detailLevel = static_cast(std::stoul(options_values[i])); + switch (detailLevel) + { + case 1: + internalProfilingDetail = armnn::ProfilingDetailsMethod::DetailsWithEvents; + break; + case 2: + internalProfilingDetail = armnn::ProfilingDetailsMethod::DetailsOnly; + break; + default: + internalProfilingDetail = armnn::ProfilingDetailsMethod::Undefined; + break; + } + } + // Process enable-external-profiling + else if (std::string(options_keys[i]) == std::string("enable-external-profiling")) + { + extProfilingParams.m_EnableProfiling = armnn::stringUtils::StringToBool(options_values[i]); + } + // Process timeline-profiling + else if (std::string(options_keys[i]) == 
std::string("timeline-profiling")) + { + extProfilingParams.m_TimelineEnabled = armnn::stringUtils::StringToBool(options_values[i]); + } + // Process outgoing-capture-file + else if (std::string(options_keys[i]) == std::string("outgoing-capture-file")) + { + extProfilingParams.m_OutgoingCaptureFile = options_values[i]; + } + // Process incoming-capture-file + else if (std::string(options_keys[i]) == std::string("incoming-capture-file")) + { + extProfilingParams.m_IncomingCaptureFile = options_values[i]; + } + // Process file-only-external-profiling + else if (std::string(options_keys[i]) == std::string("file-only-external-profiling")) + { + extProfilingParams.m_FileOnly = armnn::stringUtils::StringToBool(options_values[i]); + } + // Process counter-capture-period + else if (std::string(options_keys[i]) == std::string("counter-capture-period")) + { + extProfilingParams.m_CapturePeriod = static_cast(std::stoul(options_values[i])); + } + // Process profiling-file-format + else if (std::string(options_keys[i]) == std::string("profiling-file-format")) + { + extProfilingParams.m_FileFormat = options_values[i]; + } + // Process serialize-to-dot + else if (std::string(options_keys[i]) == std::string("serialize-to-dot")) + { + this->SetSerializeToDot(options_values[i]); + } + else + { + throw armnn::Exception("Unknown option for the ArmNN Delegate given: " + std::string(options_keys[i])); + } + } + + this->SetRuntimeOptions(runtimeOptions); + this->SetOptimizerOptions(optimizerOptions); + this->SetInternalProfilingParams(internalProfilingState, internalProfilingDetail); + this->SetExternalProfilingParams(extProfilingParams); +} +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/DelegateUtils.hpp b/arch/arm/ARMnn/delegate/src/DelegateUtils.hpp new file mode 100644 index 0000000000..678a3db002 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/DelegateUtils.hpp @@ -0,0 +1,653 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include "tensorflow/lite/kernels/kernel_util.h" + +namespace +{ + +// Macro to call an IsSupported function and log caller name together with reason for lack of support +#define FORWARD_LAYER_SUPPORT_FUNC(funcName, tfLiteContext, func, backends, supported, ...) 
\ +try \ +{ \ + for (auto&& backendId : backends) \ + { \ + auto layerSupportObject = armnn::GetILayerSupportByBackendId(backendId); \ + if (layerSupportObject.IsBackendRegistered()) \ + { \ + std::string reasonIfUnsupported; \ + supported = \ + layerSupportObject.func(__VA_ARGS__, armnn::Optional(reasonIfUnsupported)); \ + if (supported) \ + { \ + break; \ + } \ + else \ + { \ + if (reasonIfUnsupported.size() > 0) \ + { \ + TFLITE_LOG_PROD(tflite::TFLITE_LOG_WARNING, \ + "%s: not supported by armnn: %s", funcName, reasonIfUnsupported.c_str()); \ + } \ + else \ + { \ + TFLITE_LOG_PROD(tflite::TFLITE_LOG_WARNING, \ + "%s: not supported by armnn", funcName); \ + } \ + } \ + } \ + else \ + { \ + TF_LITE_KERNEL_LOG(tfLiteContext, "%s: backend not registered: %s", funcName, backendId.Get().c_str()); \ + } \ + } \ + if (!supported) \ + { \ + TF_LITE_KERNEL_LOG(tfLiteContext, "%s: not supported by any specified backend", funcName); \ + } \ +} \ +catch (const armnn::InvalidArgumentException &e) \ +{ \ + throw armnn::InvalidArgumentException(e, "Failed to check layer support", CHECK_LOCATION()); \ +} + +TfLiteStatus ValidateNumInputs(TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + const unsigned int expectedSize, + int nodeIndex) +{ + auto numInputs = tfLiteNode->inputs->size; + if (static_cast(numInputs) != expectedSize) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, "TfLiteArmnnDelegate: Unexpected number of inputs (%d != %d) in node #%d", + numInputs, expectedSize, nodeIndex); + return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus ValidateNumOutputs(TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + const unsigned int expectedSize, + int nodeIndex) +{ + auto numOutputs = tfLiteNode->outputs->size; + if (static_cast(numOutputs) != expectedSize) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, "TfLiteArmnnDelegate: Unexpected number of outputs (%d != %d) in node #%d", + numOutputs, expectedSize, nodeIndex); + return kTfLiteError; + } + return kTfLiteOk; +} + +bool IsDynamicTensor(const TfLiteTensor& tfLiteTensor) +{ + auto tensorAllocationType = tfLiteTensor.allocation_type; + if (tensorAllocationType == kTfLiteDynamic) + { + return true; + } + return false; +} + +bool IsValid(const TfLiteTensor* tfLiteTensor) +{ + return tfLiteTensor == nullptr ? 
false : true; +} + +bool IsValid(TfLiteContext* tfLiteContext, const TfLiteTensor& tfLiteTensor, int32_t operatorCode, int32_t nodeIndex) +{ + if(!IsValid(&tfLiteTensor)) + { + std::cout << "..Is Not Valid" << std::endl; + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Invalid TfLite tensor in operator #%d node #%d: ", + operatorCode, nodeIndex); + return false; + } + if (IsDynamicTensor(tfLiteTensor)) + { + std::cout << "..IsDynamicTensor" << std::endl; + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic tensors are not supported in operator #%d node #%d: ", + operatorCode, nodeIndex); + return false; + } + return true; +} + +uint32_t NonNegative(int32_t value, int nodeIndex) +{ + if (value < 0) + { + throw armnn::Exception( + "TfLiteArmnnDelegate: Non-negative value in node " + std::to_string(static_cast(nodeIndex))); + } + else + { + return static_cast(value); + } +} + +bool IsAffineQuantization(const TfLiteTensor& tfLiteTensor) +{ + auto quantizationInfo = tfLiteTensor.quantization; + if (quantizationInfo.type == kTfLiteAffineQuantization) + { + return true; + } + return false; +} + +TfLiteStatus Connect(armnn::IConnectableLayer* layer, + TfLiteNode* tfLiteNode, + armnnDelegate::DelegateData& data) +{ + ARMNN_ASSERT(static_cast(tfLiteNode->outputs->size) == layer->GetNumOutputSlots()); + + // Connect the input slots + for (unsigned int inputIndex = 0; inputIndex < layer->GetNumInputSlots(); ++inputIndex) + { + if (data.m_OutputSlotForNode[tfLiteNode->inputs->data[inputIndex]] != nullptr) + { + data.m_OutputSlotForNode[tfLiteNode->inputs->data[inputIndex]]->Connect(layer->GetInputSlot(inputIndex)); + } + } + + // Prepare output slots + for (unsigned int outputIndex = 0; outputIndex < layer->GetNumOutputSlots(); ++outputIndex) + { + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(outputIndex); + data.m_OutputSlotForNode[static_cast(tfLiteNode->outputs->data[outputIndex])] = &outputSlot; + } + + return kTfLiteOk; +} + +armnn::IConnectableLayer* BroadcastTensor(const armnn::TensorInfo& inputInfo0, + const armnn::TensorInfo& inputInfo1, + armnn::IConnectableLayer* startLayer, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + armnnDelegate::DelegateData& delegateData) +{ + unsigned int inputDimensions0 = inputInfo0.GetNumDimensions(); + unsigned int inputDimensions1 = inputInfo1.GetNumDimensions(); + + if (inputDimensions0 == inputDimensions1) + { + auto status = Connect(startLayer, tfLiteNode, delegateData); + return status == kTfLiteOk ? startLayer : nullptr; + } + + unsigned int biggerInputDimensions = std::max(inputDimensions0, inputDimensions1); + unsigned int dimDifference = static_cast(std::abs(armnn::numeric_cast(inputDimensions0) - + armnn::numeric_cast(inputDimensions1))); + + bool input0IsSmaller = inputDimensions0 < inputDimensions1; + const armnn::TensorInfo& smallInfo = input0IsSmaller ? 
inputInfo0 : inputInfo1; + const armnn::TensorShape& smallShape = smallInfo.GetShape(); + + std::vector reshapedDimensions(biggerInputDimensions, 1); + for (unsigned int i = dimDifference; i < biggerInputDimensions; ++i) + { + reshapedDimensions[i] = smallShape[i - dimDifference]; + } + + armnn::TensorInfo reshapedInfo = smallInfo; + reshapedInfo.SetShape(armnn::TensorShape{ armnn::numeric_cast(reshapedDimensions.size()), + reshapedDimensions.data() }); + + armnn::ReshapeDescriptor reshapeDescriptor; + reshapeDescriptor.m_TargetShape = reshapedInfo.GetShape(); + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsReshapeSupported, + delegateData.m_Backends, + isSupported, + smallInfo, + reshapedInfo, + reshapeDescriptor); + if (!isSupported) + { + return nullptr; + } + + ARMNN_ASSERT(delegateData.m_Network != nullptr); + // Add Reshape layer + armnn::IConnectableLayer* reshapeLayer = delegateData.m_Network->AddReshapeLayer(reshapeDescriptor); + ARMNN_ASSERT(reshapeLayer != nullptr); + reshapeLayer->GetOutputSlot(0).SetTensorInfo(reshapedInfo); + + if (input0IsSmaller) + { + delegateData.m_OutputSlotForNode[static_cast(tfLiteNode->inputs->data[0])] + ->Connect(reshapeLayer->GetInputSlot(0)); + reshapeLayer->GetOutputSlot(0).Connect(startLayer->GetInputSlot(0)); + delegateData.m_OutputSlotForNode[static_cast(tfLiteNode->inputs->data[1])] + ->Connect(startLayer->GetInputSlot(1)); + } + else + { + delegateData.m_OutputSlotForNode[static_cast(tfLiteNode->inputs->data[1])] + ->Connect(reshapeLayer->GetInputSlot(0)); + reshapeLayer->GetOutputSlot(0).Connect(startLayer->GetInputSlot(1)); + delegateData.m_OutputSlotForNode[static_cast(tfLiteNode->inputs->data[0])] + ->Connect(startLayer->GetInputSlot(0)); + } + + // Prepare output slots + for (unsigned int outputIndex = 0; outputIndex < startLayer->GetNumOutputSlots(); ++outputIndex) + { + armnn::IOutputSlot& outputSlot = startLayer->GetOutputSlot(outputIndex); + delegateData.m_OutputSlotForNode + [static_cast(tfLiteNode->outputs->data[outputIndex])] = &outputSlot; + } + + return reshapeLayer; +} + +TfLiteStatus FusedActivation(TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + TfLiteFusedActivation activationType, + armnn::IConnectableLayer* prevLayer, + unsigned int outputSlotIndex, + armnnDelegate::DelegateData& data) +{ + + const armnn::TensorInfo& activationOutputInfo = prevLayer->GetOutputSlot(outputSlotIndex).GetTensorInfo(); + + armnn::ActivationDescriptor activationDesc; + + switch (activationType) + { + case kTfLiteActNone: + { + // No Activation + return kTfLiteOk; + } + case kTfLiteActRelu: + { + activationDesc.m_Function = armnn::ActivationFunction::ReLu; + break; + } +// The name of kTfLiteActRelu1 changed after TF Lite v2.3 +#if defined(ARMNN_POST_TFLITE_2_3) + case kTfLiteActReluN1To1: +#else + case kTfLiteActRelu1: +#endif + { + activationDesc.m_Function = armnn::ActivationFunction::BoundedReLu; + activationDesc.m_A = 1.0f; + activationDesc.m_B = -1.0f; + break; + } + case kTfLiteActRelu6: + { + activationDesc.m_Function = armnn::ActivationFunction::BoundedReLu; + activationDesc.m_A = 6.0f; + activationDesc.m_B = 0.0f; + break; + } + case kTfLiteActSigmoid: + { + activationDesc.m_Function = armnn::ActivationFunction::Sigmoid; + break; + } + case kTfLiteActTanh: + { + activationDesc.m_Function = armnn::ActivationFunction::TanH; + activationDesc.m_A = 1.0f; + activationDesc.m_B = 1.0f; + break; + } + default: + return kTfLiteError; + } + + bool isSupported = false; + 
FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsActivationSupported, + data.m_Backends, + isSupported, + prevLayer->GetOutputSlot(0).GetTensorInfo(), + activationOutputInfo, + activationDesc); + if (!isSupported) + { + return kTfLiteError; + } + armnn::IConnectableLayer* activationLayer = data.m_Network->AddActivationLayer(activationDesc); + + ARMNN_ASSERT(activationLayer != nullptr); + activationLayer->GetOutputSlot(0).SetTensorInfo(activationOutputInfo); + + // Connect and prepare output slots + for (unsigned int outputIndex = 0; outputIndex < activationLayer->GetNumOutputSlots(); ++outputIndex) + { + data.m_OutputSlotForNode[static_cast( + tfLiteNode->outputs->data[outputIndex])]->Connect(activationLayer->GetInputSlot(0)); + armnn::IOutputSlot& outputSlot = activationLayer->GetOutputSlot(outputIndex); + data.m_OutputSlotForNode[static_cast( + tfLiteNode->outputs->data[outputIndex])] = &outputSlot; + } + return kTfLiteOk; +} + +armnn::DataType GetDataType(const TfLiteTensor& tfLiteTensor) +{ + switch (tfLiteTensor.type) + { + case kTfLiteBool: + return armnn::DataType::Boolean; + case kTfLiteFloat32: + return armnn::DataType::Float32; + case kTfLiteFloat16: + return armnn::DataType::Float16; + case kTfLiteUInt8: + return armnn::DataType::QAsymmU8; + case kTfLiteInt8: + { + auto quantizationInfo = tfLiteTensor.quantization; + if (quantizationInfo.type == kTfLiteAffineQuantization) + { + auto* quantization = + reinterpret_cast(tfLiteTensor.quantization.params); + if (quantization->zero_point != nullptr && quantization->zero_point->size == 1) + { + return armnn::DataType::QAsymmS8; + } + else + { + return armnn::DataType::QSymmS8; + } + } + else + { + return armnn::DataType::QAsymmS8; + } + } + case kTfLiteInt16: + return armnn::DataType::QSymmS16; + case kTfLiteInt32: + return armnn::DataType::Signed32; + case kTfLiteInt64: + return armnn::DataType::Signed64; + default: + throw armnn::Exception(&"TfLiteArmnnDelegate: Unsupported data type: " [ tfLiteTensor.type]); + } +} + +armnn::TensorInfo GetTensorInfoForTfLiteTensor(const TfLiteTensor& tfLiteTensor) +{ + armnn::DataType type = GetDataType(tfLiteTensor); + armnn::TensorInfo ret; + auto tensorDimensionSize = tfLiteTensor.dims->size; + if (tensorDimensionSize == 0) + { + if(tflite::IsConstantTensor(&tfLiteTensor)) + { + std::vector safeShape = { 1 }; + bool dimensionsSpecificity[1] = { true }; + armnn::TensorShape tensorShape(armnn::numeric_cast(safeShape.size()), + safeShape.data(), + dimensionsSpecificity); + ret = armnn::TensorInfo(tensorShape, type); + ret.SetConstant(true); + } + else + { + armnn::TensorShape tensorShape(armnn::Dimensionality::NotSpecified); + ret = armnn::TensorInfo(tensorShape, type); + } + } + else + { + std::vector tensorDims(static_cast(tensorDimensionSize)); + bool dimensionsSpecificity[5] = { true, true, true, true, true }; + for (unsigned int i = 0; i < static_cast(tensorDimensionSize); ++i) { + auto dim = tfLiteTensor.dims->data[i]; + if (dim == 0) + { + dimensionsSpecificity[i] = false; + } + tensorDims[i] = static_cast(dim); + } + armnn::TensorShape tensorShape(static_cast(tensorDimensionSize), + tensorDims.data(), + dimensionsSpecificity); + + if(tflite::IsConstantTensor(&tfLiteTensor)) + { + ret = armnn::TensorInfo(tensorShape, type); + ret.SetConstant(true); + } + else + { + ret = armnn::TensorInfo(tensorShape, type); + } + } + + auto quantizationInfo = tfLiteTensor.quantization; + if (quantizationInfo.type == kTfLiteAffineQuantization) + { + // get per-channel quantization parameters + const 
auto* affineQuantization = + reinterpret_cast(tfLiteTensor.quantization.params); + if (affineQuantization->scale->size > 1) + { + std::vector quantizationScales; + for (unsigned int i = 0; i < static_cast(affineQuantization->scale->size); ++i) + { + quantizationScales.push_back(affineQuantization->scale->data[i]); + } + ret.SetQuantizationScales(quantizationScales); + ret.SetQuantizationDim(armnn::numeric_cast(affineQuantization->quantized_dimension)); + } + else + { + ret.SetQuantizationScale(affineQuantization->scale->data[0]); + ret.SetQuantizationOffset(affineQuantization->zero_point->data[0]); + } + } + else + { + auto quantizationParameters = tfLiteTensor.params; + ret.SetQuantizationScale(quantizationParameters.scale); + ret.SetQuantizationOffset(quantizationParameters.zero_point); + } + + return ret; +} + +armnn::ConstTensor CreateConstTensor(const TfLiteTensor* tfLiteTensor, + armnn::TensorInfo& tensorInfo, + armnn::Optional + permutationVector = armnn::EmptyOptional(), + void* permutationData = nullptr) +{ + if (tfLiteTensor->allocation_type != kTfLiteMmapRo) + { + throw armnn::Exception( + "TfLiteArmnnDelegate: Not constant allocation type: " + std::to_string(tfLiteTensor->allocation_type)); + } + + if(tflite::IsConstantTensor(tfLiteTensor)) + { + tensorInfo.SetConstant(); + } + + if (permutationVector.has_value() && permutationVector.value().GetSize() > 0 && permutationData != nullptr) + { + // Permute tensor info + tensorInfo = armnnUtils::Permuted(tensorInfo, permutationVector.value()); + // then permute data using the shape from permuted tensor info + armnnUtils::Permute(tensorInfo.GetShape(), + permutationVector.value(), + tfLiteTensor->data.data, + permutationData, + armnn::GetDataTypeSize(tensorInfo.GetDataType())); + + return armnn::ConstTensor(tensorInfo, permutationData); + } + else + { + return armnn::ConstTensor(tensorInfo, tfLiteTensor->data.data); + } +} + +armnn::ConstTensor* GetConstTensorForTfLiteTensor(const TfLiteTensor* tfLiteTensors, TfLiteNode* tfLiteNode, int index) +{ + const TfLiteTensor &tfLiteTensor = tfLiteTensors[tfLiteNode->inputs->data[index]]; + armnn::TensorInfo tensorInfo = GetTensorInfoForTfLiteTensor(tfLiteTensor); + return new armnn::ConstTensor(tensorInfo, tfLiteTensor.data.data); +} + +void CalcPadding(uint32_t inputSize, + uint32_t filterSize, + uint32_t stride, + uint32_t dilation, + uint32_t& paddingFront, + uint32_t& paddingBack, + TfLitePadding padding) +{ + paddingFront = 0; + paddingBack = 0; + if (padding == kTfLitePaddingSame) + { + uint32_t outputSize = (inputSize + stride - 1) / stride; + uint32_t dilatedSize = filterSize + (dilation - 1) * (filterSize - 1); + uint32_t temp = (outputSize - 1) * stride + dilatedSize; + if (temp > inputSize) + { + paddingFront = (temp - inputSize) / 2; + paddingBack = (temp - inputSize) - paddingFront; + } + } +} + +TfLiteStatus ConnectConstant(armnn::IConnectableLayer* layer, + armnn::TensorInfo& constTensorInfo, + TfLiteContext* tfLiteContext, + const TfLiteTensor& tfLiteTensor, + armnnDelegate::DelegateData& data, + unsigned int slotIndex) +{ + IgnoreUnused(layer); + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsConstantSupported, + data.m_Backends, + isSupported, + constTensorInfo); + if (!isSupported) + { + return kTfLiteError; + } + + auto constantInput = CreateConstTensor(&tfLiteTensor, + constTensorInfo, + armnn::Optional()); + armnn::IConnectableLayer* constantLayer = data.m_Network->AddConstantLayer(constantInput); + armnn::IOutputSlot& outputSlot = 
constantLayer->GetOutputSlot(0); + outputSlot.SetTensorInfo(constTensorInfo); + + data.m_OutputSlotForNode[static_cast(slotIndex)] = &outputSlot; + + return kTfLiteOk; +} + +bool IsOptionalOperandPresent(TfLiteNode* tfLiteNode, const int operandIndex) +{ + if (tfLiteNode->inputs->data[operandIndex] < 0) { + return true; + } + return false; + +} + +TfLiteStatus ProcessInputs(armnn::IConnectableLayer* layer, + armnnDelegate::DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode) +{ + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + // Process input tensors + // If input tensor is a Constant tensor create a constant layer and connect it to the network + for (unsigned int inputIndex = 0; inputIndex < layer->GetNumInputSlots(); ++inputIndex) + { + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[inputIndex]]; + if (tflite::IsConstantTensor(&tfLiteInputTensor)) + { + armnn::TensorInfo inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsConstantSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo); + if (!isSupported) + { + return kTfLiteError; + } + auto constantInput = CreateConstTensor(&tfLiteInputTensor, + inputTensorInfo, + armnn::Optional()); + armnn::IConnectableLayer* constantLayer = delegateData.m_Network->AddConstantLayer(constantInput); + armnn::IOutputSlot& outputSlot = constantLayer->GetOutputSlot(0); + outputSlot.SetTensorInfo(inputTensorInfo); + + delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[inputIndex]] = &outputSlot; + } + } + return kTfLiteOk; +} + +unsigned int ComputeWrappedIndex(int index, unsigned int numDimensions) +{ + int numDims = armnn::numeric_cast(numDimensions); + int wrappedIndex = index < 0 ? numDims + index : index; + ARMNN_ASSERT(wrappedIndex >= 0); + ARMNN_ASSERT(wrappedIndex < numDims); + + return static_cast(wrappedIndex); +}; + +bool AreAllSigned32(const armnn::TensorInfo& inputInfo1, + const armnn::TensorInfo& inputInfo2, + const armnn::TensorInfo& outputInfo) +{ + return (armnn::DataType::Signed32 == inputInfo1.GetDataType()) && + (armnn::DataType::Signed32 == inputInfo2.GetDataType()) && + (armnn::DataType::Signed32 == outputInfo.GetDataType()); +} + +} // namespace anonymous diff --git a/arch/arm/ARMnn/delegate/src/ElementwiseBinary.hpp b/arch/arm/ARMnn/delegate/src/ElementwiseBinary.hpp new file mode 100644 index 0000000000..0534c070be --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/ElementwiseBinary.hpp @@ -0,0 +1,369 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "DelegateUtils.hpp" +#include "MultiLayerFacade.hpp" +#include "SharedFunctions.hpp" + +#include +#include +#include +#include +#include "tensorflow/lite/delegates/utils.h" + +namespace armnnDelegate +{ + +TfLiteStatus ValidateAddOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + const armnn::TensorInfo& inputInfo1, + const armnn::TensorInfo& inputInfo2, + const armnn::TensorInfo& outputInfo) +{ + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsAdditionSupported, + delegateData.m_Backends, + isSupported, + inputInfo1, + inputInfo2, + outputTensorInfo); + }; + + validateFunc(outputInfo, isSupported); + return isSupported ? 
kTfLiteOk : kTfLiteError; +} + + +TfLiteStatus ValidateDivOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + const armnn::TensorInfo& inputInfo1, + const armnn::TensorInfo& inputInfo2, + const armnn::TensorInfo& outputInfo) +{ + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsDivisionSupported, + delegateData.m_Backends, + isSupported, + inputInfo1, + inputInfo2, + outputTensorInfo); + }; + + validateFunc(outputInfo, isSupported); + return isSupported ? kTfLiteOk : kTfLiteError; +} + +TfLiteStatus ValidateFloorDivOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + const armnn::TensorInfo& inputInfo1, + const armnn::TensorInfo& inputInfo2, + const armnn::TensorInfo& outputInfo) +{ + // need first to validate that the div operator is supported + // then that the floor operator is supported + TfLiteStatus status = ValidateDivOperator(delegateData, tfLiteContext, inputInfo1, inputInfo2, outputInfo); + if (status != kTfLiteOk) + { + return status; + } + // if the inputs and output of the div are all Signed32 we don't need to add the floor operator afterward. + if (AreAllSigned32(inputInfo1, inputInfo2, outputInfo)) + { + return status; + } + // in case broadcasting is being done from one of the inputs to the div + // choose the full sized input tensor to pass to the floor validation routine + armnn::TensorInfo floorInputInfo = inputInfo1; + if (inputInfo1.GetNumDimensions() < inputInfo2.GetNumDimensions()) + { + floorInputInfo = inputInfo2; + } + status = ValidateFloorOperator(delegateData, tfLiteContext, floorInputInfo, outputInfo); + return status; +} + +TfLiteStatus ValidateMaximumOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + const armnn::TensorInfo& inputInfo1, + const armnn::TensorInfo& inputInfo2, + const armnn::TensorInfo& outputInfo) +{ + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsMaximumSupported, + delegateData.m_Backends, + isSupported, + inputInfo1, + inputInfo2, + outputTensorInfo); + }; + + validateFunc(outputInfo, isSupported); + return isSupported ? kTfLiteOk : kTfLiteError; +} + +TfLiteStatus ValidateMinimumOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + const armnn::TensorInfo& inputInfo1, + const armnn::TensorInfo& inputInfo2, + const armnn::TensorInfo& outputInfo) +{ + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsMinimumSupported, + delegateData.m_Backends, + isSupported, + inputInfo1, + inputInfo2, + outputTensorInfo); + }; + + validateFunc(outputInfo, isSupported); + return isSupported ? kTfLiteOk : kTfLiteError; +} + +TfLiteStatus ValidateMulOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + const armnn::TensorInfo& inputInfo1, + const armnn::TensorInfo& inputInfo2, + const armnn::TensorInfo& outputInfo) +{ + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsMultiplicationSupported, + delegateData.m_Backends, + isSupported, + inputInfo1, + inputInfo2, + outputTensorInfo); + }; + + validateFunc(outputInfo, isSupported); + return isSupported ? 
kTfLiteOk : kTfLiteError; +} + +TfLiteStatus ValidateSubOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + const armnn::TensorInfo& inputInfo1, + const armnn::TensorInfo& inputInfo2, + const armnn::TensorInfo& outputInfo) +{ + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsSubtractionSupported, + delegateData.m_Backends, + isSupported, + inputInfo1, + inputInfo2, + outputTensorInfo); + }; + + validateFunc(outputInfo, isSupported); + return isSupported ? kTfLiteOk : kTfLiteError; +} + +std::pair AddFloorDivLayer( + DelegateData& delegateData, + const armnn::TensorInfo& outputTensorInfo) +{ + armnn::IConnectableLayer* divisionLayer = delegateData.m_Network->AddDivisionLayer(); + // if the output of the div is Signed32 the Floor layer is not required + if (armnn::DataType::Signed32 == outputTensorInfo.GetDataType()) + { + return std::make_pair(divisionLayer, divisionLayer); + } + armnn::IOutputSlot& outputSlot = divisionLayer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + armnn::IConnectableLayer* floorLayer = delegateData.m_Network->AddFloorLayer(); + outputSlot.Connect(floorLayer->GetInputSlot(0)); + return std::make_pair(divisionLayer, floorLayer); +} + +TfLiteStatus VisitElementwiseBinaryOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t elementwiseBinaryOperatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 2, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor0 = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (IsDynamicTensor(tfLiteInputTensor0)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ", + elementwiseBinaryOperatorCode, nodeIndex); + return kTfLiteError; + } + + const TfLiteTensor& tfLiteInputTensor1 = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if (IsDynamicTensor(tfLiteInputTensor1)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ", + elementwiseBinaryOperatorCode, nodeIndex); + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (IsDynamicTensor(tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic output tensors are not supported in operator #%d node #%d: ", + elementwiseBinaryOperatorCode, nodeIndex); + return kTfLiteError; + } + + armnn::TensorInfo inputTensorInfo0 = GetTensorInfoForTfLiteTensor(tfLiteInputTensor0); + armnn::TensorInfo inputTensorInfo1 = GetTensorInfoForTfLiteTensor(tfLiteInputTensor1); + + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + if (!delegateData.m_Network) + { + switch(elementwiseBinaryOperatorCode) + { + case kTfLiteBuiltinAdd: + return ValidateAddOperator(delegateData, + tfLiteContext, + inputTensorInfo0, + inputTensorInfo1, + outputTensorInfo); + case kTfLiteBuiltinDiv: + return ValidateDivOperator(delegateData, + tfLiteContext, + inputTensorInfo0, + inputTensorInfo1, + outputTensorInfo); + case kTfLiteBuiltinFloorDiv: + return ValidateFloorDivOperator(delegateData, + 
tfLiteContext, + inputTensorInfo0, + inputTensorInfo1, + outputTensorInfo); + case kTfLiteBuiltinMaximum: + return ValidateMaximumOperator(delegateData, + tfLiteContext, + inputTensorInfo0, + inputTensorInfo1, + outputTensorInfo); + case kTfLiteBuiltinMinimum: + return ValidateMinimumOperator(delegateData, + tfLiteContext, + inputTensorInfo0, + inputTensorInfo1, + outputTensorInfo); + case kTfLiteBuiltinMul: + return ValidateMulOperator(delegateData, + tfLiteContext, + inputTensorInfo0, + inputTensorInfo1, + outputTensorInfo); + case kTfLiteBuiltinSub: + return ValidateSubOperator(delegateData, + tfLiteContext, + inputTensorInfo0, + inputTensorInfo1, + outputTensorInfo); + default: + return kTfLiteError; + } + } + + armnn::IConnectableLayer* elementwiseBinaryLayer = nullptr; + MultiLayerFacade multiLayer; + switch(elementwiseBinaryOperatorCode) + { + case kTfLiteBuiltinAdd: + elementwiseBinaryLayer = delegateData.m_Network->AddAdditionLayer(); + break; + case kTfLiteBuiltinDiv: + elementwiseBinaryLayer = delegateData.m_Network->AddDivisionLayer(); + break; + case kTfLiteBuiltinFloorDiv: + { + auto layers = AddFloorDivLayer(delegateData, outputTensorInfo); + multiLayer.AssignValues(layers.first, layers.second); + elementwiseBinaryLayer = &multiLayer; + } + break; + case kTfLiteBuiltinMaximum: + elementwiseBinaryLayer = delegateData.m_Network->AddMaximumLayer(); + break; + case kTfLiteBuiltinMinimum: + elementwiseBinaryLayer = delegateData.m_Network->AddMinimumLayer(); + break; + case kTfLiteBuiltinMul: + elementwiseBinaryLayer = delegateData.m_Network->AddMultiplicationLayer(); + break; + case kTfLiteBuiltinSub: + elementwiseBinaryLayer = delegateData.m_Network->AddSubtractionLayer(); + break; + default: + return kTfLiteError; + } + ARMNN_ASSERT(elementwiseBinaryLayer != nullptr); + armnn::IOutputSlot& outputSlot = elementwiseBinaryLayer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + auto inputsTensorsProcess = ProcessInputs(elementwiseBinaryLayer, + delegateData, + tfLiteContext, + tfLiteNode); + if (inputsTensorsProcess == kTfLiteError) + { + return inputsTensorsProcess; + } + + auto reshapeLayer = BroadcastTensor(inputTensorInfo0, + inputTensorInfo1, + elementwiseBinaryLayer, + tfLiteContext, + tfLiteNode, + delegateData); + if (!reshapeLayer) + { + return kTfLiteError; + } + + auto* tfLiteNodeParameters = reinterpret_cast(tfLiteNode->builtin_data); + if (!tfLiteNodeParameters) + { + // No Activation + return kTfLiteOk; + } + // Check activation + TfLiteFusedActivation activationType = tfLiteNodeParameters->activation; + return FusedActivation(tfLiteContext, tfLiteNode, activationType, elementwiseBinaryLayer, 0, delegateData); +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/ElementwiseUnary.hpp b/arch/arm/ARMnn/delegate/src/ElementwiseUnary.hpp new file mode 100644 index 0000000000..f2f5301635 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/ElementwiseUnary.hpp @@ -0,0 +1,83 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "DelegateUtils.hpp" + +#include + +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus VisitElementwiseUnaryOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + armnn::UnaryOperation unaryOperation) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (IsDynamicTensor(tfLiteInputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in node #%d: ", + nodeIndex); + return kTfLiteError; + } + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (IsDynamicTensor(tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic output tensors are not supported in node #%d: ", + nodeIndex); + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + armnn::ElementwiseUnaryDescriptor descriptor(unaryOperation); + bool isSupported = false; + + auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsElementwiseUnarySupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outputTensorInfo, + descriptor); + }; + + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? kTfLiteOk : kTfLiteError; + } + + armnn::IConnectableLayer* layer = delegateData.m_Network->AddElementwiseUnaryLayer(descriptor); + ARMNN_ASSERT(layer != nullptr); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + // Connect + return Connect(layer, tfLiteNode, delegateData); +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/Fill.hpp b/arch/arm/ARMnn/delegate/src/Fill.hpp new file mode 100644 index 0000000000..c6f94dd83c --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/Fill.hpp @@ -0,0 +1,111 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus VisitFillOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t tfLiteFillOperatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + switch(tfLiteFillOperatorCode) + { + case kTfLiteBuiltinFill: + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 2, nodeIndex)); + break; + default: + return kTfLiteError; + } + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, tfLiteFillOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteFillTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if (!IsValid(tfLiteContext, tfLiteFillTensor, tfLiteFillOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, tfLiteFillOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + armnn::TensorInfo inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + armnn::FillDescriptor descriptor; + switch (tfLiteFillTensor.type) + { + case kTfLiteFloat32: + descriptor.m_Value = tflite::GetTensorData(&tfLiteFillTensor)[0]; + break; + case kTfLiteInt32: + descriptor.m_Value = tflite::GetTensorData(&tfLiteFillTensor)[0]; + break; + default: + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: FILL value data type is not supported in operator #%d node #%d: ", + tfLiteFillOperatorCode, nodeIndex); + return kTfLiteError; + } + + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsFillSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outInfo, + descriptor); + }; + + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? kTfLiteOk : kTfLiteError; + } + + armnn::IConnectableLayer* layer = delegateData.m_Network->AddFillLayer(descriptor); + ARMNN_ASSERT(layer != nullptr); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + auto inputsTensorsProcess = ProcessInputs(layer, + delegateData, + tfLiteContext, + tfLiteNode); + if (inputsTensorsProcess == kTfLiteError) + { + return inputsTensorsProcess; + } + + return Connect(layer, tfLiteNode, delegateData); +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/FullyConnected.hpp b/arch/arm/ARMnn/delegate/src/FullyConnected.hpp new file mode 100644 index 0000000000..49686d6eaf --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/FullyConnected.hpp @@ -0,0 +1,213 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "DelegateUtils.hpp" +#include + +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus VisitFullyConnectedOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + auto numInputs = tfLiteNode->inputs->size; + if (numInputs < 2) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, "TfLiteArmnnDelegate: Minimum number of inputs (%d != %d) in node #%d", + 2, numInputs, nodeIndex); + return kTfLiteError; + } + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + bool biasEnabled = (numInputs == 3); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteWeightsTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if (!IsValid(tfLiteContext, tfLiteWeightsTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + armnn::TensorInfo weightsTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteWeightsTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + // Fully Connected Layer accepts two dimensional weights input + int32_t weightsDimension = static_cast(weightsTensorInfo.GetNumDimensions()); + if (weightsDimension != 2) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dimension #$d for Fully Connected weights is not supported by Armnn" + " in operator #%d node #%d: ", weightsDimension, operatorCode, nodeIndex); + return kTfLiteError; + } + + bool isConstantWeights = tflite::IsConstantTensor(&tfLiteWeightsTensor); + + armnn::TensorInfo biasTensorInfo; + if (biasEnabled) + { + const TfLiteTensor& tfLiteBiasTensor = tfLiteTensors[tfLiteNode->inputs->data[2]]; + if (!IsValid(tfLiteContext, tfLiteBiasTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + biasTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteBiasTensor); + } + else + { + biasTensorInfo = armnn::TensorInfo(armnn::TensorShape({1}), GetDataType(tfLiteInputTensor)); + } + + armnn::TensorInfo reshapedTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + if (inputTensorInfo.GetNumDimensions() > 2) + { + // Calculate reshape to flatten to 2D [batch_size, input_size] + std::vector reshapedDimensions(2); + reshapedDimensions[1] = weightsTensorInfo.GetShape()[1]; + reshapedDimensions[0] = inputTensorInfo.GetNumElements() / reshapedDimensions[1]; + + if (inputTensorInfo.GetNumElements() % reshapedDimensions[1] != 0) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Failed to deduce input tensor shape from filter size #%d #%d node #%d: ", + reshapedDimensions[1], operatorCode, nodeIndex); + return kTfLiteError; + } + + reshapedTensorInfo.SetShape(armnn::TensorShape{ 2, reshapedDimensions.data() }); + } + + armnn::FullyConnectedDescriptor descriptor; + descriptor.m_TransposeWeightMatrix = true; + descriptor.m_BiasEnabled = biasEnabled; + descriptor.m_ConstantWeights = isConstantWeights; + + bool isSupported = 
false; + auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsFullyConnectedSupported, + delegateData.m_Backends, + isSupported, + reshapedTensorInfo, + outputTensorInfo, + weightsTensorInfo, + biasTensorInfo, + descriptor); + }; + + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? kTfLiteOk : kTfLiteError; + } + + armnn::IConnectableLayer* layer = delegateData.m_Network->AddFullyConnectedLayer(descriptor); + ARMNN_ASSERT(layer != nullptr); + + // Add a constant layer for weights and biases if inputs are constant. + if (isConstantWeights) + { + auto weightsTensor = CreateConstTensor(&tfLiteWeightsTensor, + weightsTensorInfo, + armnn::Optional()); + + armnn::IConnectableLayer* weightsLayer = delegateData.m_Network->AddConstantLayer(weightsTensor); + + weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1u)); + weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsTensorInfo); + } + + if (biasEnabled) + { + const TfLiteTensor& tfLiteBiasTensor = tfLiteTensors[tfLiteNode->inputs->data[2]]; + if(tflite::IsConstantTensor(&tfLiteBiasTensor)) + { + auto biasTensor = CreateConstTensor(&tfLiteBiasTensor, + biasTensorInfo, + armnn::Optional()); + + armnn::IConnectableLayer* biasLayer = delegateData.m_Network->AddConstantLayer(biasTensor); + ARMNN_ASSERT(biasLayer != nullptr); + + biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2u)); + biasLayer->GetOutputSlot(0).SetTensorInfo(biasTensorInfo); + } + } + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + armnn::IConnectableLayer* reshapeLayer = nullptr; + if (inputTensorInfo.GetNumDimensions() > 2) + { + // Add reshape to flatten to 2D [batch_size, input_size] + armnn::ReshapeDescriptor reshapeDescriptor; + reshapeDescriptor.m_TargetShape = reshapedTensorInfo.GetShape(); + reshapeLayer = delegateData.m_Network->AddReshapeLayer(reshapeDescriptor); + ARMNN_ASSERT(reshapeLayer != nullptr); + + reshapeLayer->GetOutputSlot(0).SetTensorInfo(reshapedTensorInfo); + + // Connect + delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[0]]->Connect(reshapeLayer->GetInputSlot(0)); + reshapeLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + + if (!descriptor.m_ConstantWeights) + { + delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[1]]->Connect(layer->GetInputSlot(1)); + } + + if (biasEnabled && !tflite::IsConstantTensor(&tfLiteTensors[tfLiteNode->inputs->data[2]])) + { + delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[2]]->Connect(layer->GetInputSlot(2)); + } + delegateData.m_OutputSlotForNode[tfLiteNode->outputs->data[0]] = &outputSlot; + } + + if (reshapeLayer == nullptr) + { + Connect(layer, tfLiteNode, delegateData); + } + + auto* tfLiteNodeParameters = reinterpret_cast(tfLiteNode->builtin_data); + if (!tfLiteNodeParameters) + { + // No Activation + return kTfLiteOk; + } + + // Check Activation + TfLiteFusedActivation activationType = tfLiteNodeParameters->activation; + return FusedActivation(tfLiteContext, tfLiteNode, activationType, layer, 0, delegateData); +} + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/Gather.hpp b/arch/arm/ARMnn/delegate/src/Gather.hpp new file mode 100644 index 0000000000..634373a341 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/Gather.hpp @@ -0,0 +1,105 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "DelegateUtils.hpp" +#include +#include +#include +#include + +namespace armnnDelegate +{ +TfLiteStatus VisitGatherOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 2, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteIndicesTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if (!IsValid(tfLiteContext, tfLiteIndicesTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + auto* gatherParameters = reinterpret_cast(tfLiteNode->builtin_data); + auto axis = gatherParameters->axis; + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& indicesTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteIndicesTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + armnn::GatherDescriptor gatherDescriptor; + gatherDescriptor.m_Axis = axis; + + auto inputDimensions = static_cast(inputTensorInfo.GetNumDimensions()); + auto indicesDimensions = indicesTensorInfo.GetNumDimensions(); + auto outputDimensions = outputTensorInfo.GetNumDimensions(); + if (((axis < -inputDimensions) && (axis < 0)) || ((axis >= inputDimensions) && (axis > 0))) + { + TF_LITE_MAYBE_KERNEL_LOG( tfLiteContext, + "TfLiteArmnnDelegate: Operation has invalid axis: %d. It is out of bounds [-%d, %d))", + axis, inputDimensions, inputDimensions); + return kTfLiteError; + } + if (outputDimensions != static_cast(inputDimensions) + indicesDimensions - 1) + { + TF_LITE_MAYBE_KERNEL_LOG( tfLiteContext, + "Operation has invalid output dimensions: %d. Output must be an (%d + %d - 1)-D tensor", + outputDimensions, inputDimensions, indicesDimensions); + return kTfLiteError; + } + + if (!delegateData.m_Network) + { + // Check if supported + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsGatherSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + indicesTensorInfo, + outputTensorInfo, + gatherDescriptor); + return isSupported ? 
kTfLiteOk : kTfLiteError; + } + + armnn::IConnectableLayer* layer = delegateData.m_Network->AddGatherLayer(gatherDescriptor); + ARMNN_ASSERT(layer != nullptr); + layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); + + auto inputsTensorsProcess = ProcessInputs(layer, + delegateData, + tfLiteContext, + tfLiteNode); + if (inputsTensorsProcess == kTfLiteError) + { + return inputsTensorsProcess; + } + + Connect(layer, tfLiteNode, delegateData); + + return kTfLiteOk; +} +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/LogicalBinary.hpp b/arch/arm/ARMnn/delegate/src/LogicalBinary.hpp new file mode 100644 index 0000000000..d877585849 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/LogicalBinary.hpp @@ -0,0 +1,103 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus VisitLogicalBinaryOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t logicalOperatorCode, + armnn::LogicalBinaryOperation binaryOperation) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 2, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor0 = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor0, logicalOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteInputTensor1 = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor1, logicalOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, logicalOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + armnn::TensorInfo inputTensorInfo0 = GetTensorInfoForTfLiteTensor(tfLiteInputTensor0); + armnn::TensorInfo inputTensorInfo1 = GetTensorInfoForTfLiteTensor(tfLiteInputTensor1); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + // Setup descriptor and assign operation + armnn::LogicalBinaryDescriptor desc; + desc.m_Operation = binaryOperation; + + // Check if supported + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsLogicalBinarySupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo0, + inputTensorInfo1, + outputTensorInfo, + desc); + }; + + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? 
kTfLiteOk : kTfLiteError; + } + + armnn::IConnectableLayer* logicalBinaryLayer = delegateData.m_Network->AddLogicalBinaryLayer(desc); + ARMNN_ASSERT(logicalBinaryLayer != nullptr); + + armnn::IOutputSlot& outputSlot = logicalBinaryLayer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + auto inputsTensorsProcess = ProcessInputs(logicalBinaryLayer, + delegateData, + tfLiteContext, + tfLiteNode); + if (inputsTensorsProcess == kTfLiteError) + { + return inputsTensorsProcess; + } + + // LogicalBinary operators support broadcasting + auto reshapeLayer = BroadcastTensor(inputTensorInfo0, + inputTensorInfo1, + logicalBinaryLayer, + tfLiteContext, + tfLiteNode, + delegateData); + if (!reshapeLayer) + { + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/Lstm.hpp b/arch/arm/ARMnn/delegate/src/Lstm.hpp new file mode 100644 index 0000000000..8d719ee351 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/Lstm.hpp @@ -0,0 +1,265 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "DelegateUtils.hpp" + +#include +#include +#include + +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus VisitLstmOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + auto numInputs = tfLiteNode->inputs->size; + if (numInputs < 2) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, "TfLiteArmnnDelegate: Minimum number of inputs (%d != %d) in node #%d", + 2, numInputs, nodeIndex); + return kTfLiteError; + } + + const auto nodeParams = reinterpret_cast(tfLiteNode->builtin_data); + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + // Set the params structure for the AddLstmLayer call + armnn::LstmInputParams params; + + if (!IsOptionalOperandPresent(tfLiteNode, 1)) + { + params.m_InputToInputWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 1); + } + + params.m_InputToForgetWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 2); + params.m_InputToCellWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 3); + params.m_InputToOutputWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 4); + + // Recurrent weight tensors of size {n_cell, n_output} + if (!IsOptionalOperandPresent(tfLiteNode, 5)) + { + params.m_RecurrentToInputWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 5); + } + + params.m_RecurrentToForgetWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 6); + params.m_RecurrentToCellWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 7); + params.m_RecurrentToOutputWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 8); + + // Peephole weights tensors of size {n_cell}, representing a diagonal matrix. 
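+    // Note on the optional-operand convention used below: IsOptionalOperandPresent(), as defined
+    // earlier in this patch, returns true when the tensor index for that operand is negative
+    // (TfLite marks omitted optional inputs with a negative index, typically kTfLiteOptionalTensor,
+    // i.e. -1). Despite its name it therefore answers "was this operand OMITTED?", so the negated
+    // calls below read as "if this optional weight tensor was actually supplied".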
+ if (!IsOptionalOperandPresent(tfLiteNode, 9)) + { + params.m_CellToInputWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 9); + } + + if (!IsOptionalOperandPresent(tfLiteNode, 10)) + { + params.m_CellToForgetWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 10); + } + + if (!IsOptionalOperandPresent(tfLiteNode, 11)) + { + params.m_CellToOutputWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 11); + } + + // Gates bias tensors of size {n_cell} + if (!IsOptionalOperandPresent(tfLiteNode, 12)) + { + params.m_InputGateBias = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 12); + } + + params.m_ForgetGateBias = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 13); + params.m_CellBias = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 14); + params.m_OutputGateBias = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 15); + + // Projection weight tensor of size {n_output, n_cell} + if (!IsOptionalOperandPresent(tfLiteNode, 16)) + { + params.m_ProjectionWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 16); + } + // Projection bias tensor of size {n_output} + if (!IsOptionalOperandPresent(tfLiteNode, 17)) + { + params.m_ProjectionBias = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 17); + } + + // These state tensors are defined as variable tensors, and will be modified by this op. + armnn::TensorInfo outputStateInInfo = GetTensorInfoForTfLiteTensor(tfLiteTensors[tfLiteNode->inputs->data[18]]); + armnn::TensorInfo cellStateInInfo = GetTensorInfoForTfLiteTensor(tfLiteTensors[tfLiteNode->inputs->data[19]]); + + // Layer norm coefficient tensors of size {n_cell}, representing a diagonal matrix. + if (tfLiteNode->inputs->size >= 21 && !IsOptionalOperandPresent(tfLiteNode, 20)) + { + params.m_InputLayerNormWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 20); + } + + if (tfLiteNode->inputs->size >= 22 && !IsOptionalOperandPresent(tfLiteNode, 21)) + { + params.m_ForgetLayerNormWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 21); + } + + if (tfLiteNode->inputs->size >= 23 && !IsOptionalOperandPresent(tfLiteNode, 22)) + { + params.m_CellLayerNormWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 22); + } + + if (tfLiteNode->inputs->size >= 24 && !IsOptionalOperandPresent(tfLiteNode, 23)) + { + params.m_OutputLayerNormWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 23); + } + + // set the layer descriptor + armnn::LstmDescriptor desc; + desc.m_ActivationFunc = NonNegative(nodeParams->activation, nodeIndex); + desc.m_ClippingThresCell = nodeParams->cell_clip; + desc.m_ClippingThresProj = nodeParams->proj_clip; + desc.m_CifgEnabled = (params.m_InputToInputWeights == nullptr + || params.m_RecurrentToInputWeights == nullptr + || params.m_InputGateBias == nullptr); + desc.m_PeepholeEnabled = (params.m_CellToForgetWeights != nullptr || params.m_CellToOutputWeights != nullptr); + desc.m_ProjectionEnabled = (params.m_ProjectionWeights != nullptr); + desc.m_LayerNormEnabled = (params.m_InputLayerNormWeights != nullptr + || params.m_ForgetLayerNormWeights != nullptr + || params.m_CellLayerNormWeights != nullptr + || params.m_OutputLayerNormWeights != nullptr); + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + unsigned int batchSize = inputTensorInfo.GetShape()[0]; + unsigned int outputSize = 
outputTensorInfo.GetShape()[1]; + unsigned int numUnits = cellStateInInfo.GetShape()[1]; + + armnn::DataType dataType = inputTensorInfo.GetDataType(); + float qScale = inputTensorInfo.GetQuantizationScale(); + float qOffset = inputTensorInfo.GetQuantizationOffset(); + + armnn::TensorInfo scratchBufferTensorInfo({batchSize, numUnits * 3}, dataType, qScale, qOffset); + if (!desc.m_CifgEnabled) + { + scratchBufferTensorInfo = armnn::TensorInfo({batchSize, numUnits * 4}, dataType, qScale, qOffset); + } + armnn::TensorInfo cellStateOutTensorInfo({batchSize, numUnits}, dataType, qScale, qOffset); + armnn::TensorInfo outputStateOutTensorInfo({batchSize, outputSize}, dataType, qScale, qOffset); + + armnn::LstmInputParamsInfo paramsInfo; + paramsInfo.m_InputToForgetWeights = &(params.m_InputToForgetWeights->GetInfo()); + paramsInfo.m_InputToCellWeights = &(params.m_InputToCellWeights->GetInfo()); + paramsInfo.m_InputToOutputWeights = &(params.m_InputToOutputWeights->GetInfo()); + paramsInfo.m_RecurrentToForgetWeights = &(params.m_RecurrentToForgetWeights->GetInfo()); + paramsInfo.m_RecurrentToCellWeights = &(params.m_RecurrentToCellWeights->GetInfo()); + paramsInfo.m_RecurrentToOutputWeights = &(params.m_RecurrentToOutputWeights->GetInfo()); + paramsInfo.m_ForgetGateBias = &(params.m_ForgetGateBias->GetInfo()); + paramsInfo.m_CellBias = &(params.m_CellBias->GetInfo()); + paramsInfo.m_OutputGateBias = &(params.m_OutputGateBias->GetInfo()); + + if (!desc.m_CifgEnabled) + { + paramsInfo.m_InputToInputWeights = &(params.m_InputToInputWeights->GetInfo()); + paramsInfo.m_RecurrentToInputWeights = &(params.m_RecurrentToInputWeights->GetInfo()); + if (params.m_CellToInputWeights != nullptr) + { + paramsInfo.m_CellToInputWeights = &(params.m_CellToInputWeights->GetInfo()); + } + paramsInfo.m_InputGateBias = &(params.m_InputGateBias->GetInfo()); + } + + if (desc.m_ProjectionEnabled) + { + paramsInfo.m_ProjectionWeights = &(params.m_ProjectionWeights->GetInfo()); + if (params.m_ProjectionBias != nullptr) + { + paramsInfo.m_ProjectionBias = &(params.m_ProjectionBias->GetInfo()); + } + } + + if (desc.m_PeepholeEnabled) + { + paramsInfo.m_CellToForgetWeights = &(params.m_CellToForgetWeights->GetInfo()); + paramsInfo.m_CellToOutputWeights = &(params.m_CellToOutputWeights->GetInfo()); + } + + if (desc.m_LayerNormEnabled) + { + if(!desc.m_CifgEnabled) + { + paramsInfo.m_InputLayerNormWeights = &(params.m_InputLayerNormWeights->GetInfo()); + } + paramsInfo.m_ForgetLayerNormWeights = &(params.m_ForgetLayerNormWeights->GetInfo()); + paramsInfo.m_CellLayerNormWeights = &(params.m_CellLayerNormWeights->GetInfo()); + paramsInfo.m_OutputLayerNormWeights = &(params.m_OutputLayerNormWeights->GetInfo()); + } + + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsLstmSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outputStateInInfo, + cellStateInInfo, + scratchBufferTensorInfo, + outputStateOutTensorInfo, + cellStateOutTensorInfo, + outputInfo, + desc, + paramsInfo); + }; + + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? 
kTfLiteOk : kTfLiteError; + } + + armnn::IConnectableLayer* layer = delegateData.m_Network->AddLstmLayer(desc, params); + ARMNN_ASSERT(layer != nullptr); + + layer->GetOutputSlot(0).SetTensorInfo(scratchBufferTensorInfo); + layer->GetOutputSlot(1).SetTensorInfo(outputStateOutTensorInfo); + layer->GetOutputSlot(2).SetTensorInfo(cellStateOutTensorInfo); + layer->GetOutputSlot(3).SetTensorInfo(outputTensorInfo); + + // Connect the inputs + // input_layer + delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[0]]->Connect(layer->GetInputSlot(0)); + // cellStateIn + delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[18]]->Connect(layer->GetInputSlot(1)); + //outputStateIn + delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[19]]->Connect(layer->GetInputSlot(2)); + + // In the test_model there is only 1 Output + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(1); + delegateData.m_OutputSlotForNode[static_cast(tfLiteNode->outputs->data[0])] = &outputSlot; + return kTfLiteOk; +} + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/MultiLayerFacade.hpp b/arch/arm/ARMnn/delegate/src/MultiLayerFacade.hpp new file mode 100644 index 0000000000..02be26cefd --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/MultiLayerFacade.hpp @@ -0,0 +1,147 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +// NOTE: the MultiLayerFacade class is a utility class which makes a chain +// of operators look like a single IConnectableLayer with the first +// layer in the chain supplying the input slots and the last supplying +// the output slots. It enables us, for example, to simulate a +// Tensorflow Lite FloorDiv operator by chaining a Div layer followed +// by a Floor layer and pass them as a single unit to the code that +// connects up the graph as the delegate proceeds to build up the +// Arm NN subgraphs. 
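+// A minimal usage sketch (illustrative only; 'network' stands for delegateData.m_Network),
+// mirroring how VisitElementwiseBinaryOperator() in ElementwiseBinary.hpp models the TfLite
+// FLOOR_DIV operator as a Div layer followed by a Floor layer:
+//
+//     armnn::IConnectableLayer* div   = network->AddDivisionLayer();
+//     armnn::IConnectableLayer* floor = network->AddFloorLayer();
+//     div->GetOutputSlot(0).Connect(floor->GetInputSlot(0));
+//
+//     MultiLayerFacade facade;
+//     facade.AssignValues(div, floor);   // input slots resolve to div, output slots to floor
+//
+// 'facade' can then be handed to code that expects a single IConnectableLayer*, such as
+// ProcessInputs() and FusedActivation(), without that code knowing a chain is involved.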
+// + +#include +#include + +namespace armnnDelegate +{ + +class MultiLayerFacade : public armnn::IConnectableLayer +{ +public: + MultiLayerFacade() : + m_FirstLayer(nullptr), m_LastLayer(nullptr) {} + + MultiLayerFacade(armnn::IConnectableLayer* firstLayer, armnn::IConnectableLayer* lastLayer) : + m_FirstLayer(firstLayer), m_LastLayer(lastLayer) {} + + MultiLayerFacade(const MultiLayerFacade& obj) : + m_FirstLayer(obj.m_FirstLayer), m_LastLayer(obj.m_LastLayer) {} + + ~MultiLayerFacade() {} // we don't own the pointers + + MultiLayerFacade& operator=(const MultiLayerFacade& obj) + { + m_FirstLayer = obj.m_FirstLayer; + m_LastLayer = obj.m_LastLayer; + return *this; + } + + void AssignValues(armnn::IConnectableLayer* firstLayer, armnn::IConnectableLayer* lastLayer) + { + m_FirstLayer = firstLayer; + m_LastLayer = lastLayer; + } + + virtual const char* GetName() const override + { + return m_FirstLayer->GetName(); + } + + virtual unsigned int GetNumInputSlots() const override + { + return m_FirstLayer->GetNumInputSlots(); + } + + virtual unsigned int GetNumOutputSlots() const override + { + return m_LastLayer->GetNumOutputSlots(); + } + + virtual const armnn::IInputSlot& GetInputSlot(unsigned int index) const override + { + return m_FirstLayer->GetInputSlot(index); + } + + virtual armnn::IInputSlot& GetInputSlot(unsigned int index) override + { + return m_FirstLayer->GetInputSlot(index); + } + + virtual const armnn::IOutputSlot& GetOutputSlot(unsigned int index) const override + { + return m_LastLayer->GetOutputSlot(index); + } + + virtual armnn::IOutputSlot& GetOutputSlot(unsigned int index) override + { + return m_LastLayer->GetOutputSlot(index); + } + + virtual std::vector InferOutputShapes( + const std::vector& inputShapes) const override + { + // NOTE: do not expect this function to be used. Likely that if it is it might need to be overridden + // for particular sequences of operators. + return m_FirstLayer->InferOutputShapes(inputShapes); + } + + virtual armnn::LayerGuid GetGuid() const override + { + return m_FirstLayer->GetGuid(); + } + + // The Accept function needs to be wrapped in a no warn macro to avoid deprecation warnings from + // the deprecated ILayerVisitor which is used in the function. + ARMNN_NO_DEPRECATE_WARN_BEGIN + ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Accept is deprecated. The ILayerVisitor that works in conjunction with this " + "Accept function is deprecated. Use IStrategy in combination with " + "ExecuteStrategy instead, which is an ABI/API stable version of the " + "visitor pattern.", + "22.05") + virtual void Accept(armnn::ILayerVisitor& visitor) const override + { + // Do not expect this function to be used so not providing an implementation + } + ARMNN_NO_DEPRECATE_WARN_END + + virtual void ExecuteStrategy(armnn::IStrategy& strategy) const override + { + // Do not expect this function to be used so not providing an implementation + // if an implementation is required and the chain contains more than two operators + // would have to provide a way to record the intermediate layers so they could be + // visited... the same applies to the Accept method above and the BackendSelectionHint + // below. 
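+        // Illustrative only: were such support ever required, the facade could, for example,
+        // record the whole chain and forward the call to each element in order:
+        //
+        //     std::vector<armnn::IConnectableLayer*> m_Chain;             // hypothetical member
+        //     for (auto* l : m_Chain) { l->ExecuteStrategy(strategy); }   // hypothetical body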
+ } + + virtual void BackendSelectionHint(armnn::Optional backend) override + { + // Do not expect this function to be used so not providing an implementation + } + + virtual armnn::LayerType GetType() const override + { + return m_FirstLayer->GetType(); + } + + virtual const armnn::BaseDescriptor& GetParameters() const override { return m_NullDescriptor; } + +protected: + /// Retrieve the handles to the constant values stored by the layer. + /// @return A vector of the constant tensors stored by this layer. + ConstantTensors GetConstantTensorsByRef() override { return {}; } + +private: + armnn::IConnectableLayer* m_FirstLayer; + armnn::IConnectableLayer* m_LastLayer; + + // to satisfy the GetParameters method need to hand back a NullDescriptor + armnn::NullDescriptor m_NullDescriptor; +}; + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/Normalization.hpp b/arch/arm/ARMnn/delegate/src/Normalization.hpp new file mode 100644 index 0000000000..68ff3af32d --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/Normalization.hpp @@ -0,0 +1,144 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus VisitL2NormalizationOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + armnn::L2NormalizationDescriptor descriptor; + descriptor.m_DataLayout = armnn::DataLayout::NHWC; + + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsL2NormalizationSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outInfo, + descriptor); + }; + + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? 
kTfLiteOk : kTfLiteError; + } + + // Add a L2Normalization layer + armnn::IConnectableLayer* layer = delegateData.m_Network->AddL2NormalizationLayer(descriptor); + ARMNN_ASSERT(layer != nullptr); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + // Connect + return Connect(layer, tfLiteNode, delegateData); +} + + +TfLiteStatus VisitLocalResponseNormalizationOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t normalizationOperatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, normalizationOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, normalizationOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + armnn::NormalizationDescriptor descriptor; + descriptor.m_DataLayout = armnn::DataLayout::NHWC; + descriptor.m_NormChannelType = armnn::NormalizationAlgorithmChannel::Across; + descriptor.m_NormMethodType = armnn::NormalizationAlgorithmMethod::LocalBrightness; + + auto* params = reinterpret_cast(tfLiteNode->builtin_data); + descriptor.m_NormSize = params->radius; + descriptor.m_K = params->bias; + descriptor.m_Alpha = params->alpha; + descriptor.m_Beta = params->beta; + + // ArmNN expects normSize to be the full size of the normalization window + descriptor.m_NormSize = 1 + (2 * descriptor.m_NormSize); + + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsNormalizationSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outInfo, + descriptor); + }; + + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? kTfLiteOk : kTfLiteError; + } + + // Add a Normalization layer + armnn::IConnectableLayer* layer = delegateData.m_Network->AddNormalizationLayer(descriptor); + ARMNN_ASSERT(layer != nullptr); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + // Connect + return Connect(layer, tfLiteNode, delegateData); +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/Pack.hpp b/arch/arm/ARMnn/delegate/src/Pack.hpp new file mode 100644 index 0000000000..5e93ba3f2a --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/Pack.hpp @@ -0,0 +1,119 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus VisitPackOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + unsigned int numInputs = tfLiteNode->inputs->size; + if (numInputs < 1) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, "TfLiteArmnnDelegate: Must have at least one input in (%d != %d) in node #%d", + 1, numInputs, nodeIndex); + return kTfLiteError; + } + + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + + // Validate all inputs and get TensorInfo + std::vector inputTensorInfos; + for (unsigned int i = 0; i < numInputs; ++i) + { + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[i]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + armnn::TensorInfo inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + inputTensorInfos.emplace_back(inputTensorInfo); + } + + // Convert input tensors to const armnn::TensorInfo* type for FORWARD_LAYER_SUPPORT_FUNC. + std::vector inputConstTensorInfos; + std::transform(inputTensorInfos.begin(), + inputTensorInfos.end(), + std::back_inserter(inputConstTensorInfos), + [](armnn::TensorInfo& t)->const armnn::TensorInfo*{ return &t; }); + + // Validate output and get TensorInfo + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + armnn::StackDescriptor desc; + desc.m_NumInputs = static_cast(numInputs); + + // Get axis from TfLite parameters + auto* params = reinterpret_cast(tfLiteNode->builtin_data); + desc.m_Axis = static_cast(params->axis); + + // Use the tensor shape of the first input as the "correct" input shape in the descriptor + desc.m_InputShape = inputTensorInfos[0].GetShape(); + + // Check if supported + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsStackSupported, + delegateData.m_Backends, + isSupported, + inputConstTensorInfos, + outputTensorInfo, + desc); + }; + + // If the m_Network is a nullptr, this signals that a prerequisite TfLite callback is required to clarify the + // support for the operator + // If supported, VisitPackOperator will be called again to add the layer to the network as seen below + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? 
kTfLiteOk : kTfLiteError; + } + + // The TfLite Pack operator is equivalent to the ArmNN Stack operator + armnn::IConnectableLayer* layer = delegateData.m_Network->AddStackLayer(desc); + ARMNN_ASSERT(layer != nullptr); + + // Connect the Constant Inputs + auto inputsTensorsProcess = ProcessInputs(layer, + delegateData, + tfLiteContext, + tfLiteNode); + if (inputsTensorsProcess == kTfLiteError) + { + return inputsTensorsProcess; + } + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + // Connect + return Connect(layer, tfLiteNode, delegateData); +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/Pad.hpp b/arch/arm/ARMnn/delegate/src/Pad.hpp new file mode 100644 index 0000000000..78e07760fb --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/Pad.hpp @@ -0,0 +1,176 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus VisitPadOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t tfLitePadOperatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + switch(tfLitePadOperatorCode) + { + case kTfLiteBuiltinMirrorPad: + case kTfLiteBuiltinPad: + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 2, nodeIndex)); + break; + case kTfLiteBuiltinPadv2: + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 3, nodeIndex)); + break; + default: + return kTfLiteError; + } + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + const TfLiteTensor& tfLitepaddingTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; + + if (IsDynamicTensor(tfLiteInputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ", + tfLitePadOperatorCode, nodeIndex); + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (IsDynamicTensor(tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic output tensors are not supported in operator #%d node #%d: ", + tfLitePadOperatorCode, nodeIndex); + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& paddingTensorInfo = GetTensorInfoForTfLiteTensor(tfLitepaddingTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + // Get the padding data from the input tensor + auto* paddingData = tflite::GetTensorData(&tfLitepaddingTensor); + + size_t step = 2; + armnn::PadDescriptor descriptor; + for (unsigned int i = 0; i < paddingTensorInfo.GetNumElements() / step; ++i) + { + descriptor.m_PadList.emplace_back(paddingData[i * step], paddingData[i * step + 1]); + } + + if (tfLitePadOperatorCode == kTfLiteBuiltinPad && inputTensorInfo.IsQuantized()) + { + descriptor.m_PadValue = inputTensorInfo.GetQuantizationOffset(); + } + else if (tfLitePadOperatorCode == kTfLiteBuiltinPadv2) + { + const TfLiteTensor& tfLitepaddingValue = tfLiteTensors[tfLiteNode->inputs->data[2]]; + armnn::TensorInfo paddingValueTensorInfo = GetTensorInfoForTfLiteTensor(tfLitepaddingValue); + if 
(paddingValueTensorInfo.GetNumElements() != 1) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Multiple padding value are not supported in operator #%d node #%d: ", + tfLitePadOperatorCode, nodeIndex); + return kTfLiteError; + } + // Get the padding value from the input tensor + switch (tfLitepaddingValue.type) + { + case kTfLiteFloat32: + descriptor.m_PadValue = tflite::GetTensorData(&tfLitepaddingValue)[0]; + break; + case kTfLiteUInt8: + descriptor.m_PadValue = tflite::GetTensorData(&tfLitepaddingValue)[0]; + break; + case kTfLiteInt8: + descriptor.m_PadValue = tflite::GetTensorData(&tfLitepaddingValue)[0]; + break; + default: + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Padding value datatype is not supported in operator #%d node #%d: ", + tfLitePadOperatorCode, nodeIndex); + return kTfLiteError; + } + } + else if (tfLitePadOperatorCode == kTfLiteBuiltinMirrorPad) + { + TfLiteMirrorPaddingParams* options = reinterpret_cast(tfLiteNode->builtin_data); + + + if (options->mode == TfLiteMirrorPaddingMode::kTfLiteMirrorPaddingReflect) + { + descriptor.m_PaddingMode = armnn::PaddingMode::Reflect; + } + else if (options->mode == TfLiteMirrorPaddingMode::kTfLiteMirrorPaddingSymmetric) + { + descriptor.m_PaddingMode = armnn::PaddingMode::Symmetric; + } + else + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: PaddingMode must be either REFLECT or SYMMETRIC in operator #%d node #%d: ", + tfLitePadOperatorCode, nodeIndex); + } + + // If padding mode is Reflect then both paddings must be no greater than inputShape(i) - 1. + // If padding mode is Symmetric then both paddings must be no greater than inputShape(i). + auto inputShape = inputTensorInfo.GetShape(); + auto padList = descriptor.m_PadList; + + const unsigned int isReflect = + static_cast(descriptor.m_PaddingMode == armnn::PaddingMode::Reflect); + for(unsigned int i = 0; i < padList.size(); ++i) + { + if(padList.at(i).first > (inputShape[i] - isReflect) || + padList.at(i).second > (inputShape[i] - isReflect)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Padding values must be less (Reflect) or " + "equal (Symmetric) to the dimension size in operator #%d node #%d: ", + tfLitePadOperatorCode, nodeIndex); + } + } + } + + if (!delegateData.m_Network) + { + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsPadSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outputTensorInfo, + descriptor); + + return isSupported ? kTfLiteOk : kTfLiteError; + } + + armnn::IConnectableLayer* padLayer = delegateData.m_Network->AddPadLayer(descriptor); + ARMNN_ASSERT(padLayer != nullptr); + + armnn::IOutputSlot& outputSlot = padLayer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + return Connect(padLayer, tfLiteNode, delegateData); +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/Pooling.hpp b/arch/arm/ARMnn/delegate/src/Pooling.hpp new file mode 100644 index 0000000000..07c1946d73 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/Pooling.hpp @@ -0,0 +1,116 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "DelegateUtils.hpp" + +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus VisitPoolingOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t tfLitePoolingOperatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (IsDynamicTensor(tfLiteInputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ", + tfLitePoolingOperatorCode, nodeIndex); + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (IsDynamicTensor(tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic output tensors are not supported in operator #%d node #%d: ", + tfLitePoolingOperatorCode, nodeIndex); + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + armnn::PoolingAlgorithm poolingAlgorithm; + switch(tfLitePoolingOperatorCode) + { + case kTfLiteBuiltinAveragePool2d: + poolingAlgorithm = armnn::PoolingAlgorithm::Average; + break; + case kTfLiteBuiltinL2Pool2d: + poolingAlgorithm = armnn::PoolingAlgorithm::L2; + break; + case kTfLiteBuiltinMaxPool2d: + poolingAlgorithm = armnn::PoolingAlgorithm::Max; + break; + default: + return kTfLiteError; + } + + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = poolingAlgorithm; + + auto* params = reinterpret_cast(tfLiteNode->builtin_data); + descriptor.m_PoolWidth = params->filter_width; + descriptor.m_PoolHeight = params->filter_height; + descriptor.m_StrideX = params->stride_width; + descriptor.m_StrideY = params->stride_height; + descriptor.m_DataLayout = armnn::DataLayout::NHWC; + + unsigned int inputHeight = inputTensorInfo.GetShape()[1]; + unsigned int inputWidth = inputTensorInfo.GetShape()[2]; + + CalcPadding(inputHeight, descriptor.m_PoolHeight, descriptor.m_StrideY, 1u, + descriptor.m_PadTop, descriptor.m_PadBottom, params->padding); + CalcPadding(inputWidth, descriptor.m_PoolWidth, descriptor.m_StrideX, 1u, + descriptor.m_PadLeft, descriptor.m_PadRight, params->padding); + + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsPooling2dSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outputTensorInfo, + descriptor); + }; + + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? 
kTfLiteOk : kTfLiteError; + } + + armnn::IConnectableLayer* poolingLayer = delegateData.m_Network->AddPooling2dLayer(descriptor); + ARMNN_ASSERT(poolingLayer != nullptr); + + armnn::IOutputSlot& outputSlot = poolingLayer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + Connect(poolingLayer, tfLiteNode, delegateData); + + // Check activation + TfLiteFusedActivation activationType = params->activation; + return FusedActivation(tfLiteContext, tfLiteNode, activationType, poolingLayer, 0, delegateData); +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/Prelu.hpp b/arch/arm/ARMnn/delegate/src/Prelu.hpp new file mode 100644 index 0000000000..9baeaf475d --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/Prelu.hpp @@ -0,0 +1,107 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "DelegateUtils.hpp" + +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus ValidatePreluOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + const armnn::TensorInfo& inputInfo, + const armnn::TensorInfo& alphaInfo, + const armnn::TensorInfo& outputInfo) +{ + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsPreluSupported, + delegateData.m_Backends, + isSupported, + inputInfo, + alphaInfo, + outputInfo); + }; + + validateFunc(outputInfo, isSupported); + return isSupported ? kTfLiteOk : kTfLiteError; +} + +TfLiteStatus VisitPreluOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 2, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteAlphaTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if (!IsValid(tfLiteContext, tfLiteAlphaTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& alphaTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteAlphaTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + if (!delegateData.m_Network) + { + return ValidatePreluOperator(delegateData, + tfLiteContext, + inputTensorInfo, + alphaTensorInfo, + outputTensorInfo); + } + + armnn::IConnectableLayer* preluLayer = delegateData.m_Network->AddPreluLayer(); + ARMNN_ASSERT(preluLayer != nullptr); + + bool isConstantAlpha = tflite::IsConstantTensor(&tfLiteAlphaTensor); + + // Add constant layer for constant alpha + if (isConstantAlpha) + { + auto constAlphaTensor = armnn::ConstTensor(alphaTensorInfo, tfLiteAlphaTensor.data.data); + + armnn::IConnectableLayer* constLayer = delegateData.m_Network->AddConstantLayer(constAlphaTensor); + ARMNN_ASSERT(constLayer != nullptr); + + 
constLayer->GetOutputSlot(0).SetTensorInfo(alphaTensorInfo); + constLayer->GetOutputSlot(0).Connect(preluLayer->GetInputSlot(1)); + } + + armnn::IOutputSlot& outputSlot = preluLayer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + // Connect + return Connect(preluLayer, tfLiteNode, delegateData); +} + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/Quantization.hpp b/arch/arm/ARMnn/delegate/src/Quantization.hpp new file mode 100644 index 0000000000..3c274c6ff5 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/Quantization.hpp @@ -0,0 +1,158 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus VisitDequantizeOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t tfLiteDequantizeOperatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (IsDynamicTensor(tfLiteInputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ", + tfLiteDequantizeOperatorCode, nodeIndex); + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (IsDynamicTensor(tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic output tensors are not supported in operator #%d node #%d: ", + tfLiteDequantizeOperatorCode, nodeIndex); + + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsDequantizeSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outputTensorInfo); + }; + + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? 
kTfLiteOk : kTfLiteError; + } + + armnn::IConnectableLayer* dequantizeLayer = delegateData.m_Network->AddDequantizeLayer(); + ARMNN_ASSERT(dequantizeLayer != nullptr); + + armnn::IOutputSlot& outputSlot = dequantizeLayer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + auto inputsTensorsProcess = ProcessInputs(dequantizeLayer, + delegateData, + tfLiteContext, + tfLiteNode); + if (inputsTensorsProcess == kTfLiteError) + { + return inputsTensorsProcess; + } + + return Connect(dequantizeLayer, tfLiteNode, delegateData); +} + +TfLiteStatus VisitQuantizeOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t tfLiteQuantizeOperatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (IsDynamicTensor(tfLiteInputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ", + tfLiteQuantizeOperatorCode, nodeIndex); + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (IsDynamicTensor(tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic output tensors are not supported in operator #%d node #%d: ", + tfLiteQuantizeOperatorCode, nodeIndex); + return kTfLiteError; + } + + // Only affine per-layer quantization is supported. + if (!IsAffineQuantization(tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Only affine per-layer quantization is supported in operator #%d node #%d: ", + tfLiteQuantizeOperatorCode, nodeIndex); + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsQuantizeSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outputTensorInfo); + }; + + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? kTfLiteOk : kTfLiteError; + } + + armnn::IConnectableLayer* quantizeLayer = delegateData.m_Network->AddQuantizeLayer(); + ARMNN_ASSERT(quantizeLayer != nullptr); + + armnn::IOutputSlot& outputSlot = quantizeLayer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + return Connect(quantizeLayer, tfLiteNode, delegateData); +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/Redefine.hpp b/arch/arm/ARMnn/delegate/src/Redefine.hpp new file mode 100644 index 0000000000..766e600c7c --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/Redefine.hpp @@ -0,0 +1,271 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include "DelegateUtils.hpp" + +#include +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus VisitCastOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsCastSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outInfo); + }; + + // If the m_Network is a nullptr, this signals that a prerequisite TfLite callback is required to clarify the + // support for the operator + // If supported, VisitCastOperator will be called again to add the layer to the network as seen further below + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? 
kTfLiteOk : kTfLiteError; + } + + // Add a Cast layer + armnn::IConnectableLayer* layer = delegateData.m_Network->AddCastLayer(); + ARMNN_ASSERT(layer != nullptr); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + // Connect + return Connect(layer, tfLiteNode, delegateData); +} + + +TfLiteStatus CreateOutputTensorShape(const armnn::TensorInfo& inputTensorInfo, + const std::vector& targetShape, + armnn::ReshapeDescriptor& reshapeDesc) +{ + std::vector outputDims(targetShape.begin(), targetShape.end()); + const auto stretchDim = std::find(targetShape.begin(), targetShape.end(), -1); + + if (stretchDim != targetShape.end()) + { + if (std::find(std::next(stretchDim), targetShape.end(), -1) != targetShape.end()) + { + // Return kTfLiteError and log the error after returning + return kTfLiteError; + } + + auto targetNumElements = + armnn::numeric_cast( + std::accumulate(targetShape.begin(), targetShape.end(), -1, std::multiplies())); + + auto stretchIndex = static_cast(std::distance(targetShape.begin(), stretchDim)); + outputDims[stretchIndex] = inputTensorInfo.GetNumElements() / targetNumElements; + } + + armnn::TensorShape outputShape = armnn::TensorShape(static_cast(outputDims.size()), + outputDims.data()); + reshapeDesc.m_TargetShape = outputShape; + return kTfLiteOk; +} + +TfLiteStatus VisitReshapeOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + auto numInputs = tfLiteNode->inputs->size; + + if (numInputs == 2) + { + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 2, nodeIndex)); + } + else + { + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + } + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor0 = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor0, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo0 = GetTensorInfoForTfLiteTensor(tfLiteInputTensor0); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + armnn::ReshapeDescriptor reshapeDesc; + std::vector targetShape; + + TfLiteReshapeParams* reshapeOptions = reinterpret_cast(tfLiteNode->builtin_data); + + // The new shape can be defined by either a second input tensor or by a builtin option, we need to check for both. + // Options might be set without valid data. we need to check the dimensions are in a valid range. 
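+    // Note: as implemented below, the builtin options take precedence when they carry a valid
+    // dimension count (1 to 8); otherwise the shape is read from the second input tensor, and if
+    // neither source provides a target shape the operator is rejected.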
+ if (reshapeOptions && reshapeOptions->num_dimensions > 0 && reshapeOptions->num_dimensions <= 8) + { + for (int i=0; i < reshapeOptions->num_dimensions; ++i) + { + targetShape.push_back(reshapeOptions->shape[i]); + } + } + else if (numInputs == 2) + { + // Get shape from the second input tensor + const TfLiteTensor& tfLiteShapeInputTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if (!IsValid(tfLiteContext, tfLiteShapeInputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + if (tfLiteShapeInputTensor.dims->size != 1) + { + TF_LITE_MAYBE_KERNEL_LOG(tfLiteContext, + "TfLiteArmnnDelegate: Target 'shape' input is not a 1D tensor in " + "operator #%d node #%d: Falling back to TfLiteOptions.", + operatorCode, nodeIndex); + } + else + { + // Get the shape data out of the input tensor + auto* shapeTensorDataPtr = tflite::GetTensorData(&tfLiteShapeInputTensor); + auto shapeTensorNumValues = tfLiteShapeInputTensor.dims->data[0]; + for (auto i=0; i < shapeTensorNumValues; ++i) + { + targetShape.push_back(*(shapeTensorDataPtr+i)); + } + } + } + else + { + TF_LITE_MAYBE_KERNEL_LOG(tfLiteContext, + "Target shape not defined in reshape parameters or input tensor. " + "At least one method required in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + + // Use the data to create the required tensor shape. + if (CreateOutputTensorShape(inputTensorInfo0, targetShape, reshapeDesc) != kTfLiteOk) + { + TF_LITE_MAYBE_KERNEL_LOG(tfLiteContext, + "TfLiteArmnnDelegate: At most one component of shape can be -1 in: " + "operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + + if (reshapeDesc.m_TargetShape.GetNumElements() != inputTensorInfo0.GetNumElements()) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Reshape, number of elements in output shape does not match input " + "operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsReshapeSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo0, + outInfo, + reshapeDesc); + }; + + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? 
kTfLiteOk : kTfLiteError; + } + + armnn::IConnectableLayer* layer = delegateData.m_Network->AddReshapeLayer(reshapeDesc); + ARMNN_ASSERT(layer != nullptr); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + // Connect + return Connect(layer, tfLiteNode, delegateData); +} + +TfLiteStatus VisitSqueezeOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + armnn::IgnoreUnused(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + operatorCode); + + return kTfLiteError; +} + +TfLiteStatus VisitExpandDimsOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + armnn::IgnoreUnused(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + operatorCode); + + return kTfLiteError; +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/Reduce.hpp b/arch/arm/ARMnn/delegate/src/Reduce.hpp new file mode 100644 index 0000000000..bcea16fdb0 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/Reduce.hpp @@ -0,0 +1,137 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus VisitReduceOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t reduceOperatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 2, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, reduceOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, reduceOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + // Get const axis value from model and set it to descriptor. + const TfLiteTensor& tfLiteAxisTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if (!IsValid(tfLiteContext, tfLiteAxisTensor, reduceOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const armnn::TensorInfo& axisTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteAxisTensor); + auto* axisTensorData = tflite::GetTensorData(&tfLiteAxisTensor); + + std::vector axis; + // Add axis data to vector to be converter to unsigned int and assigned to descriptor axis. + if (axisTensorData != nullptr) + { + for (unsigned int i = 0; i < axisTensorInfo.GetNumElements(); ++i) + { + axis.emplace_back(axisTensorData[i]); + } + } + else + { + for (unsigned int i = 0; i < inputTensorInfo.GetNumDimensions(); ++i) + { + axis.push_back(i); + } + } + + // Convert the axis to unsigned int and remove duplicates. 
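+    // Negative axes are wrapped into [0, rank) below, e.g. for a rank 4 input an axis of -1
+    // maps to (-1 + 4) % 4 = 3.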
+ unsigned int rank = inputTensorInfo.GetNumDimensions(); + std::set uniqueAxis; + std::transform(axis.begin(), + axis.end(), + std::inserter(uniqueAxis, uniqueAxis.begin()), + [rank](int i)->unsigned int{ return (i + rank) % rank; }); + + armnn::ReduceDescriptor desc; + desc.m_vAxis.assign(uniqueAxis.begin(), uniqueAxis.end()); + + auto* reducerParameters = reinterpret_cast(tfLiteNode->builtin_data); + desc.m_KeepDims = reducerParameters->keep_dims; + if (reduceOperatorCode == kTfLiteBuiltinReduceMax) + { + desc.m_ReduceOperation = armnn::ReduceOperation::Max; + } + else if (reduceOperatorCode == kTfLiteBuiltinReduceMin) + { + desc.m_ReduceOperation = armnn::ReduceOperation::Min; + } + else if (reduceOperatorCode == kTfLiteBuiltinSum) + { + desc.m_ReduceOperation = armnn::ReduceOperation::Sum; + } + else if (reduceOperatorCode == kTfLiteBuiltinReduceProd) + { + desc.m_ReduceOperation = armnn::ReduceOperation::Prod; + } + else + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Unsupported Reduction Operator #%d node #%d: ", + reduceOperatorCode, nodeIndex); + return kTfLiteError; + } + + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsReduceSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outInfo, + desc); + }; + + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? kTfLiteOk : kTfLiteError; + } + + // Add an Reduce layer + armnn::IConnectableLayer* layer = delegateData.m_Network->AddReduceLayer(desc); + ARMNN_ASSERT(layer != nullptr); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + // Connect + return Connect(layer, tfLiteNode, delegateData); +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/Resize.hpp b/arch/arm/ARMnn/delegate/src/Resize.hpp new file mode 100644 index 0000000000..937cc00c8f --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/Resize.hpp @@ -0,0 +1,198 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "DelegateUtils.hpp" +#include + +#include + +#include +#include +#include +#include +#include + +namespace armnnDelegate +{ + + + +TfLiteStatus ValidateResizeOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + const armnn::TensorInfo& inputInfo, + const armnn::TensorInfo& outputInfo, + const armnn::ResizeDescriptor& descriptor) +{ + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsResizeSupported, + delegateData.m_Backends, + isSupported, + inputInfo, + outputInfo, + descriptor); + + return isSupported ? 
kTfLiteOk : kTfLiteError; +} + +TfLiteStatus VisitResizeOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t resizeOperatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 2, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + + // The first input contains the data of the image that should be resized [batch, height, width, channels] + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (IsDynamicTensor(tfLiteInputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ", + resizeOperatorCode, nodeIndex); + return kTfLiteError; + } + + // The second input contains a size tensor. The size tensor contains two integer values + // that describe the new height and width of the image [new_height, new_width] + const TfLiteTensor& tfLiteSizeTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if (IsDynamicTensor(tfLiteSizeTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ", + resizeOperatorCode, nodeIndex); + return kTfLiteError; + } + + // The output tensor should have the shape [batch, new_height, new_width, channels] + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (IsDynamicTensor(tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic output tensors are not supported in operator #%d node #%d: ", + resizeOperatorCode, nodeIndex); + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + std::string layerName("Resize"); + + // Fill descriptor + armnn::ResizeDescriptor desc; + switch (resizeOperatorCode) + { + case kTfLiteBuiltinResizeBilinear: + { + desc.m_Method = armnn::ResizeMethod::Bilinear; + + layerName += "Bilinear:" + std::to_string(nodeIndex); + + TfLiteResizeBilinearParams* biliniarOptions = + reinterpret_cast(tfLiteNode->builtin_data); + + desc.m_AlignCorners = biliniarOptions->align_corners; + desc.m_HalfPixelCenters = biliniarOptions->half_pixel_centers; + break; + } + case kTfLiteBuiltinResizeNearestNeighbor: + { + desc.m_Method = armnn::ResizeMethod::NearestNeighbor; + layerName += "NearestNeighbor:" + std::to_string(nodeIndex); + + TfLiteResizeNearestNeighborParams* nearestNeighborOptions = + reinterpret_cast(tfLiteNode->builtin_data); + + desc.m_AlignCorners = nearestNeighborOptions->align_corners; + desc.m_HalfPixelCenters = nearestNeighborOptions->half_pixel_centers; + break; + } + default: + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Unknown TfLite built in operation for Resize. Given operator: #%d node #%d: ", + resizeOperatorCode, nodeIndex); + return kTfLiteError; + } + } + + // In armnn the values of the size input tensor [new_hight, new_width] is saved in the operator + // descriptor. We have to read it from the input tensor and write it to the descriptor. 
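+    // The checks below require the size input to be a 1-D tensor holding exactly two integer
+    // values, [new_height, new_width]; anything else is rejected before the descriptor is filled.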
+ + auto* sizeTensorDataPtr = tflite::GetTensorData(&tfLiteSizeTensor); + auto sizeTensorNumDimensions = tfLiteSizeTensor.dims->size; + // The size tensor is only a 1D tensor -> [new_hight, new width] + if (sizeTensorNumDimensions != 1) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: The Size-Input-Tensor of the Resize operation is not allowed to be a " + "dynamic tensor. Operator: #%d node #%d: ", + resizeOperatorCode, nodeIndex); + return kTfLiteError; + } + + // Get number of values in the size tensor + auto sizeTensorNumValues = tfLiteSizeTensor.dims->data[0]; + if (sizeTensorNumValues == 0) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: The Size-Input-Tensor of the Resize operation is not allowed to be a " + "dynamic tensor. Operator: #%d node #%d: ", + resizeOperatorCode, nodeIndex); + return kTfLiteError; + } + else if (sizeTensorNumValues != 2) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: The Size-Input-Tensor of the Resize operation requires to " + "have a dimension of 2 [new_hight, new width] but a tensor with a dimension of #%d was given. " + "Operator: #%d node #%d: ", + sizeTensorNumValues, resizeOperatorCode, nodeIndex); + return kTfLiteError; + } + // get size tensor data + std::vector sizeTensorData(sizeTensorDataPtr, sizeTensorDataPtr+sizeTensorNumValues); + + desc.m_TargetHeight = static_cast (sizeTensorData[0]); + desc.m_TargetWidth = static_cast (sizeTensorData[1]); + desc.m_DataLayout = armnn::DataLayout::NHWC; + + // No network pointer indicates that only support for this operator should be checked + if (!delegateData.m_Network) + { + return ValidateResizeOperator(delegateData, + tfLiteContext, + inputTensorInfo, + outputTensorInfo, + desc); + } + + + armnn::IConnectableLayer* resizeLayer = nullptr; + resizeLayer = delegateData.m_Network->AddResizeLayer(desc, layerName.c_str()); + + armnn::IOutputSlot& outputSlot = resizeLayer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + ARMNN_ASSERT(resizeLayer != nullptr); + + return Connect(resizeLayer, tfLiteNode, delegateData); +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/Round.hpp b/arch/arm/ARMnn/delegate/src/Round.hpp new file mode 100644 index 0000000000..016af11e21 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/Round.hpp @@ -0,0 +1,65 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "SharedFunctions.hpp" + +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus VisitFloorOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + // NOTE: looks like the outputTensorInfo is the only thing that is required for the case + // where we are adding the floor layer so maybe move the other stuff inside the + // if !delegateData block for efficiency. + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + // If the m_Network is a nullptr, this signals that a prerequisite TfLite callback is required to clarify the + // support for the operator + // If supported, VisitFloorOperator will be called again to add the layer to the network as seen further below + if (!delegateData.m_Network) + { + return ValidateFloorOperator(delegateData, tfLiteContext, inputTensorInfo, outputTensorInfo); + } + + // Add a Floor layer + armnn::IConnectableLayer* layer = delegateData.m_Network->AddFloorLayer(); + ARMNN_ASSERT(layer != nullptr); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + // Connect + return Connect(layer, tfLiteNode, delegateData); +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/Shape.hpp b/arch/arm/ARMnn/delegate/src/Shape.hpp new file mode 100644 index 0000000000..b173299a62 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/Shape.hpp @@ -0,0 +1,86 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "DelegateUtils.hpp" + +#include +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus VisitShapeOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + auto* shapeParameters = reinterpret_cast(tfLiteNode->builtin_data); + if ( shapeParameters->out_type != kTfLiteInt32 && shapeParameters->out_type != kTfLiteInt64 ) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: output_type data type is not supported in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsShapeSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outInfo); + }; + + // If the m_Network is a nullptr, this signals that a prerequisite TfLite callback is required to clarify the + // support for the operator + // If supported, VisitShapeOperator will be called again to add the layer to the network as seen further below + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? kTfLiteOk : kTfLiteError; + } + + // Add a Shape layer + armnn::IConnectableLayer* layer = delegateData.m_Network->AddShapeLayer(); + ARMNN_ASSERT(layer != nullptr); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + // Connect + return Connect(layer, tfLiteNode, delegateData); +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/SharedFunctions.cpp b/arch/arm/ARMnn/delegate/src/SharedFunctions.cpp new file mode 100644 index 0000000000..79b9f9061f --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/SharedFunctions.cpp @@ -0,0 +1,40 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + + +#include "SharedFunctions.hpp" + +#include "DelegateUtils.hpp" + +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus ValidateFloorOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + const armnn::TensorInfo& inputTensorInfo, + const armnn::TensorInfo& outputTensorInfo) +{ + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsFloorSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outInfo); + }; + validateFunc(outputTensorInfo, isSupported); + return isSupported ? 
kTfLiteOk : kTfLiteError; +} + +} // namespace armnnDelegate + diff --git a/arch/arm/ARMnn/delegate/src/SharedFunctions.hpp b/arch/arm/ARMnn/delegate/src/SharedFunctions.hpp new file mode 100644 index 0000000000..bf6b603cf9 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/SharedFunctions.hpp @@ -0,0 +1,19 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnnDelegate +{ + +TfLiteStatus ValidateFloorOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + const armnn::TensorInfo& inputTensorInfo, + const armnn::TensorInfo& outputTensorInfo); + +} // namespace armnnDelegate + diff --git a/arch/arm/ARMnn/delegate/src/Slice.hpp b/arch/arm/ARMnn/delegate/src/Slice.hpp new file mode 100644 index 0000000000..a237034bb6 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/Slice.hpp @@ -0,0 +1,146 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus VisitSliceOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t sliceOperatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 4, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + // Read inputs [input, begin, end, strides] + int numInputs = tfLiteNode->inputs->size; + std::vector tfLiteInputs; + tfLiteInputs.reserve(numInputs); + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + for (int i = 0; i < numInputs; i++) + { + const TfLiteTensor* inputTensor = &tfLiteTensors[tfLiteNode->inputs->data[i]]; + tfLiteInputs.push_back(inputTensor); + if (!IsValid(tfLiteContext, *inputTensor, sliceOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + } + + // We save the begin, end and strides tensors in our descriptor. Therefore we have to read those values from inputs + int inputRank = tfLiteInputs[0]->dims->size; + auto ReadInt32Input = [&](int inputIndex, std::vector& outputData) -> TfLiteStatus + { + if (tfLiteInputs[inputIndex]->type != kTfLiteInt32) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: The Begin-, End- and Stride-Tensors of the StridedSlice operation need to " + "be of type int32. Operator: #%d node #%d: ", + sliceOperatorCode, nodeIndex); + return kTfLiteError; + } + int rank = tfLiteInputs[inputIndex]->dims->size; + if (rank != 1) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: The Begin-, End- and Stride-Tensors of the StridedSlice operation need to " + "be a 1D-Tensor. Operator: #%d node #%d: ", + sliceOperatorCode, nodeIndex); + return kTfLiteError; + } + int numValues = tfLiteInputs[inputIndex]->dims->data[0]; + if (numValues != inputRank) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: The number of values in the Begin-, End- and Stride-Tensors of the " + "StridedSlice operation need to be equal to the rank of the Input-Tensor. 
Operator: #%d node #%d: ", + sliceOperatorCode, nodeIndex); + return kTfLiteError; + } + // return tensor data + auto* tensorDataPtr = tflite::GetTensorData(tfLiteInputs[inputIndex]); + outputData.assign(tensorDataPtr, tensorDataPtr+numValues); + return kTfLiteOk; + }; + + std::vector beginData; + if (ReadInt32Input(1, beginData) != kTfLiteOk) + return kTfLiteError; + std::vector endData; + if (ReadInt32Input(2, endData) != kTfLiteOk) + return kTfLiteError; + std::vector strideData; + if (ReadInt32Input(3, strideData) != kTfLiteOk) + return kTfLiteError; + + // parse built in options + auto* stridedSliceParams = reinterpret_cast(tfLiteNode->builtin_data); + + // Write all data to the descriptor + armnn::StridedSliceDescriptor descriptor; + descriptor.m_Begin = std::move(beginData); + descriptor.m_End = std::move(endData); + descriptor.m_Stride = std::move(strideData); + descriptor.m_BeginMask = stridedSliceParams->begin_mask; + descriptor.m_EllipsisMask = stridedSliceParams->ellipsis_mask; + descriptor.m_EndMask = stridedSliceParams->end_mask; + descriptor.m_NewAxisMask = stridedSliceParams->new_axis_mask; + descriptor.m_ShrinkAxisMask = stridedSliceParams->shrink_axis_mask; + descriptor.m_DataLayout = armnn::DataLayout::NHWC; + + // Validate output + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, sliceOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(*tfLiteInputs[0]); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsStridedSliceSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outInfo, + descriptor); + }; + + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? kTfLiteOk : kTfLiteError; + } + + // Add a StridedSlice layer + armnn::IConnectableLayer* layer = delegateData.m_Network->AddStridedSliceLayer(descriptor); + ARMNN_ASSERT(layer != nullptr); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + // Connect + return Connect(layer, tfLiteNode, delegateData); +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/Softmax.hpp b/arch/arm/ARMnn/delegate/src/Softmax.hpp new file mode 100644 index 0000000000..0de8e1438c --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/Softmax.hpp @@ -0,0 +1,147 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "DelegateUtils.hpp" + +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus ValidateSoftmaxOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + const armnn::TensorInfo& inputInfo, + const armnn::TensorInfo& outputTensorInfo, + const armnn::SoftmaxDescriptor& descriptor) +{ + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsSoftmaxSupported, + delegateData.m_Backends, + isSupported, + inputInfo, + outputTensorInfo, + descriptor); + return isSupported ? 
kTfLiteOk : kTfLiteError; +} + + +TfLiteStatus ValidateLogSoftmaxOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + const armnn::TensorInfo& inputInfo, + const armnn::TensorInfo& outputTensorInfo, + const armnn::LogSoftmaxDescriptor& descriptor) +{ + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsLogSoftmaxSupported, + delegateData.m_Backends, + isSupported, + inputInfo, + outputTensorInfo, + descriptor); + return isSupported ? kTfLiteOk : kTfLiteError; +} + +TfLiteStatus VisitSoftmaxOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t softmaxOperatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (IsDynamicTensor(tfLiteInputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in node #%d: ", + nodeIndex); + return kTfLiteError; + } + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (IsDynamicTensor(tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Dynamic output tensors are not supported in node #%d: ", + nodeIndex); + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + + if (!delegateData.m_Network) + { + switch(softmaxOperatorCode) + { + case kTfLiteBuiltinSoftmax: + { + armnn::SoftmaxDescriptor descriptor; + auto* params = reinterpret_cast(tfLiteNode->builtin_data); + descriptor.m_Beta = params->beta; + return ValidateSoftmaxOperator(delegateData, + tfLiteContext, + inputTensorInfo, + outputTensorInfo, + descriptor); + } + case kTfLiteBuiltinLogSoftmax: + { + armnn::LogSoftmaxDescriptor descriptor; + return ValidateLogSoftmaxOperator(delegateData, + tfLiteContext, + inputTensorInfo, + outputTensorInfo, + descriptor); + } + default: + return kTfLiteError; + } + } + + armnn::IConnectableLayer* softmaxLayer = nullptr; + + switch(softmaxOperatorCode) + { + case kTfLiteBuiltinSoftmax: + { + armnn::SoftmaxDescriptor descriptor; + auto* params = reinterpret_cast(tfLiteNode->builtin_data); + descriptor.m_Beta = params->beta; + softmaxLayer = delegateData.m_Network->AddSoftmaxLayer(descriptor); + break; + } + case kTfLiteBuiltinLogSoftmax: + { + armnn::LogSoftmaxDescriptor descriptor; + softmaxLayer = delegateData.m_Network->AddLogSoftmaxLayer(descriptor); + break; + } + default: + return kTfLiteError; + } + ARMNN_ASSERT(softmaxLayer != nullptr); + + armnn::IOutputSlot& outputSlot = softmaxLayer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + // Connect + return Connect(softmaxLayer, tfLiteNode, delegateData); +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/SpaceDepth.hpp b/arch/arm/ARMnn/delegate/src/SpaceDepth.hpp new file mode 100644 index 0000000000..03859b6fcb --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/SpaceDepth.hpp @@ -0,0 +1,134 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus VisitSpaceToDepthOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + armnn::SpaceToDepthDescriptor descriptor; + auto* params = reinterpret_cast(tfLiteNode->builtin_data); + descriptor.m_BlockSize = params->block_size; + + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsSpaceToDepthSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outInfo, + descriptor); + }; + + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? kTfLiteOk : kTfLiteError; + } + + // Add a SpaceToDepth layer + armnn::IConnectableLayer* layer = delegateData.m_Network->AddSpaceToDepthLayer(descriptor); + ARMNN_ASSERT(layer != nullptr); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + // Connect + return Connect(layer, tfLiteNode, delegateData); +} + +TfLiteStatus VisitDepthToSpaceOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + armnn::DepthToSpaceDescriptor descriptor; + auto* params = reinterpret_cast(tfLiteNode->builtin_data); + descriptor.m_BlockSize = params->block_size; + + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsDepthToSpaceSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outInfo, + descriptor); + }; + + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return 
isSupported ? kTfLiteOk : kTfLiteError; + } + + // Add a DepthToSpace layer + armnn::IConnectableLayer* layer = delegateData.m_Network->AddDepthToSpaceLayer(descriptor); + ARMNN_ASSERT(layer != nullptr); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + // Connect + return Connect(layer, tfLiteNode, delegateData); +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/Split.hpp b/arch/arm/ARMnn/delegate/src/Split.hpp new file mode 100644 index 0000000000..ad55e53ef2 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/Split.hpp @@ -0,0 +1,335 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "DelegateUtils.hpp" + +#include +#include +#include + +namespace armnnDelegate +{ + +constexpr unsigned int MaxNumOfTensorDimensions = 5U; + +TfLiteStatus VisitSplitOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t tfLiteSplitOperatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 2, nodeIndex)); + + auto* splitParameters = reinterpret_cast(tfLiteNode->builtin_data); + const unsigned int numSplits = NonNegative(splitParameters->num_splits, nodeIndex); + + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, numSplits, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteAxisTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteAxisTensor, tfLiteSplitOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, tfLiteSplitOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + + ARMNN_ASSERT(GetTensorInfoForTfLiteTensor(tfLiteAxisTensor).GetNumElements() == 1); + auto* axisTensorDataPtr = tflite::GetTensorData(&tfLiteAxisTensor); + std::vector axisTensorData(axisTensorDataPtr, axisTensorDataPtr + 1); + int32_t axis = axisTensorData[0]; + + auto inputDimensions = static_cast(inputTensorInfo.GetNumDimensions()); + if (((axis < -inputDimensions) && (axis < 0)) || ((axis >= inputDimensions) && (axis > 0))) + { + // Square bracket denotes inclusive n while parenthesis denotes exclusive n + // E.g. Rank 4 tensor can have axis in range [-4, 3) + // -1 == 3, -2 == 2, -3 == 1, -4 == 0 + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Operation has invalid axis: #%d. 
Axis must be in range [-n, n) in node #%d:", + axis, nodeIndex); + } + const unsigned int splitDim = ComputeWrappedIndex(axis, inputTensorInfo.GetNumDimensions()); + + std::vector outputs; + for (unsigned int i = 0; i < numSplits; ++i) + { + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[i]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, tfLiteSplitOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + outputs.push_back(GetTensorInfoForTfLiteTensor(tfLiteOutputTensor)); + } + const std::vector> outputTensorInfos(outputs.begin(), outputs.end()); + + auto inputDimSize = inputTensorInfo.GetNumDimensions(); + if (inputDimSize > MaxNumOfTensorDimensions) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: The number of dimensions: #%d for input tensors of the split op cannot be greater " + "than #%d in node #%d: ", inputDimSize, MaxNumOfTensorDimensions, nodeIndex); + return kTfLiteError; + } + + std::vector splitterDimSizes(inputDimSize); + + // Add current input shape to splitterDimSizes + for (unsigned int i = 0; i < inputDimSize; ++i) + { + splitterDimSizes[i] = inputTensorInfo.GetShape()[i]; + } + + if (splitterDimSizes[splitDim] % numSplits != 0) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Number of splits #%d must evenly divide the dimension #%d in node #%d: ", + numSplits, splitterDimSizes[splitDim], nodeIndex); + return kTfLiteError; + } + splitterDimSizes[splitDim] /= numSplits; + + armnn::SplitterDescriptor splitDescriptor(numSplits, inputDimSize); + for (unsigned int j = 0; j < numSplits; ++j) + { + // Set the size of the views. + for (unsigned int dimIdx = 0; dimIdx < splitterDimSizes.size(); ++dimIdx) + { + splitDescriptor.SetViewSize(j, dimIdx, splitterDimSizes[dimIdx]); + } + splitDescriptor.SetViewOriginCoord(j, splitDim, splitterDimSizes[splitDim] * j); + } + + if (!delegateData.m_Network) + { + // Check if supported + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsSplitterSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outputTensorInfos, + splitDescriptor); + return isSupported ? 
kTfLiteOk : kTfLiteError; + } + + armnn::IConnectableLayer* layer = delegateData.m_Network->AddSplitterLayer(splitDescriptor); + ARMNN_ASSERT(layer != nullptr); + + for (unsigned int k = 0; k < layer->GetNumOutputSlots(); ++k) + { + layer->GetOutputSlot(k).SetTensorInfo(outputs[k]); + } + + // Connect the input slots + delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[1]]->Connect(layer->GetInputSlot(0)); + + // Prepare output slots + for (unsigned int outputIndex = 0; outputIndex < layer->GetNumOutputSlots(); ++outputIndex) + { + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(outputIndex); + delegateData.m_OutputSlotForNode[ + static_cast(tfLiteNode->outputs->data[outputIndex])] = &outputSlot; + } + + return kTfLiteOk; +} + +TfLiteStatus VisitSplitVOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t tfLiteSplitVOperatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 3, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, tfLiteSplitVOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteSplitsTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if (!IsValid(tfLiteContext, tfLiteSplitsTensor, tfLiteSplitVOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteAxisTensor = tfLiteTensors[tfLiteNode->inputs->data[2]]; + if (!IsValid(tfLiteContext, tfLiteAxisTensor, tfLiteSplitVOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& splitsTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteSplitsTensor); + ARMNN_ASSERT(splitsTensorInfo.GetNumDimensions() == 1); + ARMNN_ASSERT(GetTensorInfoForTfLiteTensor(tfLiteAxisTensor).GetNumElements() == 1); + + auto* axisTensorDataPtr = tflite::GetTensorData(&tfLiteAxisTensor); + std::vector axisTensorData(axisTensorDataPtr, axisTensorDataPtr + 1); + int32_t axis = axisTensorData[0]; + + auto inputDimensions = static_cast(inputTensorInfo.GetNumDimensions()); + if (((axis < -inputDimensions) && (axis < 0)) || ((axis >= inputDimensions) && (axis > 0))) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Operation has invalid axis: #%d. 
Axis must be in range [-n, n) in node #%d:", + axis, nodeIndex); + } + const unsigned int splitDim = ComputeWrappedIndex(axisTensorData[0], inputTensorInfo.GetNumDimensions()); + + auto* splitVParameters = reinterpret_cast(tfLiteNode->builtin_data); + unsigned int numSplits = 0; + if (splitVParameters) + { + numSplits = NonNegative(splitVParameters->num_splits, nodeIndex); + } + else + { + numSplits = splitsTensorInfo.GetNumElements(); + } + + if (numSplits <= 0) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, "TfLiteArmnnDelegate: Invalid number of splits %d in node #%d", + numSplits, nodeIndex); + return kTfLiteError; + } + + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, numSplits, nodeIndex)); + std::vector outputs; + for (unsigned int i = 0; i < numSplits; ++i) + { + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[i]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, tfLiteSplitVOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + outputs.push_back(GetTensorInfoForTfLiteTensor(tfLiteOutputTensor)); + } + const std::vector> outputTensorInfos(outputs.begin(), outputs.end()); + + auto inputDimSize = inputTensorInfo.GetNumDimensions(); + if (inputDimSize > MaxNumOfTensorDimensions) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: The number of dimensions: #%d for input tensors of the split op cannot be greater " + "than #%d in node #%d: ", inputDimSize, MaxNumOfTensorDimensions, nodeIndex); + return kTfLiteError; + } + + std::vector splitsTensorData(numSplits); + std::memcpy(splitsTensorData.data(), tfLiteSplitsTensor.data.data, splitsTensorInfo.GetNumBytes()); + + + unsigned int index = 0; + unsigned int inferredIndex = 0; + int numberOfInferred = 0; + int splitSum = 0; + + for (auto splitData : splitsTensorData) + { + if (splitData < 0) + { + ++numberOfInferred; + inferredIndex = index; + } + else + { + splitSum += splitData; + } + ++index; + } + + // Check for inferred axis + if (numberOfInferred == 0) + { + if (splitSum != armnn::numeric_cast(inputTensorInfo.GetShape()[splitDim])) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, "TfLiteArmnnDelegate: SplitV split_sizes does not sum to the dimension of value along" + " split_dim in node #%d", nodeIndex); + return kTfLiteError; + } + } + else if (numberOfInferred == 1) + { + splitsTensorData[inferredIndex] = armnn::numeric_cast(inputTensorInfo.GetShape()[splitDim]) - splitSum; + } + else + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, "TfLiteArmnnDelegate: SplitV cannot infer split size for more than one split in node #%d", + nodeIndex); + return kTfLiteError; + } + + armnn::SplitterDescriptor splitDescriptor(numSplits, inputDimSize); + unsigned int accumSplit = 0; + for (unsigned int j = 0; j < numSplits; ++j) + { + unsigned int splitSize = armnn::numeric_cast(splitsTensorData[j]); + + // Set the size of the views. 
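+        // Each view keeps the input shape except along splitDim, where it takes this split's
+        // (possibly inferred) size; view origins advance by the accumulated split sizes.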
+ for (unsigned int dimIdx = 0; dimIdx < inputTensorInfo.GetNumDimensions(); ++dimIdx) + { + unsigned int dimSize = inputTensorInfo.GetShape()[dimIdx]; + if (dimIdx == splitDim) + { + dimSize = splitSize; + } + splitDescriptor.SetViewSize(j, dimIdx, dimSize); + } + + splitDescriptor.SetViewOriginCoord(j, splitDim, accumSplit); + accumSplit += splitSize; + } + + if (!delegateData.m_Network) + { + // Check if supported + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsSplitterSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outputTensorInfos, + splitDescriptor); + return isSupported ? kTfLiteOk : kTfLiteError; + } + + armnn::IConnectableLayer* layer = delegateData.m_Network->AddSplitterLayer(splitDescriptor); + ARMNN_ASSERT(layer != nullptr); + + for (unsigned int k = 0; k < layer->GetNumOutputSlots(); ++k) + { + layer->GetOutputSlot(k).SetTensorInfo(outputs[k]); + } + + // Connect + return Connect(layer, tfLiteNode, delegateData); +} + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/Transpose.hpp b/arch/arm/ARMnn/delegate/src/Transpose.hpp new file mode 100644 index 0000000000..580e0624e3 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/Transpose.hpp @@ -0,0 +1,102 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus VisitTransposeOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t tfliteTransposeOperatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 2, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor *tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor0 = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (IsDynamicTensor(tfLiteInputTensor0)) + { + TF_LITE_MAYBE_KERNEL_LOG(tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in " + "operator #%d node #%d: ", + tfliteTransposeOperatorCode, nodeIndex); + + return kTfLiteError; + } + + const TfLiteTensor& tfLiteInputTensor1 = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if (IsDynamicTensor(tfLiteInputTensor1)) + { + TF_LITE_MAYBE_KERNEL_LOG(tfLiteContext, + "TfLiteArmnnDelegate: Dynamic input tensors are not supported in " + "operator #%d node #%d: ", + tfliteTransposeOperatorCode, nodeIndex); + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (IsDynamicTensor(tfLiteOutputTensor)) + { + TF_LITE_MAYBE_KERNEL_LOG(tfLiteContext, + "TfLiteArmnnDelegate: Dynamic output tensors are not supported in " + "operator #%d node #%d: ", + tfliteTransposeOperatorCode, nodeIndex); + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo0 = GetTensorInfoForTfLiteTensor(tfLiteInputTensor0); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + auto* permTensorDataPtr = tflite::GetTensorData(&tfLiteInputTensor1); + unsigned int numEl = tfLiteInputTensor1.dims->data[0]; + + ARMNN_ASSERT( numEl <= static_cast(armnn::MaxNumOfTensorDimensions)); + ARMNN_ASSERT( tfLiteInputTensor1.dims->size == 1); // ensure only single dimension to the permutation tensor + + armnn::TransposeDescriptor descriptor(armnn::PermutationVector( 
+ reinterpret_cast (permTensorDataPtr), + static_cast(numEl))); + + bool isSupported = false; + + auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsTransposeSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo0, + outputTensorInfo, + descriptor); + }; + + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? kTfLiteOk : kTfLiteError; + } + + armnn::IConnectableLayer* transposeLayer = delegateData.m_Network->AddTransposeLayer(descriptor); + ARMNN_ASSERT(transposeLayer != nullptr); + ARMNN_ASSERT(transposeLayer->GetNumInputSlots() == 1); // permutation vector given to descriptor object + + armnn::IOutputSlot& outputSlot = transposeLayer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + return Connect(transposeLayer, tfLiteNode, delegateData); +} +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/UnidirectionalSequenceLstm.hpp b/arch/arm/ARMnn/delegate/src/UnidirectionalSequenceLstm.hpp new file mode 100644 index 0000000000..8aff39381e --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/UnidirectionalSequenceLstm.hpp @@ -0,0 +1,266 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "DelegateUtils.hpp" + +#include +#include +#include + +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus VisitUnidirectionalSequenceLstmOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + auto numInputs = tfLiteNode->inputs->size; + if (numInputs < 2) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, "TfLiteArmnnDelegate: Minimum number of inputs (%d != %d) in node #%d", + 2, numInputs, nodeIndex); + return kTfLiteError; + } + + const auto nodeParams = reinterpret_cast(tfLiteNode->builtin_data); + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + // Set the params structure for the AddUnidirectionalSequenceLstmLayer call + // Please refer to each operand at + // https://www.tensorflow.org/mlir/tfl_ops#tflunidirectional_sequence_lstm_tflunidirectionalsequencelstmop + armnn::LstmInputParams params; + + if (!IsOptionalOperandPresent(tfLiteNode, 1)) + { + params.m_InputToInputWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 1); + } + + params.m_InputToForgetWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 2); + params.m_InputToCellWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 3); + params.m_InputToOutputWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 4); + + // Recurrent weight tensors of size {n_cell, n_output} + if (!IsOptionalOperandPresent(tfLiteNode, 5)) + { + params.m_RecurrentToInputWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 5); + } + + params.m_RecurrentToForgetWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 6); + params.m_RecurrentToCellWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, 
tfLiteNode, 7); + params.m_RecurrentToOutputWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 8); + + // Peephole weights tensors of size {n_cell}, representing a diagonal matrix. + if (!IsOptionalOperandPresent(tfLiteNode, 9)) + { + params.m_CellToInputWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 9); + } + + if (!IsOptionalOperandPresent(tfLiteNode, 10)) + { + params.m_CellToForgetWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 10); + } + + if (!IsOptionalOperandPresent(tfLiteNode, 11)) + { + params.m_CellToOutputWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 11); + } + + // Gates bias tensors of size {n_cell} + if (!IsOptionalOperandPresent(tfLiteNode, 12)) + { + params.m_InputGateBias = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 12); + } + + params.m_ForgetGateBias = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 13); + params.m_CellBias = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 14); + params.m_OutputGateBias = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 15); + + // Projection weight tensor of size {n_output, n_cell} + if (!IsOptionalOperandPresent(tfLiteNode, 16)) + { + params.m_ProjectionWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 16); + } + // Projection bias tensor of size {n_output} + if (!IsOptionalOperandPresent(tfLiteNode, 17)) + { + params.m_ProjectionBias = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 17); + } + + // These state tensors are defined as variable tensors, and will be modified by this op. + armnn::TensorInfo outputStateInInfo = GetTensorInfoForTfLiteTensor(tfLiteTensors[tfLiteNode->inputs->data[18]]); + armnn::TensorInfo cellStateInInfo = GetTensorInfoForTfLiteTensor(tfLiteTensors[tfLiteNode->inputs->data[19]]); + + // Layer norm coefficient tensors of size {n_cell}, representing a diagonal matrix. 
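+    // The four layer-norm operands (indices 20-23) are optional trailing inputs: a model may
+    // either omit them entirely (a shorter inputs->size) or mark them as absent, so both
+    // conditions are checked before reading the constant tensors below.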
+ if (tfLiteNode->inputs->size >= 21 && !IsOptionalOperandPresent(tfLiteNode, 20)) + { + params.m_InputLayerNormWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 20); + } + + if (tfLiteNode->inputs->size >= 22 && !IsOptionalOperandPresent(tfLiteNode, 21)) + { + params.m_ForgetLayerNormWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 21); + } + + if (tfLiteNode->inputs->size >= 23 && !IsOptionalOperandPresent(tfLiteNode, 22)) + { + params.m_CellLayerNormWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 22); + } + + if (tfLiteNode->inputs->size >= 24 && !IsOptionalOperandPresent(tfLiteNode, 23)) + { + params.m_OutputLayerNormWeights = GetConstTensorForTfLiteTensor(tfLiteTensors, tfLiteNode, 23); + } + + // set the layer descriptor + armnn::UnidirectionalSequenceLstmDescriptor desc; + desc.m_ActivationFunc = NonNegative(nodeParams->activation, nodeIndex); + desc.m_ClippingThresCell = nodeParams->cell_clip; + desc.m_ClippingThresProj = nodeParams->proj_clip; + desc.m_CifgEnabled = (params.m_InputToInputWeights == nullptr + || params.m_RecurrentToInputWeights == nullptr + || params.m_InputGateBias == nullptr); + desc.m_PeepholeEnabled = (params.m_CellToForgetWeights != nullptr || params.m_CellToOutputWeights != nullptr); + desc.m_ProjectionEnabled = (params.m_ProjectionWeights != nullptr); + desc.m_LayerNormEnabled = (params.m_InputLayerNormWeights != nullptr + || params.m_ForgetLayerNormWeights != nullptr + || params.m_CellLayerNormWeights != nullptr + || params.m_OutputLayerNormWeights != nullptr); + desc.m_TimeMajor = nodeParams->time_major; + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + unsigned int batchSize = inputTensorInfo.GetShape()[0]; + unsigned int outputSize = outputTensorInfo.GetShape()[2]; + unsigned int numUnits = cellStateInInfo.GetShape()[1]; + + armnn::DataType dataType = inputTensorInfo.GetDataType(); + float qScale = inputTensorInfo.GetQuantizationScale(); + float qOffset = inputTensorInfo.GetQuantizationOffset(); + + armnn::TensorInfo scratchBufferTensorInfo({batchSize, numUnits * 3}, dataType, qScale, qOffset); + if (!desc.m_CifgEnabled) + { + scratchBufferTensorInfo = armnn::TensorInfo({batchSize, numUnits * 4}, dataType, qScale, qOffset); + } + armnn::TensorInfo cellStateOutTensorInfo({batchSize, numUnits}, dataType, qScale, qOffset); + armnn::TensorInfo outputStateOutTensorInfo({batchSize, outputSize}, dataType, qScale, qOffset); + + armnn::LstmInputParamsInfo paramsInfo; + paramsInfo.m_InputToForgetWeights = &(params.m_InputToForgetWeights->GetInfo()); + paramsInfo.m_InputToCellWeights = &(params.m_InputToCellWeights->GetInfo()); + paramsInfo.m_InputToOutputWeights = &(params.m_InputToOutputWeights->GetInfo()); + paramsInfo.m_RecurrentToForgetWeights = &(params.m_RecurrentToForgetWeights->GetInfo()); + paramsInfo.m_RecurrentToCellWeights = &(params.m_RecurrentToCellWeights->GetInfo()); + paramsInfo.m_RecurrentToOutputWeights = &(params.m_RecurrentToOutputWeights->GetInfo()); + paramsInfo.m_ForgetGateBias = &(params.m_ForgetGateBias->GetInfo()); + paramsInfo.m_CellBias = &(params.m_CellBias->GetInfo()); + paramsInfo.m_OutputGateBias = &(params.m_OutputGateBias->GetInfo()); + + if (!desc.m_CifgEnabled) + { + paramsInfo.m_InputToInputWeights = &(params.m_InputToInputWeights->GetInfo()); + paramsInfo.m_RecurrentToInputWeights = &(params.m_RecurrentToInputWeights->GetInfo()); 
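+        // Cell-to-input peephole weights only exist when the input gate is present (CIFG
+        // disabled), and even then they are optional, hence the additional null check.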
+ if (params.m_CellToInputWeights != nullptr) + { + paramsInfo.m_CellToInputWeights = &(params.m_CellToInputWeights->GetInfo()); + } + paramsInfo.m_InputGateBias = &(params.m_InputGateBias->GetInfo()); + } + + if (desc.m_ProjectionEnabled) + { + paramsInfo.m_ProjectionWeights = &(params.m_ProjectionWeights->GetInfo()); + if (params.m_ProjectionBias != nullptr) + { + paramsInfo.m_ProjectionBias = &(params.m_ProjectionBias->GetInfo()); + } + } + + if (desc.m_PeepholeEnabled) + { + paramsInfo.m_CellToForgetWeights = &(params.m_CellToForgetWeights->GetInfo()); + paramsInfo.m_CellToOutputWeights = &(params.m_CellToOutputWeights->GetInfo()); + } + + if (desc.m_LayerNormEnabled) + { + if(!desc.m_CifgEnabled) + { + paramsInfo.m_InputLayerNormWeights = &(params.m_InputLayerNormWeights->GetInfo()); + } + paramsInfo.m_ForgetLayerNormWeights = &(params.m_ForgetLayerNormWeights->GetInfo()); + paramsInfo.m_CellLayerNormWeights = &(params.m_CellLayerNormWeights->GetInfo()); + paramsInfo.m_OutputLayerNormWeights = &(params.m_OutputLayerNormWeights->GetInfo()); + } + + // hiddenStateOutput and cellStateOutput do not present in TfLite UnidirectionalSequenceLstm + armnn::Optional optionalTensor; + + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsUnidirectionalSequenceLstmSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outputStateInInfo, + cellStateInInfo, + outputInfo, + optionalTensor, + optionalTensor, + desc, + paramsInfo); + }; + + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? kTfLiteOk : kTfLiteError; + } + + armnn::IConnectableLayer* layer = delegateData.m_Network->AddUnidirectionalSequenceLstmLayer(desc, params); + ARMNN_ASSERT(layer != nullptr); + + layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); + + // Connect the inputs + // input_layer + delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[0]]->Connect(layer->GetInputSlot(0)); + // cellStateIn + delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[18]]->Connect(layer->GetInputSlot(1)); + //outputStateIn + delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[19]]->Connect(layer->GetInputSlot(2)); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + delegateData.m_OutputSlotForNode[static_cast(tfLiteNode->outputs->data[0])] = &outputSlot; + return kTfLiteOk; +} + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/Unpack.hpp b/arch/arm/ARMnn/delegate/src/Unpack.hpp new file mode 100644 index 0000000000..4163163243 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/Unpack.hpp @@ -0,0 +1,208 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include "DelegateUtils.hpp" + +#include +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus VisitUnpackOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t operatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + + if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + + // Get Unpack Axis + const auto params = reinterpret_cast(tfLiteNode->builtin_data); + + const unsigned int unpackAxis = NonNegative(params->axis, nodeIndex); + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + + if (unpackAxis >= inputTensorInfo.GetNumDimensions()) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: The unpack axis #%d cannot be greater than or equal to " + "the number of input dimensions #%d in operator #%d node #%d", + unpackAxis, inputTensorInfo.GetNumDimensions(), operatorCode, nodeIndex); + return kTfLiteError; + } + + // Get Unpack Num + unsigned int unpackNum = NonNegative(params->num, nodeIndex); + + // If num is not defined, automatically infer from the length of the dimension axis. + if(unpackNum == 0) + { + unpackNum = inputTensorInfo.GetShape()[unpackAxis]; + } + + // If unpack number cannot be inferred and is still zero, return kTfLiteError. + if(unpackNum == 0) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Number to unpack must greater than zero in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + + // Check outputs + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, unpackNum, nodeIndex)); + + + auto inputDimSize = inputTensorInfo.GetNumDimensions(); + std::vector unpackDimSizes(inputDimSize); + + // Add current input shape to unpackDimSizes + for (unsigned int i = 0; i < inputDimSize; ++i) + { + unpackDimSizes[i] = inputTensorInfo.GetShape()[i]; + } + + if (unpackDimSizes[unpackAxis] != unpackNum) + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Number to unpack must be the same as length " + "of the dimension to unpack along in operator #%d node #%d: ", + operatorCode, nodeIndex); + return kTfLiteError; + } + + unpackDimSizes[unpackAxis] /= unpackNum; + + armnn::SplitterDescriptor splitDesc(unpackNum, static_cast(unpackDimSizes.size())); + for (unsigned int j = 0; j < unpackNum; ++j) + { + // Set the size of the views. 
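+        // For example, unpacking an input of shape [4, 2, 3] along axis 1 gives unpackNum = 2
+        // and unpackDimSizes = {4, 1, 3}: every view has size [4, 1, 3] and view j starts at
+        // origin coordinate j on the unpack axis. The reshape layers added further down then
+        // drop that singleton dimension to produce the expected [4, 3] outputs.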
+ for (unsigned int dimIdx = 0; dimIdx < unpackDimSizes.size(); ++dimIdx) + { + splitDesc.SetViewSize(j, dimIdx, unpackDimSizes[dimIdx]); + } + splitDesc.SetViewOriginCoord(j, unpackAxis, unpackDimSizes[unpackAxis] * j); + } + + std::vector outputs; + for (unsigned int i = 0; i < unpackNum; ++i) + { + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[i]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex)) + { + return kTfLiteError; + } + outputs.push_back(GetTensorInfoForTfLiteTensor(tfLiteOutputTensor)); + } + const std::vector> outputTensorInfos(outputs.begin(), outputs.end()); + + // Determine the shape of the Splitter layer outputs for validation + armnn::TensorShape splitOutShape = armnn::TensorShape(static_cast(unpackDimSizes.size()), + unpackDimSizes.data()); + + std::vector splitterOutputs; + for (unsigned int outputIndex = 0; outputIndex < outputTensorInfos.size(); ++outputIndex) + { + splitterOutputs.push_back(armnn::TensorInfo(splitOutShape, + outputTensorInfos[outputIndex].get().GetDataType(), + outputTensorInfos[outputIndex].get().GetQuantizationScale(), + outputTensorInfos[outputIndex].get().GetQuantizationOffset())); + } + std::vector> splitterOutputTensorInfos(splitterOutputs.begin(), + splitterOutputs.end()); + + if (!delegateData.m_Network) + { + // Check if splitter is supported + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsSplitterSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + splitterOutputTensorInfos, + splitDesc); + return isSupported ? kTfLiteOk : kTfLiteError; + } + + // Create Reshape descriptor from the first outputTensorInfo to validate a single Reshape layer + // Use this descriptor later when creating every ReshapeLayer as all Reshape Layers should be the same + armnn::ReshapeDescriptor reshapeDescriptor; + reshapeDescriptor.m_TargetShape = outputTensorInfos[0].get().GetShape(); + + if (!delegateData.m_Network) + { + bool isSupported = false; + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsReshapeSupported, + delegateData.m_Backends, + isSupported, + splitterOutputTensorInfos[0], + outputTensorInfos[0], + reshapeDescriptor); + return isSupported ? kTfLiteOk : kTfLiteError; + }; + + std::string splitterLayerName("Unpack Splitter"); + + armnn::IConnectableLayer* splitterLayer = delegateData.m_Network->AddSplitterLayer(splitDesc, + splitterLayerName.c_str()); + ARMNN_ASSERT(splitterLayer != nullptr); + + for (unsigned int k = 0; k < splitterLayer->GetNumOutputSlots(); ++k) + { + splitterLayer->GetOutputSlot(k).SetTensorInfo(outputs[k]); + } + + // Connect the input slots + delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[0]]->Connect(splitterLayer->GetInputSlot(0)); + + // Create reshape to remove the unpacked dimension for unpack operator of each output from Splitter. 
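+    // Note that each splitter output slot keeps the intermediate shape that still contains
+    // the unpack dimension (splitterOutputTensorInfos); only the reshape output carries the
+    // final unpacked shape expected by the corresponding TfLite output tensor.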
+ for (unsigned int outputIndex = 0; outputIndex < splitterLayer->GetNumOutputSlots(); ++outputIndex) + { + std::string reshapeLayerName("Unpack Reshape"); + armnn::IConnectableLayer* reshapeLayer = delegateData.m_Network->AddReshapeLayer(reshapeDescriptor, + reshapeLayerName.c_str()); + ARMNN_ASSERT(reshapeLayer != nullptr); + + splitterLayer->GetOutputSlot(outputIndex).SetTensorInfo(splitterOutputTensorInfos[outputIndex]); + splitterLayer->GetOutputSlot(outputIndex).Connect(reshapeLayer->GetInputSlot(0)); + + armnn::TensorInfo outputTensorInfo = outputTensorInfos[outputIndex]; + reshapeLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); + + armnn::IOutputSlot& slot = reshapeLayer->GetOutputSlot(0); + + delegateData.m_OutputSlotForNode[ + static_cast(tfLiteNode->outputs->data[outputIndex])] = &slot; + + } + + return kTfLiteOk; +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/armnn_delegate.cpp b/arch/arm/ARMnn/delegate/src/armnn_delegate.cpp new file mode 100644 index 0000000000..ed19b72787 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/armnn_delegate.cpp @@ -0,0 +1,957 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include + +#include "Version.hpp" + +#include "Activation.hpp" +#include "ArgMinMax.hpp" +#include "BatchSpace.hpp" +#include "Comparison.hpp" +#include "Convolution.hpp" +#include "Control.hpp" +#include "ElementwiseBinary.hpp" +#include "ElementwiseUnary.hpp" +#include "Fill.hpp" +#include "FullyConnected.hpp" +#include "Gather.hpp" +#include "LogicalBinary.hpp" +#include "Lstm.hpp" +#include "Normalization.hpp" +#include "Pack.hpp" +#include "Pad.hpp" +#include "Pooling.hpp" +#include "Prelu.hpp" +#include "Quantization.hpp" +#include "Redefine.hpp" +#include "Reduce.hpp" +#include "Resize.hpp" +#include "Round.hpp" +#include "Shape.hpp" +#include "Slice.hpp" +#include "Softmax.hpp" +#include "SpaceDepth.hpp" +#include "Split.hpp" +#include "Transpose.hpp" +#include "UnidirectionalSequenceLstm.hpp" +#include "Unpack.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace armnnDelegate +{ + +DelegateOptions TfLiteArmnnDelegateOptionsDefault() +{ + DelegateOptions options(armnn::Compute::CpuRef); + return options; +} + +TfLiteDelegate* TfLiteArmnnDelegateCreate(armnnDelegate::DelegateOptions options) +{ + auto* armnnDelegate = new ::armnnDelegate::Delegate(options); + return armnnDelegate->GetDelegate(); +} + +void TfLiteArmnnDelegateDelete(TfLiteDelegate* tfLiteDelegate) +{ + if (tfLiteDelegate != nullptr) + { + delete static_cast<::armnnDelegate::Delegate*>(tfLiteDelegate->data_); + } +} + +TfLiteStatus DoPrepare(TfLiteContext* tfLiteContext, TfLiteDelegate* tfLiteDelegate) +{ + TfLiteIntArray* supportedOperators = + static_cast<::armnnDelegate::Delegate*>(tfLiteDelegate->data_)->IdentifyOperatorsToDelegate(tfLiteContext); + + // ArmNN Delegate Registration + static const TfLiteRegistration kArmnnSubgraphRegistration = { + // ArmnnSubgraph Init + .init = [](TfLiteContext* tfLiteContext, const char* buffer, size_t length) -> void* { + armnn::IgnoreUnused(length); + const TfLiteDelegateParams* parameters = reinterpret_cast(buffer); + + return static_cast(ArmnnSubgraph::Create( + tfLiteContext, parameters, static_cast<::armnnDelegate::Delegate*>(parameters->delegate->data_))); + }, + // ArmnnSubgraph Free + .free = [](TfLiteContext* tfLiteContext, void* buffer) -> void { + armnn::IgnoreUnused(tfLiteContext); + if (buffer != 
nullptr) + { + delete static_cast(buffer); + } + }, + // ArmnnSubgraph Prepare + .prepare = [](TfLiteContext* tfLiteContext, TfLiteNode* tfLiteNode) -> TfLiteStatus { + if (tfLiteNode->user_data == nullptr) + { + return kTfLiteError; + } + return static_cast(tfLiteNode->user_data)->Prepare(tfLiteContext); + }, + // ArmnnSubgraph Invoke + .invoke = [](TfLiteContext* tfLiteContext, TfLiteNode* tfLiteNode) -> TfLiteStatus { + if (tfLiteNode->user_data == nullptr) + { + return kTfLiteError; + } + + return static_cast(tfLiteNode->user_data)->Invoke(tfLiteContext, tfLiteNode); + }, + + .profiling_string = nullptr, + .builtin_code = kTfLiteBuiltinDelegate, + .custom_name = "TfLiteArmNnDelegate", + .version = 1, + }; + + const TfLiteStatus status = + tfLiteContext->ReplaceNodeSubsetsWithDelegateKernels( + tfLiteContext, kArmnnSubgraphRegistration, supportedOperators, tfLiteDelegate); + + TfLiteIntArrayFree(supportedOperators); + return status; + +} + +Delegate::Delegate(armnnDelegate::DelegateOptions options) + : m_Runtime(nullptr, nullptr), + m_Options(std::move(options)) +{ + // Configures logging for ARMNN + if (options.IsLoggingEnabled()) + { + armnn::ConfigureLogging(true, true, options.GetLoggingSeverity()); + } + + // Create ArmNN Runtime + m_Runtime = armnn::IRuntime::Create(options.GetRuntimeOptions()); + + std::vector backends; + if (m_Runtime) + { + const armnn::BackendIdSet supportedDevices = m_Runtime->GetDeviceSpec().GetSupportedBackends(); + for (auto& backend : m_Options.GetBackends()) + { + if (std::find(supportedDevices.cbegin(), supportedDevices.cend(), backend) == supportedDevices.cend()) + { + TFLITE_LOG_PROD(tflite::TFLITE_LOG_INFO, + "TfLiteArmnnDelegate: Requested unknown backend %s", backend.Get().c_str()); + } + else + { + backends.push_back(backend); + } + } + } + + if (backends.empty()) + { + // No known backend specified + throw armnn::InvalidArgumentException("TfLiteArmnnDelegate: No known backend specified."); + } + m_Options.SetBackends(backends); + + TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO, "TfLiteArmnnDelegate: Created TfLite ArmNN delegate."); +} + +TfLiteIntArray* Delegate::IdentifyOperatorsToDelegate(TfLiteContext* tfLiteContext) +{ + TfLiteIntArray* executionPlan = nullptr; + if (tfLiteContext->GetExecutionPlan(tfLiteContext, &executionPlan) != kTfLiteOk) + { + TF_LITE_KERNEL_LOG(tfLiteContext, "TfLiteArmnnDelegate: Unable to get graph execution plan."); + return nullptr; + } + + // Delegate data with null network + DelegateData delegateData(m_Options.GetBackends()); + + TfLiteIntArray* nodesToDelegate = TfLiteIntArrayCreate(executionPlan->size); + nodesToDelegate->size = 0; + for (int i = 0; i < executionPlan->size; ++i) + { + const int nodeIndex = executionPlan->data[i]; + + // If TfLite nodes can be delegated to ArmNN + TfLiteNode* tfLiteNode = nullptr; + TfLiteRegistration* tfLiteRegistration = nullptr; + if (tfLiteContext->GetNodeAndRegistration( + tfLiteContext, nodeIndex, &tfLiteNode, &tfLiteRegistration) != kTfLiteOk) + { + TF_LITE_KERNEL_LOG(tfLiteContext, + "TfLiteArmnnDelegate: Unable to get node and registration for node %d.", + nodeIndex); + continue; + } + + if (ArmnnSubgraph::VisitNode( + delegateData, tfLiteContext, tfLiteRegistration, tfLiteNode, nodeIndex) != kTfLiteOk) + { + // node is not supported by ArmNN + continue; + } + + nodesToDelegate->data[nodesToDelegate->size++] = nodeIndex; + } + + std::sort(&nodesToDelegate->data[0], &nodesToDelegate->data[nodesToDelegate->size]); + return nodesToDelegate; +} + +TfLiteDelegate* 
Delegate::GetDelegate() +{ + return &m_Delegate; +} + +const std::string Delegate::GetVersion() +{ + return DELEGATE_VERSION; +} + +TfLiteStatus ArmnnSubgraph::AddInputLayer(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + const TfLiteIntArray* inputs, + std::vector& inputBindings) +{ + const size_t numInputs = static_cast(inputs->size); + for (unsigned int i = 0; i < numInputs; ++i) + { + const int32_t tensorId = inputs->data[i]; + const TfLiteTensor tensor = tfLiteContext->tensors[tensorId]; + // Do not create bindings for constant inputs + if (tensor.allocation_type == kTfLiteMmapRo) + { + continue; + } + + auto bindingId = static_cast((tensorId)); + armnn::IConnectableLayer* layer = delegateData.m_Network->AddInputLayer(bindingId); + + auto tensorInfo = GetTensorInfoForTfLiteTensor(tensor); + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(tensorInfo); + + // Store for creating connections + delegateData.m_OutputSlotForNode[static_cast(tensorId)] = &outputSlot; + + inputBindings.push_back(std::make_pair(bindingId, tensorInfo)); + } + + return kTfLiteOk; +} + +TfLiteStatus ArmnnSubgraph::AddOutputLayer(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + const TfLiteIntArray* outputs, + std::vector& outputBindings) +{ + const size_t numOutputs = static_cast(outputs->size); + for (unsigned int i = 0; i < numOutputs; ++i) + { + const int32_t tensorId = outputs->data[i]; + const TfLiteTensor tensor = tfLiteContext->tensors[tensorId]; + + auto bindingId = static_cast((tensorId)); + armnn::IConnectableLayer* layer = delegateData.m_Network->AddOutputLayer(bindingId); + + auto tensorInfo = GetTensorInfoForTfLiteTensor(tensor); + ARMNN_ASSERT(delegateData.m_OutputSlotForNode[static_cast(tensorId)] != nullptr); + delegateData.m_OutputSlotForNode[static_cast(tensorId)]->Connect(layer->GetInputSlot(0)); + outputBindings.push_back(std::make_pair(bindingId, tensorInfo)); + } + + return kTfLiteOk; +} + +ArmnnSubgraph* ArmnnSubgraph::Create(TfLiteContext* tfLiteContext, + const TfLiteDelegateParams* parameters, + const Delegate* delegate) +{ + const auto startTime = armnn::GetTimeNow(); + ARMNN_LOG(info) << "ArmnnSubgraph creation"; + + TfLiteIntArray* executionPlan; + if (tfLiteContext->GetExecutionPlan(tfLiteContext, &executionPlan) != kTfLiteOk) + { + return nullptr; + } + + // Initialize DelegateData holds network and output slots information + DelegateData delegateData(delegate->m_Options.GetBackends()); + + // Build ArmNN Network + armnn::NetworkOptions networkOptions = {}; + armnn::NetworkId networkId; + delegateData.m_Network = armnn::INetwork::Create(networkOptions); + + delegateData.m_OutputSlotForNode = std::vector(tfLiteContext->tensors_size, nullptr); + + std::vector inputBindings; + std::vector outputBindings; + + // Add input layer + auto status = AddInputLayer(delegateData, tfLiteContext, parameters->input_tensors, inputBindings); + if (status != kTfLiteOk) + { + throw armnn::Exception("TfLiteArmnnDelegate: Unable to add Inputs to the network!"); + } + + // Parse TfLite delegate nodes to ArmNN + const auto parseStartTime = armnn::GetTimeNow(); + for (int i = 0; i < parameters->nodes_to_replace->size; ++i) + { + const int nodeIndex = parameters->nodes_to_replace->data[i]; + + TfLiteNode* tfLiteNode = nullptr; + TfLiteRegistration* tfLiteRegistration = nullptr; + if (tfLiteContext->GetNodeAndRegistration( + tfLiteContext, nodeIndex, &tfLiteNode, &tfLiteRegistration) != kTfLiteOk) + { + throw 
armnn::Exception(&"TfLiteArmnnDelegate: Unable to get node registration: " [ nodeIndex]); + } + + if (VisitNode(delegateData, tfLiteContext, tfLiteRegistration, tfLiteNode, nodeIndex) != kTfLiteOk) + { + throw armnn::Exception(&"TfLiteArmnnDelegate: Unable to parse node: " [ nodeIndex]); + } + } + ARMNN_LOG(info) << "Parse nodes to ArmNN time: " << std::setprecision(2) + << std::fixed << armnn::GetTimeDuration(parseStartTime).count() << " ms"; + + // Add Output layer + status = AddOutputLayer(delegateData, tfLiteContext, parameters->output_tensors, outputBindings); + if (status != kTfLiteOk) + { + throw armnn::Exception("TfLiteArmnnDelegate: Unable to add Outputs to the network!"); + } + + // Optimize ArmNN network + armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr); + try + { + const auto optimizeStartTime = armnn::GetTimeNow(); + optNet = armnn::Optimize(*(delegateData.m_Network.get()), + delegate->m_Options.GetBackends(), + delegate->m_Runtime->GetDeviceSpec(), + delegate->m_Options.GetOptimizerOptions()); + ARMNN_LOG(info) << "Optimize ArmnnSubgraph time: " << std::setprecision(2) + << std::fixed << armnn::GetTimeDuration(optimizeStartTime).count() << " ms"; + } + catch (std::exception &ex) + { + std::stringstream exMessage; + exMessage << "TfLiteArmnnDelegate: Exception (" << ex.what() << ") caught from optimize."; + throw armnn::Exception(exMessage.str()); + } + if (!optNet) + { + // Optimize failed + throw armnn::Exception("TfLiteArmnnDelegate: Unable to optimize the network!"); + } + + // If set, we will serialize the optimized model into a dot file. + const std::string serializeToDotFile = delegate->m_Options.GetSerializeToDot(); + if (!serializeToDotFile.empty()) + { + ARMNN_LOG(info) << "Writing graph to dot file: " << serializeToDotFile; + fs::path filename = serializeToDotFile; + std::fstream file(filename.c_str(), std::ios_base::out); + optNet->SerializeToDot(file); + } + + try + { + const auto loadStartTime = armnn::GetTimeNow(); + + // Load graph into runtime + std::string errorMessage; + armnn::Status loadingStatus; + armnn::MemorySource memorySource = armnn::MemorySource::Undefined; + if (delegate->m_Options.GetOptimizerOptions().m_ImportEnabled) + { + memorySource = armnn::MemorySource::Malloc; + } + armnn::INetworkProperties networkProperties(false, + memorySource, + memorySource, + delegate->m_Options.GetInternalProfilingState(), + delegate->m_Options.GetInternalProfilingDetail()); + loadingStatus = delegate->m_Runtime->LoadNetwork(networkId, + std::move(optNet), + errorMessage, + networkProperties); + if (loadingStatus != armnn::Status::Success) + { + // Network load failed. 
+ throw armnn::Exception("TfLiteArmnnDelegate: Network could not be loaded:" + errorMessage); + } + + ARMNN_LOG(info) << "Load ArmnnSubgraph time: " << std::setprecision(2) + << std::fixed << armnn::GetTimeDuration(loadStartTime).count() << " ms"; + } + catch (std::exception& ex) + { + std::stringstream exMessage; + exMessage << "TfLiteArmnnDelegate: Exception (" << ex.what() << ") caught from LoadNetwork."; + throw armnn::Exception(exMessage.str()); + } + + // Register debug callback function + if (delegate->m_Options.GetDebugCallbackFunction().has_value()) + { + delegate->m_Runtime->RegisterDebugCallback(networkId, delegate->m_Options.GetDebugCallbackFunction().value()); + } + + ARMNN_LOG(info) << "Overall ArmnnSubgraph creation time: " << std::setprecision(2) + << std::fixed << armnn::GetTimeDuration(startTime).count() << " ms\n"; + + // Create a new SubGraph with networkId and runtime + return new ArmnnSubgraph(networkId, delegate->m_Runtime.get(), inputBindings, outputBindings); +} + +TfLiteStatus ArmnnSubgraph::Prepare(TfLiteContext* tfLiteContext) +{ + armnn::IgnoreUnused(tfLiteContext); + return kTfLiteOk; +} + +TfLiteStatus ArmnnSubgraph::Invoke(TfLiteContext* tfLiteContext, TfLiteNode* tfLiteNode) +{ + // Prepare inputs + armnn::InputTensors inputTensors; + size_t inputIndex = 0; + for (auto inputIdx : tflite::TfLiteIntArrayView(tfLiteNode->inputs)) + { + TfLiteTensor* tensor = &tfLiteContext->tensors[inputIdx]; + if (tensor->allocation_type != kTfLiteMmapRo) + { + const armnn::BindingPointInfo& inputBinding = m_InputBindings[inputIndex]; + armnn::TensorInfo inputTensorInfo = inputBinding.second; + inputTensorInfo.SetConstant(true); + const armnn::ConstTensor inputTensor(inputTensorInfo, tensor->data.data); + inputTensors.emplace_back(inputIdx, inputTensor); + + ++inputIndex; + } + } + + // Prepare outputs + armnn::OutputTensors outputTensors; + size_t outputIndex = 0; + for (auto outputIdx : tflite::TfLiteIntArrayView(tfLiteNode->outputs)) + { + const armnn::BindingPointInfo& outputBinding = m_OutputBindings[outputIndex]; + TfLiteTensor* tensor = &tfLiteContext->tensors[outputIdx]; + const armnn::Tensor outputTensor(outputBinding.second, tensor->data.data); + outputTensors.emplace_back(outputIdx, outputTensor); + + ++outputIndex; + } + + // Run graph + auto status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors); + // The delegate holds its own Arm NN runtime so this is our last chance to print internal profiling data. + std::shared_ptr profiler = m_Runtime->GetProfiler(m_NetworkId); + if (profiler && profiler->IsProfilingEnabled()) + { + profiler->Print(std::cout); + } + return (status == armnn::Status::Success) ? 
kTfLiteOk : kTfLiteError; +} + +TfLiteStatus ArmnnSubgraph::VisitNode(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteRegistration* tfLiteRegistration, + TfLiteNode* tfLiteNode, + int nodeIndex) +{ + switch (tfLiteRegistration->builtin_code) + { + case kTfLiteBuiltinAbs: + return VisitElementwiseUnaryOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + armnn::UnaryOperation::Abs); + case kTfLiteBuiltinAdd: + return VisitElementwiseBinaryOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinAdd); + case kTfLiteBuiltinArgMax: + return VisitArgMinMaxOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinArgMax); + case kTfLiteBuiltinArgMin: + return VisitArgMinMaxOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinArgMin); + case kTfLiteBuiltinAveragePool2d: + return VisitPoolingOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinAveragePool2d); + case kTfLiteBuiltinBatchToSpaceNd: + return VisitBatchToSpaceNdOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinBatchToSpaceNd); + case kTfLiteBuiltinCast: + return VisitCastOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinCast); + case kTfLiteBuiltinConcatenation: + return VisitControlOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinConcatenation); + case kTfLiteBuiltinConv2d: + return VisitConvolutionOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinConv2d); +// Conv3d is only correctly supported for external delegates from TF Lite v2.6, as there was a breaking bug in v2.5. +#if defined(ARMNN_POST_TFLITE_2_5) + case kTfLiteBuiltinConv3d: + return VisitConvolutionOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinConv3d); +#endif + case kTfLiteBuiltinDepthToSpace: + return VisitDepthToSpaceOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinDepthToSpace); + case kTfLiteBuiltinDepthwiseConv2d: + return VisitConvolutionOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinDepthwiseConv2d); + case kTfLiteBuiltinDequantize: + return VisitDequantizeOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinDequantize); + case kTfLiteBuiltinDiv: + return VisitElementwiseBinaryOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinDiv); + case kTfLiteBuiltinElu: + return VisitActivationOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinElu); + case kTfLiteBuiltinEqual: + return VisitComparisonOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinEqual); + case kTfLiteBuiltinExp: + return VisitElementwiseUnaryOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + armnn::UnaryOperation::Exp); + case kTfLiteBuiltinExpandDims: + return VisitExpandDimsOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinExpandDims); + case kTfLiteBuiltinFill: + return VisitFillOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinFill); + case kTfLiteBuiltinFloor: + return VisitFloorOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinFloor); + case kTfLiteBuiltinFloorDiv: + return VisitElementwiseBinaryOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + 
kTfLiteBuiltinFloorDiv); + case kTfLiteBuiltinFullyConnected: + return VisitFullyConnectedOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinFullyConnected); + case kTfLiteBuiltinGather: + return VisitGatherOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinGather); + case kTfLiteBuiltinGreater: + return VisitComparisonOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinGreater); + case kTfLiteBuiltinGreaterEqual: + return VisitComparisonOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinGreaterEqual); + case kTfLiteBuiltinHardSwish: + return VisitActivationOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinHardSwish); + case kTfLiteBuiltinL2Normalization: + return VisitL2NormalizationOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinL2Normalization); + case kTfLiteBuiltinL2Pool2d: + return VisitPoolingOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinL2Pool2d); + case kTfLiteBuiltinLess: + return VisitComparisonOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinLess); + case kTfLiteBuiltinLessEqual: + return VisitComparisonOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinLessEqual); + case kTfLiteBuiltinLocalResponseNormalization: + return VisitLocalResponseNormalizationOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinLocalResponseNormalization); + case kTfLiteBuiltinLogicalAnd: + return VisitLogicalBinaryOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinLogicalAnd, + armnn::LogicalBinaryOperation::LogicalAnd); + case kTfLiteBuiltinLogicalNot: + return VisitElementwiseUnaryOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + armnn::UnaryOperation::LogicalNot); + case kTfLiteBuiltinLogicalOr: + return VisitLogicalBinaryOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinLogicalOr, + armnn::LogicalBinaryOperation::LogicalOr); + case kTfLiteBuiltinLogistic: + return VisitActivationOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinLogistic); + case kTfLiteBuiltinLogSoftmax: + return VisitSoftmaxOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinLogSoftmax); + case kTfLiteBuiltinLstm: + return VisitLstmOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinLstm); + case kTfLiteBuiltinMaxPool2d: + return VisitPoolingOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinMaxPool2d); + case kTfLiteBuiltinMaximum: + return VisitElementwiseBinaryOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinMaximum); + case kTfLiteBuiltinMean: + return VisitControlOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinMean); + case kTfLiteBuiltinMinimum: + return VisitElementwiseBinaryOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinMinimum); + case kTfLiteBuiltinMirrorPad: + return VisitPadOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinMirrorPad); + case kTfLiteBuiltinMul: + return VisitElementwiseBinaryOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinMul); + case kTfLiteBuiltinNeg: + return 
VisitElementwiseUnaryOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + armnn::UnaryOperation::Neg); + case kTfLiteBuiltinNotEqual: + return VisitComparisonOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinNotEqual); + case kTfLiteBuiltinPack: + return VisitPackOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinPack); + case kTfLiteBuiltinPad: + return VisitPadOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinPad); + case kTfLiteBuiltinPadv2: + return VisitPadOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinPadv2); + case kTfLiteBuiltinPrelu: + return VisitPreluOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinPrelu); + case kTfLiteBuiltinQuantize: + return VisitQuantizeOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinQuantize); + case kTfLiteBuiltinRank: + return VisitControlOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinRank); + case kTfLiteBuiltinReduceMax: + return VisitReduceOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinReduceMax); + case kTfLiteBuiltinReduceMin: + return VisitReduceOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinReduceMin); + case kTfLiteBuiltinReduceProd: + return VisitReduceOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinReduceProd); + case kTfLiteBuiltinRelu: + return VisitActivationOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinRelu); + case kTfLiteBuiltinReluN1To1: + return VisitActivationOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinReluN1To1); + case kTfLiteBuiltinRelu6: + return VisitActivationOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinRelu6); + case kTfLiteBuiltinReshape: + return VisitReshapeOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinReshape); + case kTfLiteBuiltinResizeBilinear: + return VisitResizeOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinResizeBilinear); + case kTfLiteBuiltinResizeNearestNeighbor: + return VisitResizeOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinResizeNearestNeighbor); + case kTfLiteBuiltinRsqrt: + return VisitElementwiseUnaryOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + armnn::UnaryOperation::Rsqrt); + case kTfLiteBuiltinShape: + return VisitShapeOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinShape); + case kTfLiteBuiltinSplit: + return VisitSplitOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinSplit); + case kTfLiteBuiltinSplitV: + return VisitSplitVOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinSplitV); + case kTfLiteBuiltinSqrt: + return VisitElementwiseUnaryOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + armnn::UnaryOperation::Sqrt); + case kTfLiteBuiltinSqueeze: + return VisitSqueezeOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinSqueeze); + case kTfLiteBuiltinStridedSlice: + return VisitSliceOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinStridedSlice); + case kTfLiteBuiltinSum: + return VisitReduceOperator(delegateData, + 
tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinSum); + case kTfLiteBuiltinTranspose: + return VisitTransposeOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinTranspose); + case kTfLiteBuiltinTransposeConv: + return VisitConvolutionOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinTransposeConv); + case kTfLiteBuiltinSoftmax: + return VisitSoftmaxOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinSoftmax); + case kTfLiteBuiltinSpaceToBatchNd: + return VisitSpaceToBatchNdOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinSpaceToBatchNd); + case kTfLiteBuiltinSpaceToDepth: + return VisitSpaceToDepthOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinSpaceToDepth); + case kTfLiteBuiltinSub: + return VisitElementwiseBinaryOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinSub); + case kTfLiteBuiltinTanh: + return VisitActivationOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinTanh); + case kTfLiteBuiltinUnidirectionalSequenceLstm: + return VisitUnidirectionalSequenceLstmOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinUnidirectionalSequenceLstm); + case kTfLiteBuiltinUnpack: + return VisitUnpackOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinUnpack); + default: + return kTfLiteError; + } +} + +} // armnnDelegate namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/armnn_external_delegate.cpp b/arch/arm/ARMnn/delegate/src/armnn_external_delegate.cpp new file mode 100644 index 0000000000..c3875740e1 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/armnn_external_delegate.cpp @@ -0,0 +1,68 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#include "armnn_delegate.hpp" +#include +#include + +#include +#include + +namespace tflite +{ + +/** + * This file defines two symbols that need to be exported to use the TFLite external delegate provider. This is a plugin + * that can be used for fast integration of delegates into benchmark tests and other tools. It allows loading of + * a dynamic delegate library at runtime. + * + * The external delegate also has Tensorflow Lite Python bindings. Therefore the dynamic external delegate + * can be directly used with Tensorflow Lite Python APIs. 
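+ *
+ * A minimal usage sketch (illustrative only; the option keys shown here are assumptions and
+ * must match what armnnDelegate::DelegateOptions actually accepts in your build):
+ *
+ *   const char* keys[]   = { "backends" };
+ *   const char* values[] = { "CpuAcc,CpuRef" };
+ *   TfLiteDelegate* delegate = tflite_plugin_create_delegate(
+ *       const_cast<char**>(keys), const_cast<char**>(values), 1, nullptr);
+ *   // ... hand the delegate to the interpreter, e.g. via Interpreter::ModifyGraphWithDelegate ...
+ *   tflite_plugin_destroy_delegate(delegate);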
+ * + * See tensorflow/lite/delegates/external for details or visit the tensorflow guide + * [here](https://www.tensorflow.org/lite/performance/implementing_delegate#option_2_leverage_external_delegate) + */ + +extern "C" +{ + +/** + * Implementation of the TfLite external delegate plugin + * + * For details about what options_keys and option_values are supported please see: + * armnnDelegate::DelegateOptions::DelegateOptions(char const* const*, char const* const*,size_t,void (*)(const char*)) + */ +TfLiteDelegate* tflite_plugin_create_delegate(char** options_keys, + char** options_values, + size_t num_options, + void (*report_error)(const char*)) +{ + // Returning null indicates an error during delegate creation, we initialize with that + TfLiteDelegate* delegate = nullptr; + try + { + armnnDelegate::DelegateOptions options (options_keys, options_values, num_options, (*report_error)); + delegate = TfLiteArmnnDelegateCreate(options); + } + catch (const std::exception& ex) + { + if(report_error) + { + report_error(ex.what()); + } + } + return delegate; +} + +/** Destroy a given delegate plugin + * + * @param[in] delegate Delegate to destruct + */ +void tflite_plugin_destroy_delegate(TfLiteDelegate* delegate) +{ + armnnDelegate::TfLiteArmnnDelegateDelete(delegate); +} + +} // extern "C" +} // namespace tflite \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/ActivationTest.cpp b/arch/arm/ARMnn/delegate/src/test/ActivationTest.cpp new file mode 100644 index 0000000000..69041d77a2 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/ActivationTest.cpp @@ -0,0 +1,299 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ActivationTestHelper.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace armnnDelegate +{ + + +void ActivationReLuTest(std::vector& backends) +{ + std::vector inputData = { + -0.1f, -0.2f, -0.3f, -0.4f, + 0.1f, 0.2f, 0.3f, 0.4f, + -1.0f, -2.0f, -3.0f, -4.0f, + 1.0f, 2.0f, 3.0f, 4.0f + }; + + // Calculate output values for input. + auto f = [](float value) + { + return std::fmax(0.0f, value); + }; + std::vector outputExpectedData(inputData.size()); + std::transform(inputData.begin(), inputData.end(), outputExpectedData.begin(), f); + + ActivationTest(tflite::BuiltinOperator_RELU, + backends, + inputData, + outputExpectedData); +} + +void ActivationBoundedReluTest(std::vector& backends) +{ + std::vector inputData = { + -0.1f, -0.2f, -0.3f, -0.4f, + 0.1f, 0.2f, 0.3f, 0.4f, + -1.0f, -2.0f, -3.0f, -4.0f, + 1.0f, 2.0f, 3.0f, 4.0f + }; + + const float a = 6.0f; + const float b = 0.0f; + // Calculate output values for input. + auto f = [a, b](float value) + { + return std::min(a, std::max(b, value)); + }; + std::vector outputExpectedData(inputData.size()); + std::transform(inputData.begin(), inputData.end(), outputExpectedData.begin(), f); + + ActivationTest(tflite::BuiltinOperator_RELU6, + backends, + inputData, + outputExpectedData); +} + +void ActivationSigmoidTest(std::vector& backends) +{ + std::vector inputData = { + -0.1f, -0.2f, -0.3f, -0.4f, + 0.1f, 0.2f, 0.3f, 0.4f, + -1.0f, -2.0f, -3.0f, -4.0f, + 1.0f, 2.0f, 3.0f, 4.0f + }; + + // Calculate output values for input. 
+ auto f = [](float value) + { + return 1.0f / (1.0f + std::exp(-value)); + }; + std::vector outputExpectedData(inputData.size()); + std::transform(inputData.begin(), inputData.end(), outputExpectedData.begin(), f); + + ActivationTest(tflite::BuiltinOperator_LOGISTIC, + backends, + inputData, + outputExpectedData); +} + + +void ActivationTanHTest(std::vector& backends) +{ + std::vector inputData = { + -0.1f, -0.2f, -0.3f, -0.4f, + 0.1f, 0.2f, 0.3f, 0.4f, + -1.0f, -2.0f, -3.0f, -4.0f, + 1.0f, 2.0f, 3.0f, 4.0f + }; + + // Calculate output values for input. + auto f = [](float value) + { + return tanhf(value); + }; + std::vector outputExpectedData(inputData.size()); + std::transform(inputData.begin(), inputData.end(), outputExpectedData.begin(), f); + + ActivationTest(tflite::BuiltinOperator_TANH, + backends, + inputData, + outputExpectedData); +} + +void ActivationEluTest(std::vector& backends) +{ + std::vector inputData = { + -0.1f, -0.2f, -0.3f, -0.4f, + 0.1f, 0.2f, 0.3f, 0.4f, + -1.0f, -2.0f, -3.0f, -4.0f, + 1.0f, 2.0f, 3.0f, 4.0f + }; + + // Calculate output values for input. + auto f = [](float value) + { + if (value < 0) + { + // alpha * (exp(x) - 1) + return 1 * (std::exp(value) - 1); + } + return value; + }; + std::vector outputExpectedData(inputData.size()); + std::transform(inputData.begin(), inputData.end(), outputExpectedData.begin(), f); + + ActivationTest(tflite::BuiltinOperator_ELU, + backends, + inputData, + outputExpectedData); +} + +void ActivationHardSwishTest(std::vector& backends) +{ + std::vector inputData = { + -0.1f, -0.2f, -0.3f, -0.4f, + 0.1f, 0.2f, 0.3f, 0.4f, + -1.0f, -2.0f, -3.0f, -4.0f, + 1.0f, 2.0f, 3.0f, 4.0f + }; + + // Calculate output values for input. + auto f = [](float x) + { + // Break down the calculation to help with verification. 
+ // hard_swish(x) = x * relu6(x+3) / 6 + // relu6(x) = min(max(x,0),6) + float reLu6_step1 = std::max((x + 3),0.0f); + float reLu6Complete = std::min(reLu6_step1, 6.0f); + float hardSwish_step1 = x * reLu6Complete; + float result = hardSwish_step1 / 6; + return result; + }; + std::vector outputExpectedData(inputData.size()); + std::transform(inputData.begin(), inputData.end(), outputExpectedData.begin(), f); + + ActivationTest(tflite::BuiltinOperator_HARD_SWISH, + backends, + inputData, + outputExpectedData); +} + +TEST_SUITE("Activation_CpuRefTests") +{ + +TEST_CASE ("Activation_ReLu_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ActivationReLuTest(backends); +} + +TEST_CASE ("Activation_Bounded_Relu6_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ActivationBoundedReluTest(backends); +} + +TEST_CASE ("Activation_Sigmoid_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ActivationSigmoidTest(backends); +} + +TEST_CASE ("Activation_TanH_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ActivationTanHTest(backends); +} + +TEST_CASE ("Activation_Elu_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ActivationEluTest(backends); +} + +TEST_CASE ("Activation_HardSwish_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ActivationHardSwishTest(backends); +} + +} + +TEST_SUITE("Activation_CpuAccTests") +{ + +TEST_CASE ("Activation_ReLu_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ActivationReLuTest(backends); +} + +TEST_CASE ("Activation_Bounded_Relu6_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ActivationBoundedReluTest(backends); +} + +TEST_CASE ("Activation_Sigmoid_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ActivationSigmoidTest(backends); +} + +TEST_CASE ("Activation_TanH_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ActivationTanHTest(backends); +} + +TEST_CASE ("Activation_Elu_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ActivationEluTest(backends); +} + +TEST_CASE ("Activation_HardSwish_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ActivationHardSwishTest(backends); +} + +} + +TEST_SUITE("Activation_GpuAccTests") +{ + +TEST_CASE ("Activation_ReLu_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ActivationReLuTest(backends); +} + +TEST_CASE ("Activation_Bounded_Relu6_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ActivationBoundedReluTest(backends); +} + +TEST_CASE ("Activation_Sigmoid_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ActivationSigmoidTest(backends); +} + +TEST_CASE ("Activation_TanH_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ActivationTanHTest(backends); +} + +TEST_CASE ("Activation_Elu_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ActivationEluTest(backends); +} + +TEST_CASE ("Activation_HardSwish_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ActivationHardSwishTest(backends); +} + +} + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/ActivationTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/ActivationTestHelper.hpp new file mode 100644 index 0000000000..0f4d944685 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/ActivationTestHelper.hpp @@ -0,0 +1,130 @@ +// +// 
Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ + +std::vector CreateActivationTfLiteModel(tflite::BuiltinOperator activationOperatorCode, + tflite::TensorType tensorType, + const std::vector & tensorShape) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::array, 1> buffers; + buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})); + + std::array, 2> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorShape.data(), tensorShape.size()), + tensorType); + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorShape.data(), tensorShape.size()), + tensorType); + + // create operator + const std::vector operatorInputs{0}; + const std::vector operatorOutputs{1}; + flatbuffers::Offset unaryOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size())); + + const std::vector subgraphInputs{0}; + const std::vector subgraphOutputs{1}; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&unaryOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Activation Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, activationOperatorCode); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +void ActivationTest(tflite::BuiltinOperator activationOperatorCode, + std::vector& backends, + std::vector& inputValues, + std::vector& expectedOutputValues) +{ + using namespace tflite; + std::vector inputShape { { 4, 1, 4} }; + std::vector modelBuffer = CreateActivationTfLiteModel(activationOperatorCode, + ::tflite::TensorType_FLOAT32, + inputShape); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + 
armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(tfLiteInterpreter, 0, inputValues); + armnnDelegate::FillInput(armnnDelegateInterpreter, 0, inputValues); + + // Run EnqueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + armnnDelegate::CompareOutputData(tfLiteInterpreter, + armnnDelegateInterpreter, + inputShape, + expectedOutputValues); + + tfLiteInterpreter.reset(nullptr); + armnnDelegateInterpreter.reset(nullptr); +} + +} // anonymous namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/ArgMinMaxTest.cpp b/arch/arm/ARMnn/delegate/src/test/ArgMinMaxTest.cpp new file mode 100644 index 0000000000..bf60a77cb2 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/ArgMinMaxTest.cpp @@ -0,0 +1,174 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ArgMinMaxTestHelper.hpp" + +#include + +#include +#include + +#include + +namespace armnnDelegate +{ + +void ArgMaxFP32Test(std::vector& backends, int axisValue) +{ + // Set input data + std::vector inputShape { 1, 3, 2, 4 }; + std::vector outputShape { 1, 3, 4 }; + std::vector axisShape { 1 }; + + std::vector inputValues = { 1.0f, 2.0f, 3.0f, 4.0f, + 5.0f, 6.0f, 7.0f, 8.0f, + + 10.0f, 20.0f, 30.0f, 40.0f, + 50.0f, 60.0f, 70.0f, 80.0f, + + 100.0f, 200.0f, 300.0f, 400.0f, + 500.0f, 600.0f, 700.0f, 800.0f }; + + std::vector expectedOutputValues = { 1, 1, 1, 1, + 1, 1, 1, 1, + 1, 1, 1, 1 }; + + ArgMinMaxTest(tflite::BuiltinOperator_ARG_MAX, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + axisShape, + outputShape, + inputValues, + expectedOutputValues, + axisValue, + ::tflite::TensorType_INT32); +} + +void ArgMinFP32Test(std::vector& backends, int axisValue) +{ + // Set input data + std::vector inputShape { 1, 3, 2, 4 }; + std::vector outputShape { 1, 3, 2 }; + std::vector axisShape { 1 }; + + std::vector inputValues = { 1.0f, 2.0f, 3.0f, 4.0f, + 5.0f, 6.0f, 7.0f, 8.0f, + + 10.0f, 20.0f, 30.0f, 40.0f, + 50.0f, 60.0f, 70.0f, 80.0f, + + 100.0f, 200.0f, 300.0f, 400.0f, + 500.0f, 600.0f, 700.0f, 800.0f }; + + std::vector expectedOutputValues = { 0, 0, + 0, 0, + 0, 0 }; + + ArgMinMaxTest(tflite::BuiltinOperator_ARG_MIN, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + axisShape, + outputShape, + inputValues, + expectedOutputValues, + axisValue, + ::tflite::TensorType_INT32); +} + +void ArgMaxUint8Test(std::vector& backends, int axisValue) +{ + // Set input data + std::vector inputShape { 1, 1, 1, 5 }; + std::vector outputShape { 1, 1, 1 }; + std::vector axisShape { 1 }; + + std::vector inputValues = { 5, 2, 8, 10, 9 }; + + std::vector expectedOutputValues = { 3 }; + + ArgMinMaxTest(tflite::BuiltinOperator_ARG_MAX, + ::tflite::TensorType_UINT8, + backends, + inputShape, + axisShape, + outputShape, + inputValues, + expectedOutputValues, + axisValue, + ::tflite::TensorType_INT32); +} + +TEST_SUITE("ArgMinMax_CpuRefTests") +{ + +TEST_CASE ("ArgMaxFP32Test_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ArgMaxFP32Test(backends, 2); +} + +TEST_CASE ("ArgMinFP32Test_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ArgMinFP32Test(backends, 3); +} + +TEST_CASE 
("ArgMaxUint8Test_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ArgMaxUint8Test(backends, -1); +} + +} // TEST_SUITE("ArgMinMax_CpuRefTests") + +TEST_SUITE("ArgMinMax_CpuAccTests") +{ + +TEST_CASE ("ArgMaxFP32Test_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ArgMaxFP32Test(backends, 2); +} + +TEST_CASE ("ArgMinFP32Test_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ArgMinFP32Test(backends, 3); +} + +TEST_CASE ("ArgMaxUint8Test_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ArgMaxUint8Test(backends, -1); +} + +} // TEST_SUITE("ArgMinMax_CpuAccTests") + +TEST_SUITE("ArgMinMax_GpuAccTests") +{ + +TEST_CASE ("ArgMaxFP32Test_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ArgMaxFP32Test(backends, 2); +} + +TEST_CASE ("ArgMinFP32Test_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ArgMinFP32Test(backends, 3); +} + +TEST_CASE ("ArgMaxUint8Test_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ArgMaxUint8Test(backends, -1); +} + +} // TEST_SUITE("ArgMinMax_GpuAccTests") + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/ArgMinMaxTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/ArgMinMaxTestHelper.hpp new file mode 100644 index 0000000000..a734c819f9 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/ArgMinMaxTestHelper.hpp @@ -0,0 +1,198 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ + +template +std::vector CreateArgMinMaxTfLiteModel(tflite::BuiltinOperator argMinMaxOperatorCode, + tflite::TensorType tensorType, + const std::vector& inputTensorShape, + const std::vector& axisTensorShape, + const std::vector& outputTensorShape, + const std::vector axisValue, + tflite::TensorType outputType, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + + auto inputTensor = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputTensorShape.data(), + inputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input"), + quantizationParameters); + + auto axisTensor = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(axisTensorShape.data(), + axisTensorShape.size()), + tflite::TensorType_INT32, + 1, + flatBufferBuilder.CreateString("axis")); + + auto outputTensor = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + outputType, + 2, + flatBufferBuilder.CreateString("output"), + quantizationParameters); + + std::vector> tensors = { inputTensor, axisTensor, outputTensor }; + + std::vector> buffers; + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(axisValue.data()), + sizeof(OutputT)))); + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + + std::vector operatorInputs = {{ 0, 1 }}; + std::vector subgraphInputs = 
{{ 0, 1 }}; + + tflite::BuiltinOptions operatorBuiltinOptionsType = BuiltinOptions_ArgMaxOptions; + flatbuffers::Offset operatorBuiltinOptions = CreateArgMaxOptions(flatBufferBuilder, outputType).Union(); + + if (argMinMaxOperatorCode == tflite::BuiltinOperator_ARG_MIN) + { + operatorBuiltinOptionsType = BuiltinOptions_ArgMinOptions; + operatorBuiltinOptions = CreateArgMinOptions(flatBufferBuilder, outputType).Union(); + } + + // create operator + const std::vector operatorOutputs{ 2 }; + flatbuffers::Offset argMinMaxOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + const std::vector subgraphOutputs{ 2 }; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&argMinMaxOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: ArgMinMax Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, + argMinMaxOperatorCode); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void ArgMinMaxTest(tflite::BuiltinOperator argMinMaxOperatorCode, + tflite::TensorType tensorType, + const std::vector& backends, + const std::vector& inputShape, + const std::vector& axisShape, + std::vector& outputShape, + std::vector& inputValues, + std::vector& expectedOutputValues, + OutputT axisValue, + tflite::TensorType outputType, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + std::vector modelBuffer = CreateArgMinMaxTfLiteModel(argMinMaxOperatorCode, + tensorType, + inputShape, + axisShape, + outputShape, + {axisValue}, + outputType, + quantScale, + quantOffset); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + CHECK(tfLiteModel != nullptr); + + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + 
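// ModifyGraphWithDelegate hands the nodes that the ArmNN delegate claims to support over to ArmNN; + // the second interpreter (tfLiteInterpreter) stays on the reference TfLite kernels so that the two + // sets of outputs can be compared element by element below. +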
CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(tfLiteInterpreter, 0, inputValues); + armnnDelegate::FillInput(armnnDelegateInterpreter, 0, inputValues); + + // Run EnqueueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0]; + auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0]; + auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateOutputId); + + for (size_t i = 0; i < expectedOutputValues.size(); i++) + { + CHECK(expectedOutputValues[i] == armnnDelegateOutputData[i]); + CHECK(tfLiteDelageOutputData[i] == expectedOutputValues[i]); + CHECK(tfLiteDelageOutputData[i] == armnnDelegateOutputData[i]); + } +} + +} // anonymous namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/ArmnnDelegateTest.cpp b/arch/arm/ARMnn/delegate/src/test/ArmnnDelegateTest.cpp new file mode 100644 index 0000000000..bc73dde2ef --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/ArmnnDelegateTest.cpp @@ -0,0 +1,93 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN +#include + +#include + +#include "tensorflow/lite/kernels/builtin_op_kernels.h" +#include +#include + +namespace armnnDelegate +{ + +TEST_SUITE("ArmnnDelegate") +{ + +TEST_CASE ("ArmnnDelegate Registered") +{ + using namespace tflite; + auto tfLiteInterpreter = std::make_unique(); + + tfLiteInterpreter->AddTensors(3); + tfLiteInterpreter->SetInputs({0, 1}); + tfLiteInterpreter->SetOutputs({2}); + + tfLiteInterpreter->SetTensorParametersReadWrite(0, kTfLiteFloat32, "input1", {1,2,2,1}, TfLiteQuantization()); + tfLiteInterpreter->SetTensorParametersReadWrite(1, kTfLiteFloat32, "input2", {1,2,2,1}, TfLiteQuantization()); + tfLiteInterpreter->SetTensorParametersReadWrite(2, kTfLiteFloat32, "output", {1,2,2,1}, TfLiteQuantization()); + + tflite::ops::builtin::BuiltinOpResolver opResolver; + const TfLiteRegistration* opRegister = opResolver.FindOp(BuiltinOperator_ADD, 1); + tfLiteInterpreter->AddNodeWithParameters({0, 1}, {2}, "", 0, nullptr, opRegister); + + // Create the Armnn Delegate + std::vector backends = { armnn::Compute::CpuRef }; + std::vector backendOptions; + backendOptions.emplace_back( + armnn::BackendOptions{ "BackendName", + { + { "Option1", 42 }, + { "Option2", true } + }} + ); + + armnnDelegate::DelegateOptions delegateOptions(backends, backendOptions); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + + auto status = tfLiteInterpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate)); + CHECK(status == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); +} + +TEST_CASE ("ArmnnDelegateOptimizerOptionsRegistered") +{ + using namespace tflite; + auto tfLiteInterpreter = std::make_unique(); + + tfLiteInterpreter->AddTensors(3); + tfLiteInterpreter->SetInputs({0, 1}); + tfLiteInterpreter->SetOutputs({2}); + + tfLiteInterpreter->SetTensorParametersReadWrite(0, kTfLiteFloat32, "input1", {1,2,2,1}, TfLiteQuantization()); + tfLiteInterpreter->SetTensorParametersReadWrite(1, kTfLiteFloat32, "input2", {1,2,2,1}, TfLiteQuantization()); + 
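// Tensors 0 and 1 are the inputs of the single ADD node registered below; tensor 2 is its output. +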
tfLiteInterpreter->SetTensorParametersReadWrite(2, kTfLiteFloat32, "output", {1,2,2,1}, TfLiteQuantization()); + + tflite::ops::builtin::BuiltinOpResolver opResolver; + const TfLiteRegistration* opRegister = opResolver.FindOp(BuiltinOperator_ADD, 1); + tfLiteInterpreter->AddNodeWithParameters({0, 1}, {2}, "", 0, nullptr, opRegister); + + // Create the Armnn Delegate + std::vector backends = { armnn::Compute::CpuRef }; + + armnn::OptimizerOptions optimizerOptions(true, true, false, true); + + armnnDelegate::DelegateOptions delegateOptions(backends, optimizerOptions); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + + auto status = tfLiteInterpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate)); + CHECK(status == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); +} + +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/test/BatchSpaceTest.cpp b/arch/arm/ARMnn/delegate/src/test/BatchSpaceTest.cpp new file mode 100644 index 0000000000..47eba452e7 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/BatchSpaceTest.cpp @@ -0,0 +1,299 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "BatchSpaceTestHelper.hpp" + +#include + +#include +#include + +#include + +namespace armnnDelegate +{ + +// BatchToSpaceND Operator +void BatchToSpaceNDFp32Test(std::vector& backends) +{ + std::vector inputShape { 4, 1, 1, 1 }; + std::vector expectedOutputShape { 1, 2, 2, 1 }; + + std::vector inputValues { 1.0f, 2.0f, 3.0f, 4.0f }; + std::vector expectedOutputValues { 1.0f, 2.0f, 3.0f, 4.0f }; + + std::vector blockShape({2, 2}); + std::vector> crops = {{0, 0}, {0, 0}}; + + BatchSpaceTest(tflite::BuiltinOperator_BATCH_TO_SPACE_ND, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + expectedOutputShape, + inputValues, + blockShape, + crops, + expectedOutputValues); +} + +void BatchToSpaceNDFp32BatchOneTest(std::vector& backends) +{ + std::vector inputShape { 1, 2, 2, 1 }; + std::vector expectedOutputShape { 1, 2, 2, 1 }; + + std::vector inputValues { 1.0f, 2.0f, 3.0f, 4.0f }; + std::vector expectedOutputValues { 1.0f, 2.0f, 3.0f, 4.0f }; + + std::vector blockShape({1, 1}); + std::vector> crops = {{0, 0}, {0, 0}}; + + BatchSpaceTest(tflite::BuiltinOperator_BATCH_TO_SPACE_ND, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + expectedOutputShape, + inputValues, + blockShape, + crops, + expectedOutputValues); +} + +void BatchToSpaceNDUint8Test(std::vector& backends) +{ + std::vector inputShape { 4, 1, 1, 3 }; + std::vector expectedOutputShape { 1, 2, 2, 3 }; + + std::vector inputValues { 1, 2, 3, 4, 5, 6, 7 }; + std::vector expectedOutputValues { 1, 2, 3, 4, 5, 6, 7 }; + + std::vector blockShape({2, 2}); + std::vector> crops = {{0, 0}, {0, 0}}; + + BatchSpaceTest(tflite::BuiltinOperator_BATCH_TO_SPACE_ND, + ::tflite::TensorType_UINT8, + backends, + inputShape, + expectedOutputShape, + inputValues, + blockShape, + crops, + expectedOutputValues); +} + +// SpaceToBatchND Operator +void SpaceToBatchNDFp32Test(std::vector& backends) +{ + std::vector inputShape { 1, 2, 2, 1 }; + std::vector expectedOutputShape { 4, 1, 1, 1 }; + + std::vector inputValues { 1.0f, 2.0f, 3.0f, 4.0f }; + std::vector expectedOutputValues { 1.0f, 2.0f, 3.0f, 4.0f }; + + std::vector blockShape({2, 2}); + std::vector> padding = {{0, 0}, {0, 0}}; + + BatchSpaceTest(tflite::BuiltinOperator_SPACE_TO_BATCH_ND, + ::tflite::TensorType_FLOAT32, + 
backends, + inputShape, + expectedOutputShape, + inputValues, + blockShape, + padding, + expectedOutputValues); +} + +void SpaceToBatchNDFp32PaddingTest(std::vector& backends) +{ + std::vector inputShape { 2, 2, 4, 1 }; + std::vector expectedOutputShape { 8, 1, 3, 1 }; + + std::vector inputValues { 1.0f, 2.0f, 3.0f, 4.0f, + 5.0f, 6.0f, 7.0f, 8.0f, + 9.0f, 10.0f, 11.0f, 12.0f, + 13.0f, 14.0f, 15.0f, 16.0f }; + + std::vector expectedOutputValues { 0.0f, 1.0f, 3.0f, 0.0f, 9.0f, 11.0f, + 0.0f, 2.0f, 4.0f, 0.0f, 10.0f, 12.0f, + 0.0f, 5.0f, 7.0f, 0.0f, 13.0f, 15.0f, + 0.0f, 6.0f, 8.0f, 0.0f, 14.0f, 16.0f }; + + std::vector blockShape({2, 2}); + std::vector> padding = {{0, 0}, {2, 0}}; + + BatchSpaceTest(tflite::BuiltinOperator_SPACE_TO_BATCH_ND, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + expectedOutputShape, + inputValues, + blockShape, + padding, + expectedOutputValues); +} + +void SpaceToBatchNDUint8Test(std::vector& backends) +{ + std::vector inputShape { 1, 2, 2, 3 }; + std::vector expectedOutputShape { 4, 1, 1, 3 }; + + std::vector inputValues { 1, 2, 3, 4, 5, 6, 7 }; + std::vector expectedOutputValues { 1, 2, 3, 4, 5, 6, 7 }; + + std::vector blockShape({2, 2}); + std::vector> padding = {{0, 0}, {0, 0}}; + + BatchSpaceTest(tflite::BuiltinOperator_SPACE_TO_BATCH_ND, + ::tflite::TensorType_UINT8, + backends, + inputShape, + expectedOutputShape, + inputValues, + blockShape, + padding, + expectedOutputValues); +} + +// BatchToSpaceND Tests +TEST_SUITE("BatchToSpaceND_CpuAccTests") +{ + +TEST_CASE ("BatchToSpaceND_Fp32_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + BatchToSpaceNDFp32Test(backends); +} + +TEST_CASE ("BatchToSpaceND_Fp32_BatchOne_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + BatchToSpaceNDFp32BatchOneTest(backends); +} + +TEST_CASE ("BatchToSpaceND_Uint8_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + BatchToSpaceNDUint8Test(backends); +} + +} + +TEST_SUITE("BatchToSpaceND_GpuAccTests") +{ + +TEST_CASE ("BatchToSpaceND_Fp32_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + BatchToSpaceNDFp32Test(backends); +} + +TEST_CASE ("BatchToSpaceND_Fp32_BatchOne_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + BatchToSpaceNDFp32BatchOneTest(backends); +} + +TEST_CASE ("BatchToSpaceND_Uint8_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + BatchToSpaceNDUint8Test(backends); +} + +} + +TEST_SUITE("BatchToSpaceND_CpuRefTests") +{ + +TEST_CASE ("BatchToSpaceND_Fp32_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + BatchToSpaceNDFp32Test(backends); +} + +TEST_CASE ("BatchToSpaceND_Fp32_BatchOne_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + BatchToSpaceNDFp32BatchOneTest(backends); +} + +TEST_CASE ("BatchToSpaceND_Uint8_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + BatchToSpaceNDUint8Test(backends); +} + +} + +// SpaceToBatchND Tests +TEST_SUITE("SpaceToBatchND_CpuAccTests") +{ + +TEST_CASE ("SpaceToBatchND_Fp32_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + SpaceToBatchNDFp32Test(backends); +} + +TEST_CASE ("SpaceToBatchND_Fp32_Padding_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + SpaceToBatchNDFp32PaddingTest(backends); +} + +TEST_CASE ("SpaceToBatchND_Uint8_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + SpaceToBatchNDUint8Test(backends); +} + +} + +TEST_SUITE("SpaceToBatchND_GpuAccTests") +{ + +TEST_CASE 
("SpaceToBatchND_Fp32_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + SpaceToBatchNDFp32Test(backends); +} + +TEST_CASE ("SpaceToBatchND_Fp32_Padding_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + SpaceToBatchNDFp32PaddingTest(backends); +} + +TEST_CASE ("SpaceToBatchND_Uint8_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + SpaceToBatchNDUint8Test(backends); +} + +} + +TEST_SUITE("SpaceToBatchND_CpuRefTests") +{ + +TEST_CASE ("SpaceToBatchND_Fp32_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + SpaceToBatchNDFp32Test(backends); +} + +TEST_CASE ("SpaceToBatchND_Fp32_Padding_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + SpaceToBatchNDFp32PaddingTest(backends); +} + +TEST_CASE ("SpaceToBatchND_Uint8_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + SpaceToBatchNDUint8Test(backends); +} + +} + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/BatchSpaceTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/BatchSpaceTestHelper.hpp new file mode 100644 index 0000000000..464a5d9cbe --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/BatchSpaceTestHelper.hpp @@ -0,0 +1,216 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ + +std::vector CreateBatchSpaceTfLiteModel(tflite::BuiltinOperator batchSpaceOperatorCode, + tflite::TensorType tensorType, + std::vector& inputTensorShape, + std::vector & outputTensorShape, + std::vector& blockData, + std::vector>& cropsPadData, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::array, 3> buffers; + buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})); + buffers[1] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(blockData.data()), + sizeof(int32_t) * blockData.size())); + buffers[2] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(cropsPadData.data()), + sizeof(int64_t) * cropsPadData.size())); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + + std::string cropsOrPadding = + batchSpaceOperatorCode == tflite::BuiltinOperator_BATCH_TO_SPACE_ND ? 
"crops" : "padding"; + + std::vector blockShape { 2 }; + std::vector cropsOrPaddingShape { 2, 2 }; + + std::array, 4> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputTensorShape.data(), + inputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input"), + quantizationParameters); + + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(blockShape.data(), + blockShape.size()), + ::tflite::TensorType_INT32, + 1, + flatBufferBuilder.CreateString("block"), + quantizationParameters); + + tensors[2] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(cropsOrPaddingShape.data(), + cropsOrPaddingShape.size()), + ::tflite::TensorType_INT32, + 2, + flatBufferBuilder.CreateString(cropsOrPadding), + quantizationParameters); + + // Create output tensor + tensors[3] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output"), + quantizationParameters); + + // Create operator + tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_NONE; + flatbuffers::Offset operatorBuiltinOptions = 0; + switch (batchSpaceOperatorCode) + { + case tflite::BuiltinOperator_BATCH_TO_SPACE_ND: + { + operatorBuiltinOptionsType = tflite::BuiltinOptions_BatchToSpaceNDOptions; + operatorBuiltinOptions = CreateBatchToSpaceNDOptions(flatBufferBuilder).Union(); + break; + } + case tflite::BuiltinOperator_SPACE_TO_BATCH_ND: + { + operatorBuiltinOptionsType = tflite::BuiltinOptions_SpaceToBatchNDOptions; + operatorBuiltinOptions = CreateSpaceToBatchNDOptions(flatBufferBuilder).Union(); + break; + } + default: + break; + } + + const std::vector operatorInputs{ {0, 1, 2} }; + const std::vector operatorOutputs{ 3 }; + flatbuffers::Offset batchSpaceOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + const std::vector subgraphInputs{ {0, 1, 2} }; + const std::vector subgraphOutputs{ 3 }; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&batchSpaceOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: BatchSpace Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, batchSpaceOperatorCode); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void BatchSpaceTest(tflite::BuiltinOperator controlOperatorCode, + tflite::TensorType tensorType, + std::vector& backends, + std::vector& inputShape, + std::vector& expectedOutputShape, + std::vector& inputValues, + std::vector& blockShapeValues, + 
std::vector>& cropsPaddingValues, + std::vector& expectedOutputValues, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + std::vector modelBuffer = CreateBatchSpaceTfLiteModel(controlOperatorCode, + tensorType, + inputShape, + expectedOutputShape, + blockShapeValues, + cropsPaddingValues, + quantScale, + quantOffset); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(tfLiteInterpreter, 0, inputValues); + armnnDelegate::FillInput(armnnDelegateInterpreter, 0, inputValues); + + // Run EnqueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + armnnDelegate::CompareOutputData(tfLiteInterpreter, + armnnDelegateInterpreter, + expectedOutputShape, + expectedOutputValues); + + armnnDelegateInterpreter.reset(nullptr); + tfLiteInterpreter.reset(nullptr); +} + +} // anonymous namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/CastTest.cpp b/arch/arm/ARMnn/delegate/src/test/CastTest.cpp new file mode 100644 index 0000000000..a637071ffc --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/CastTest.cpp @@ -0,0 +1,95 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "CastTestHelper.hpp" + +#include + +#include +#include + +#include + +namespace armnnDelegate +{ + +void CastUint8ToFp32Test(std::vector& backends) +{ + std::vector inputShape {1, 3, 2, 3}; + + std::vector inputValues { 1, 3, 1, 3, 1, 3, 1, 3, 1, + 3, 1, 3, 1, 2, 1, 3, 1, 3 }; + + std::vector expectedOutputValues { 1.0f, 3.0f, 1.0f, 3.0f, 1.0f, 3.0f, 1.0f, 3.0f, 1.0f, + 3.0f, 1.0f, 3.0f, 1.0f, 2.0f, 1.0f, 3.0f, 1.0f, 3.0f }; + + CastTest(::tflite::TensorType_UINT8, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + inputValues, + expectedOutputValues); +} + +void CastInt32ToFp32Test(std::vector& backends) +{ + std::vector inputShape {1, 3, 2, 3}; + + std::vector inputValues { -1, -3, -1, -3, -1, -3, -1, -3, 1, + 3, 1, 3, 1, 2, 1, 3, 1, 3 }; + + std::vector expectedOutputValues { -1.0f, -3.0f, -1.0f, -3.0f, -1.0f, -3.0f, -1.0f, -3.0f, 1.0f, + 3.0f, 1.0f, 3.0f, 1.0f, 2.0f, 1.0f, 3.0f, 1.0f, 3.0f }; + + CastTest(::tflite::TensorType_INT32, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + inputValues, + expectedOutputValues); +} + +// CAST Test Suite +TEST_SUITE("CAST_CpuRefTests") +{ + +TEST_CASE ("CAST_UINT8_TO_FP32_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + CastUint8ToFp32Test(backends); +} + +TEST_CASE ("CAST_INT32_TO_FP32_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + CastInt32ToFp32Test(backends); +} + +} + +TEST_SUITE("CAST_CpuAccTests") +{ + +TEST_CASE ("CAST_INT32_TO_FP32_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + CastInt32ToFp32Test(backends); +} + +} + +TEST_SUITE("CAST_GpuAccTests") +{ + +TEST_CASE ("CAST_INT32_TO_FP32_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + CastInt32ToFp32Test(backends); +} + +} +// End of CAST Test Suite + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/CastTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/CastTestHelper.hpp new file mode 100644 index 0000000000..6b1d5ee947 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/CastTestHelper.hpp @@ -0,0 +1,157 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ +std::vector CreateCastTfLiteModel(tflite::TensorType inputTensorType, + tflite::TensorType outputTensorType, + const std::vector & tensorShape, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::vector> buffers; + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({quantScale}), + flatBufferBuilder.CreateVector({quantOffset})); + + std::array, 2> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorShape.data(), + tensorShape.size()), + inputTensorType, + 0, + flatBufferBuilder.CreateString("input"), + quantizationParameters); + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorShape.data(), + tensorShape.size()), + outputTensorType, + 0, + flatBufferBuilder.CreateString("output"), + quantizationParameters); + + const std::vector operatorInputs({0}); + const std::vector operatorOutputs({1}); + + flatbuffers::Offset castOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + BuiltinOptions_CastOptions, + CreateCastOptions(flatBufferBuilder).Union()); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: CAST Operator Model"); + flatbuffers::Offset operatorCode = + CreateOperatorCode(flatBufferBuilder, tflite::BuiltinOperator_CAST); + + const std::vector subgraphInputs({0}); + const std::vector subgraphOutputs({1}); + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&castOperator, 1)); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void CastTest(tflite::TensorType inputTensorType, + tflite::TensorType outputTensorType, + std::vector& backends, + std::vector& shape, + std::vector& inputValues, + std::vector& expectedOutputValues, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + std::vector modelBuffer = CreateCastTfLiteModel(inputTensorType, + outputTensorType, + shape, + quantScale, + quantOffset); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + + // Create TfLite Interpreters + std::unique_ptr armnnDelegate; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegate) == kTfLiteOk); + CHECK(armnnDelegate != nullptr); + CHECK(armnnDelegate->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteDelegate; 
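+    // Naming note: "armnnDelegate" and "tfLiteDelegate" in this helper are both plain TfLite interpreters;
+    // only the first has the ArmNN delegate attached below, while the second stays on the reference
+    // kernels and provides the expected results.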
+ CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteDelegate) == kTfLiteOk); + CHECK(tfLiteDelegate != nullptr); + CHECK(tfLiteDelegate->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegate->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(tfLiteDelegate, 0, inputValues); + armnnDelegate::FillInput(armnnDelegate, 0, inputValues); + + // Run EnqueWorkload + CHECK(tfLiteDelegate->Invoke() == kTfLiteOk); + CHECK(armnnDelegate->Invoke() == kTfLiteOk); + + // Compare output data + armnnDelegate::CompareOutputData(tfLiteDelegate, + armnnDelegate, + shape, + expectedOutputValues, + 0); + + tfLiteDelegate.reset(nullptr); + armnnDelegate.reset(nullptr); +} + +} // anonymous namespace diff --git a/arch/arm/ARMnn/delegate/src/test/ComparisonTest.cpp b/arch/arm/ARMnn/delegate/src/test/ComparisonTest.cpp new file mode 100644 index 0000000000..95bfe21d27 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/ComparisonTest.cpp @@ -0,0 +1,844 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ComparisonTestHelper.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace armnnDelegate +{ + +void EqualFP32Test(std::vector& backends) +{ + std::vector input0Shape { 2, 2, 2, 2 }; + std::vector input1Shape { 2, 2, 2, 2 }; + std::vector expectedOutputShape { 2, 2, 2, 2 }; + + std::vector input0Values = + { + 1.f, 1.f, 1.f, 1.f, 5.f, 5.f, 5.f, 5.f, + 3.f, 3.f, 3.f, 3.f, 4.f, 4.f, 4.f, 4.f + }; + + std::vector input1Values = + { + 1.f, 1.f, 1.f, 1.f, 3.f, 3.f, 3.f, 3.f, + 5.f, 5.f, 5.f, 5.f, 4.f, 4.f, 4.f, 4.f + }; + + std::vector expectedOutputValues = + { + 1, 1, 1, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 1 + }; + + + ComparisonTest(tflite::BuiltinOperator_EQUAL, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void EqualBroadcastTest(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 3 }; + std::vector input1Shape { 1, 1, 1, 3 }; + std::vector expectedOutputShape { 1, 2, 2, 3 }; + + std::vector input0Values + { + 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, + 7.f, 8.f, 9.f, 10.f, 11.f, 12.f + }; + std::vector input1Values { 4.f, 5.f, 6.f }; + // Set output data + std::vector expectedOutputValues + { + 0, 0, 0, 1, 1, 1, + 0, 0, 0, 0, 0, 0 + }; + ComparisonTest(tflite::BuiltinOperator_EQUAL, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void EqualInt32Test(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 1 }; + std::vector input1Shape { 1, 2, 2, 1 }; + std::vector expectedOutputShape { 1, 2, 2, 1 }; + + std::vector input0Values = { 1, 5, 6, 4 }; + + std::vector input1Values = { 1, 3, 9, 4 }; + + std::vector expectedOutputValues = { 1, 0, 0, 1 }; + + ComparisonTest(tflite::BuiltinOperator_EQUAL, + ::tflite::TensorType_INT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + 
expectedOutputValues); +} + +void NotEqualFP32Test(std::vector& backends) +{ + std::vector input0Shape { 2, 2, 2, 2 }; + std::vector input1Shape { 2, 2, 2, 2 }; + std::vector expectedOutputShape { 2, 2, 2, 2 }; + + std::vector input0Values = + { + 1.f, 1.f, 1.f, 1.f, 5.f, 5.f, 5.f, 5.f, + 3.f, 3.f, 3.f, 3.f, 4.f, 4.f, 4.f, 4.f + }; + + std::vector input1Values = + { + 1.f, 1.f, 1.f, 1.f, 3.f, 3.f, 3.f, 3.f, + 5.f, 5.f, 5.f, 5.f, 4.f, 4.f, 4.f, 4.f + }; + + std::vector expectedOutputValues = + { + 0, 0, 0, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 0, 0, 0, 0 + }; + + ComparisonTest(tflite::BuiltinOperator_NOT_EQUAL, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void NotEqualBroadcastTest(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 3 }; + std::vector input1Shape { 1, 1, 1, 3 }; + std::vector expectedOutputShape { 1, 2, 2, 3 }; + + std::vector input0Values + { + 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, + 7.f, 8.f, 9.f, 10.f, 11.f, 12.f + }; + std::vector input1Values { 4.f, 5.f, 6.f }; + // Set output data + std::vector expectedOutputValues + { + 1, 1, 1, 0, 0, 0, + 1, 1, 1, 1, 1, 1 + }; + ComparisonTest(tflite::BuiltinOperator_NOT_EQUAL, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void NotEqualInt32Test(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 1 }; + std::vector input1Shape { 1, 2, 2, 1 }; + std::vector expectedOutputShape { 1, 2, 2, 1 }; + + std::vector input0Values = { 1, 5, 6, 4 }; + + std::vector input1Values = { 1, 3, 9, 4 }; + + std::vector expectedOutputValues = { 0, 1, 1, 0 }; + + ComparisonTest(tflite::BuiltinOperator_NOT_EQUAL, + ::tflite::TensorType_INT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void GreaterFP32Test(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 1 }; + std::vector input1Shape { 1, 2, 2, 1 }; + std::vector expectedOutputShape { 1, 2, 2, 1 }; + + std::vector input0Values = { 1, 5, 6, 4 }; + + std::vector input1Values = { 1, 3, 9, 4 }; + + std::vector expectedOutputValues = { 0, 1, 0, 0 }; + + ComparisonTest(tflite::BuiltinOperator_GREATER, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void GreaterBroadcastTest(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 3 }; + std::vector input1Shape { 1, 1, 1, 3 }; + std::vector expectedOutputShape { 1, 2, 2, 3 }; + + std::vector input0Values + { + 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, + 7.f, 8.f, 9.f, 10.f, 11.f, 12.f + }; + std::vector input1Values { 4.f, 5.f, 6.f }; + + std::vector expectedOutputValues + { + 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1 + }; + ComparisonTest(tflite::BuiltinOperator_GREATER, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void GreaterInt32Test(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 1 }; + std::vector input1Shape { 1, 2, 2, 1 }; + std::vector expectedOutputShape { 1, 2, 2, 1 }; + + std::vector input0Values = { 1, 5, 6, 4 }; + + std::vector input1Values = { 1, 3, 9, 4 }; + + std::vector expectedOutputValues = { 0, 1, 0, 0 }; + + ComparisonTest(tflite::BuiltinOperator_GREATER, + ::tflite::TensorType_INT32, + 
backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void GreaterEqualFP32Test(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 1 }; + std::vector input1Shape { 1, 2, 2, 1 }; + std::vector expectedOutputShape { 1, 2, 2, 1 }; + + std::vector input0Values = { 1.f, 5.f, 6.f, 4.f }; + + std::vector input1Values = { 1.f, 3.f, 9.f, 4.f }; + + std::vector expectedOutputValues = { true, true, false, true }; + + ComparisonTest(tflite::BuiltinOperator_GREATER_EQUAL, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void GreaterEqualBroadcastTest(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 3 }; + std::vector input1Shape { 1, 1, 1, 3 }; + std::vector expectedOutputShape { 1, 2, 2, 3 }; + + std::vector input0Values + { + 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, + 7.f, 8.f, 9.f, 10.f, 11.f, 12.f + }; + std::vector input1Values { 4.f, 5.f, 6.f }; + // Set output data + std::vector expectedOutputValues + { + 0, 0, 0, 1, 1, 1, + 1, 1, 1, 1, 1, 1 + }; + + ComparisonTest(tflite::BuiltinOperator_GREATER_EQUAL, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void GreaterEqualInt32Test(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 1 }; + std::vector input1Shape { 1, 2, 2, 1 }; + std::vector expectedOutputShape { 1, 2, 2, 1 }; + + std::vector input0Values = { 1, 5, 6, 3 }; + + std::vector input1Values = { 1, 3, 9, 4 }; + + std::vector expectedOutputValues = { 1, 1, 0, 0 }; + + ComparisonTest(tflite::BuiltinOperator_GREATER_EQUAL, + ::tflite::TensorType_INT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void LessFP32Test(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 1 }; + std::vector input1Shape { 1, 2, 2, 1 }; + std::vector expectedOutputShape { 1, 2, 2, 1 }; + + std::vector input0Values = { 1.f, 5.f, 6.f, 4.f }; + + std::vector input1Values = { 1.f, 3.f, 9.f, 4.f }; + + std::vector expectedOutputValues = { false, false, true, false }; + + ComparisonTest(tflite::BuiltinOperator_LESS, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void LessBroadcastTest(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 3 }; + std::vector input1Shape { 1, 1, 1, 3 }; + std::vector expectedOutputShape { 1, 2, 2, 3 }; + + std::vector input0Values + { + 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, + 7.f, 8.f, 9.f, 10.f, 11.f, 12.f + }; + std::vector input1Values { 4.f, 5.f, 6.f }; + + std::vector expectedOutputValues + { + true, true, true, false, false, false, + false, false, false, false, false, false + }; + + ComparisonTest(tflite::BuiltinOperator_LESS, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void LessInt32Test(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 1 }; + std::vector input1Shape { 1, 2, 2, 1 }; + std::vector expectedOutputShape { 1, 2, 2, 1 }; + + std::vector input0Values = { 1, 5, 6, 3 }; + + std::vector input1Values = { 1, 3, 9, 4 }; + + std::vector expectedOutputValues = { false, false, true, true }; + + ComparisonTest(tflite::BuiltinOperator_LESS, + 
::tflite::TensorType_INT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void LessEqualFP32Test(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 1 }; + std::vector input1Shape { 1, 2, 2, 1 }; + std::vector expectedOutputShape { 1, 2, 2, 1 }; + + std::vector input0Values = { 1.f, 5.f, 6.f, 4.f }; + + std::vector input1Values = { 1.f, 3.f, 9.f, 4.f }; + + std::vector expectedOutputValues = { true, false, true, true }; + + ComparisonTest(tflite::BuiltinOperator_LESS_EQUAL, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void LessEqualBroadcastTest(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 3 }; + std::vector input1Shape { 1, 1, 1, 3 }; + std::vector expectedOutputShape { 1, 2, 2, 3 }; + + std::vector input0Values + { + 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, + 7.f, 8.f, 9.f, 10.f, 11.f, 12.f + }; + std::vector input1Values { 4.f, 5.f, 6.f }; + + std::vector expectedOutputValues + { + true, true, true, true, true, true, + false, false, false, false, false, false + }; + + ComparisonTest(tflite::BuiltinOperator_LESS_EQUAL, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void LessEqualInt32Test(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 1 }; + std::vector input1Shape { 1, 2, 2, 1 }; + std::vector expectedOutputShape { 1, 2, 2, 1 }; + + std::vector input0Values = { 1, 5, 6, 3 }; + + std::vector input1Values = { 1, 3, 9, 4 }; + + std::vector expectedOutputValues = { true, false, true, true }; + + ComparisonTest(tflite::BuiltinOperator_LESS_EQUAL, + ::tflite::TensorType_INT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +TEST_SUITE("Comparison_CpuRefTests") +{ + +TEST_CASE ("EQUAL_FP32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + EqualFP32Test(backends); +} + +TEST_CASE ("EQUAL_Broadcast_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + EqualBroadcastTest(backends); +} + +TEST_CASE ("EQUAL_INT32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + EqualInt32Test(backends); +} + +TEST_CASE ("NOT_EQUAL_FP32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + NotEqualFP32Test(backends); +} + +TEST_CASE ("NOT_EQUAL_Broadcast_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + NotEqualBroadcastTest(backends); +} + +TEST_CASE ("NOT_EQUAL_INT32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + NotEqualInt32Test(backends); +} + +TEST_CASE ("GREATER_FP32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + GreaterFP32Test(backends); +} + +TEST_CASE ("GREATER_Broadcast_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + GreaterBroadcastTest(backends); +} + +TEST_CASE ("GREATER_INT32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + GreaterInt32Test(backends); +} + +TEST_CASE ("GREATER_EQUAL_FP32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + GreaterEqualFP32Test(backends); +} + +TEST_CASE ("GREATER_EQUAL_Broadcast_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + GreaterEqualBroadcastTest(backends); +} + +TEST_CASE ("GREATER_EQUAL_INT32_CpuRef_Test") 
+{ + std::vector backends = { armnn::Compute::CpuRef }; + GreaterEqualInt32Test(backends); +} + +TEST_CASE ("LESS_FP32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + LessFP32Test(backends); +} + +TEST_CASE ("LESS_Broadcast_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + LessBroadcastTest(backends); +} + +TEST_CASE ("LESS_INT32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + LessInt32Test(backends); +} + +TEST_CASE ("LESS_EQUAL_FP32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + LessEqualFP32Test(backends); +} + +TEST_CASE ("LESS_EQUAL_Broadcast_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + LessEqualBroadcastTest(backends); +} + +TEST_CASE ("LESS_EQUAL_INT32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + LessEqualInt32Test(backends); +} +} // End TEST_SUITE("Comparison_CpuRefTests") + + + +TEST_SUITE("Comparison_GpuAccTests") +{ + +TEST_CASE ("EQUAL_FP32_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + EqualFP32Test(backends); +} + +TEST_CASE ("EQUAL_Broadcast_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + EqualBroadcastTest(backends); +} + +TEST_CASE ("EQUAL_INT32_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + EqualInt32Test(backends); +} + +TEST_CASE ("NOT_EQUAL_FP32_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + NotEqualFP32Test(backends); +} + +TEST_CASE ("NOT_EQUAL_Broadcast_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + NotEqualBroadcastTest(backends); +} + +TEST_CASE ("NOT_EQUAL_INT32_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + NotEqualInt32Test(backends); +} + +TEST_CASE ("GREATER_FP32_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc, + armnn::Compute::CpuRef }; + GreaterFP32Test(backends); +} + +TEST_CASE ("GREATER_Broadcast_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc, + armnn::Compute::CpuRef }; + GreaterBroadcastTest(backends); +} + +TEST_CASE ("GREATER_INT32_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc, + armnn::Compute::CpuRef }; + GreaterInt32Test(backends); +} + +TEST_CASE ("GREATER_EQUAL_FP32_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + GreaterEqualFP32Test(backends); +} + +TEST_CASE ("GREATER_EQUAL_Broadcast_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + GreaterEqualBroadcastTest(backends); +} + +TEST_CASE ("GREATER_EQUAL_INT32_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + GreaterEqualInt32Test(backends); +} + +TEST_CASE ("LESS_FP32_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + LessFP32Test(backends); +} + +TEST_CASE ("LESS_Broadcast_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + LessBroadcastTest(backends); +} + +TEST_CASE ("LESS_INT32_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + LessInt32Test(backends); +} + +TEST_CASE ("LESS_EQUAL_FP32_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + LessEqualFP32Test(backends); +} + +TEST_CASE ("LESS_EQUAL_Broadcast_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + LessEqualBroadcastTest(backends); +} + +TEST_CASE ("LESS_EQUAL_INT32_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + LessEqualInt32Test(backends); +} + +} // End TEST_SUITE("Comparison_GpuAccTests") 
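+// The CpuAcc suite below repeats the same comparison cases as the suites above, differing only in the
+// backend list handed to the delegate options.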
+ + +TEST_SUITE("Comparison_CpuAccTests") +{ + +TEST_CASE ("EQUAL_FP32_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + EqualFP32Test(backends); +} + +TEST_CASE ("EQUAL_Broadcast_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + EqualBroadcastTest(backends); +} + +TEST_CASE ("EQUAL_INT32_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + EqualInt32Test(backends); +} + +TEST_CASE ("NOT_EQUAL_FP32_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + NotEqualFP32Test(backends); +} + +TEST_CASE ("NOT_EQUAL_Broadcast_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + NotEqualBroadcastTest(backends); +} + +TEST_CASE ("NOT_EQUAL_INT32_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + NotEqualInt32Test(backends); +} + +TEST_CASE ("GREATER_FP32_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + GreaterFP32Test(backends); +} + +TEST_CASE ("GREATER_Broadcast_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + GreaterBroadcastTest(backends); +} + +TEST_CASE ("GREATER_INT32_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + GreaterInt32Test(backends); +} + +TEST_CASE ("GREATER_EQUAL_FP32_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + GreaterEqualFP32Test(backends); +} + +TEST_CASE ("GREATER_EQUAL_Broadcast_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + GreaterEqualBroadcastTest(backends); +} + +TEST_CASE ("GREATER_EQUAL_INT32_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + GreaterEqualInt32Test(backends); +} + +TEST_CASE ("LESS_FP32_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + LessFP32Test(backends); +} + +TEST_CASE ("LESS_Broadcast_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + LessBroadcastTest(backends); +} + +TEST_CASE ("LESS_INT32_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + LessInt32Test(backends); +} + +TEST_CASE ("LESS_EQUAL_FP32_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + LessEqualFP32Test(backends); +} + +TEST_CASE ("LESS_EQUAL_Broadcast_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + LessEqualBroadcastTest(backends); +} + +TEST_CASE ("LESS_EQUAL_INT32_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + LessEqualInt32Test(backends); +} + +} // End TEST_SUITE("Comparison_CpuAccTests") + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/ComparisonTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/ComparisonTestHelper.hpp new file mode 100644 index 0000000000..c9ccb778ac --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/ComparisonTestHelper.hpp @@ -0,0 +1,235 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "TestUtils.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/kernels/register.h>
+#include <tensorflow/lite/model.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+#include <tensorflow/lite/version.h>
+
+#include <doctest/doctest.h>
+
+namespace
+{
+
+std::vector<char> CreateComparisonTfLiteModel(tflite::BuiltinOperator comparisonOperatorCode,
+                                              tflite::TensorType tensorType,
+                                              const std::vector<int32_t>& input0TensorShape,
+                                              const std::vector<int32_t>& input1TensorShape,
+                                              const std::vector<int32_t>& outputTensorShape,
+                                              float quantScale = 1.0f,
+                                              int quantOffset = 0)
+{
+    using namespace tflite;
+    flatbuffers::FlatBufferBuilder flatBufferBuilder;
+
+    std::vector<flatbuffers::Offset<tflite::Buffer>> buffers;
+    buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector<uint8_t>({})));
+
+    auto quantizationParameters =
+        CreateQuantizationParameters(flatBufferBuilder,
+                                     0,
+                                     0,
+                                     flatBufferBuilder.CreateVector<float>({ quantScale }),
+                                     flatBufferBuilder.CreateVector<int64_t>({ quantOffset }));
+
+    std::array<flatbuffers::Offset<Tensor>, 3> tensors;
+    tensors[0] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(input0TensorShape.data(),
+                                                                      input0TensorShape.size()),
+                              tensorType,
+                              0,
+                              flatBufferBuilder.CreateString("input_0"),
+                              quantizationParameters);
+    tensors[1] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(input1TensorShape.data(),
+                                                                      input1TensorShape.size()),
+                              tensorType,
+                              0,
+                              flatBufferBuilder.CreateString("input_1"),
+                              quantizationParameters);
+    tensors[2] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(outputTensorShape.data(),
+                                                                      outputTensorShape.size()),
+                              ::tflite::TensorType_BOOL,
+                              0);
+
+    // create operator
+    tflite::BuiltinOptions operatorBuiltinOptionsType = BuiltinOptions_EqualOptions;
+    flatbuffers::Offset<void> operatorBuiltinOptions = CreateEqualOptions(flatBufferBuilder).Union();
+    switch (comparisonOperatorCode)
+    {
+        case BuiltinOperator_EQUAL:
+        {
+            operatorBuiltinOptionsType = BuiltinOptions_EqualOptions;
+            operatorBuiltinOptions = CreateEqualOptions(flatBufferBuilder).Union();
+            break;
+        }
+        case BuiltinOperator_NOT_EQUAL:
+        {
+            operatorBuiltinOptionsType = BuiltinOptions_NotEqualOptions;
+            operatorBuiltinOptions = CreateNotEqualOptions(flatBufferBuilder).Union();
+            break;
+        }
+        case BuiltinOperator_GREATER:
+        {
+            operatorBuiltinOptionsType = BuiltinOptions_GreaterOptions;
+            operatorBuiltinOptions = CreateGreaterOptions(flatBufferBuilder).Union();
+            break;
+        }
+        case BuiltinOperator_GREATER_EQUAL:
+        {
+            operatorBuiltinOptionsType = BuiltinOptions_GreaterEqualOptions;
+            operatorBuiltinOptions = CreateGreaterEqualOptions(flatBufferBuilder).Union();
+            break;
+        }
+        case BuiltinOperator_LESS:
+        {
+            operatorBuiltinOptionsType = BuiltinOptions_LessOptions;
+            operatorBuiltinOptions = CreateLessOptions(flatBufferBuilder).Union();
+            break;
+        }
+        case BuiltinOperator_LESS_EQUAL:
+        {
+            operatorBuiltinOptionsType = BuiltinOptions_LessEqualOptions;
+            operatorBuiltinOptions = CreateLessEqualOptions(flatBufferBuilder).Union();
+            break;
+        }
+        default:
+            break;
+    }
+    const std::vector<int32_t> operatorInputs{0, 1};
+    const std::vector<int32_t> operatorOutputs{2};
+    flatbuffers::Offset<Operator> comparisonOperator =
+        CreateOperator(flatBufferBuilder,
+                       0,
+                       flatBufferBuilder.CreateVector<int32_t>(operatorInputs.data(), operatorInputs.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(operatorOutputs.data(), operatorOutputs.size()),
+                       operatorBuiltinOptionsType,
+                       operatorBuiltinOptions);
+
+    const std::vector<int32_t> subgraphInputs{0, 1};
+    const std::vector<int32_t> subgraphOutputs{2};
+    flatbuffers::Offset<SubGraph> subgraph =
+        CreateSubGraph(flatBufferBuilder,
+                       flatBufferBuilder.CreateVector(tensors.data(), tensors.size()),
+                       
flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&comparisonOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Comparison Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, comparisonOperatorCode); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void ComparisonTest(tflite::BuiltinOperator comparisonOperatorCode, + tflite::TensorType tensorType, + std::vector& backends, + std::vector& input0Shape, + std::vector& input1Shape, + std::vector& outputShape, + std::vector& input0Values, + std::vector& input1Values, + std::vector& expectedOutputValues, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + std::vector modelBuffer = CreateComparisonTfLiteModel(comparisonOperatorCode, + tensorType, + input0Shape, + input1Shape, + outputShape, + quantScale, + quantOffset); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + auto tfLiteDelegateInput0Id = tfLiteInterpreter->inputs()[0]; + auto tfLiteDelageInput0Data = tfLiteInterpreter->typed_tensor(tfLiteDelegateInput0Id); + for (unsigned int i = 0; i < input0Values.size(); ++i) + { + tfLiteDelageInput0Data[i] = input0Values[i]; + } + + auto tfLiteDelegateInput1Id = tfLiteInterpreter->inputs()[1]; + auto tfLiteDelageInput1Data = tfLiteInterpreter->typed_tensor(tfLiteDelegateInput1Id); + for (unsigned int i = 0; i < input1Values.size(); ++i) + { + tfLiteDelageInput1Data[i] = input1Values[i]; + } + + auto armnnDelegateInput0Id = armnnDelegateInterpreter->inputs()[0]; + auto armnnDelegateInput0Data = armnnDelegateInterpreter->typed_tensor(armnnDelegateInput0Id); + for (unsigned int i = 0; i < input0Values.size(); ++i) + { + armnnDelegateInput0Data[i] = input0Values[i]; + } + + auto armnnDelegateInput1Id = armnnDelegateInterpreter->inputs()[1]; + auto armnnDelegateInput1Data = armnnDelegateInterpreter->typed_tensor(armnnDelegateInput1Id); + for 
(unsigned int i = 0; i < input1Values.size(); ++i) + { + armnnDelegateInput1Data[i] = input1Values[i]; + } + + // Run EnqueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + // Compare output data + auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0]; + auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0]; + auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateOutputId); + + armnnDelegate::CompareData(expectedOutputValues , armnnDelegateOutputData, expectedOutputValues.size()); + armnnDelegate::CompareData(expectedOutputValues , tfLiteDelageOutputData , expectedOutputValues.size()); + armnnDelegate::CompareData(tfLiteDelageOutputData, armnnDelegateOutputData, expectedOutputValues.size()); +} + +} // anonymous namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/ControlTest.cpp b/arch/arm/ARMnn/delegate/src/test/ControlTest.cpp new file mode 100644 index 0000000000..43491be982 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/ControlTest.cpp @@ -0,0 +1,420 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ControlTestHelper.hpp" + +#include + +#include +#include + +#include + +namespace armnnDelegate +{ + +// CONCATENATION Operator +void ConcatUint8TwoInputsTest(std::vector& backends) +{ + std::vector inputShape { 2, 2 }; + std::vector expectedOutputShape { 4, 2 }; + + // Set input and output data + std::vector> inputValues; + std::vector inputValue1 { 0, 1, 2, 3 }; // Lower bounds + std::vector inputValue2 { 252, 253, 254, 255 }; // Upper bounds + inputValues.push_back(inputValue1); + inputValues.push_back(inputValue2); + + std::vector expectedOutputValues { 0, 1, 2, 3, 252, 253, 254, 255 }; + + ConcatenationTest(tflite::BuiltinOperator_CONCATENATION, + ::tflite::TensorType_UINT8, + backends, + inputShape, + expectedOutputShape, + inputValues, + expectedOutputValues); +} + +void ConcatInt16TwoInputsTest(std::vector& backends) +{ + std::vector inputShape { 2, 2 }; + std::vector expectedOutputShape { 4, 2 }; + + std::vector> inputValues; + std::vector inputValue1 { -32768, -16384, -1, 0 }; + std::vector inputValue2 { 1, 2, 16384, 32767 }; + inputValues.push_back(inputValue1); + inputValues.push_back(inputValue2); + + std::vector expectedOutputValues { -32768, -16384, -1, 0, 1, 2, 16384, 32767}; + + ConcatenationTest(tflite::BuiltinOperator_CONCATENATION, + ::tflite::TensorType_INT16, + backends, + inputShape, + expectedOutputShape, + inputValues, + expectedOutputValues); +} + +void ConcatFloat32TwoInputsTest(std::vector& backends) +{ + std::vector inputShape { 2, 2 }; + std::vector expectedOutputShape { 4, 2 }; + + std::vector> inputValues; + std::vector inputValue1 { -127.f, -126.f, -1.f, 0.f }; + std::vector inputValue2 { 1.f, 2.f, 126.f, 127.f }; + inputValues.push_back(inputValue1); + inputValues.push_back(inputValue2); + + std::vector expectedOutputValues { -127.f, -126.f, -1.f, 0.f, 1.f, 2.f, 126.f, 127.f }; + + ConcatenationTest(tflite::BuiltinOperator_CONCATENATION, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + expectedOutputShape, + inputValues, + expectedOutputValues); +} + +void ConcatThreeInputsTest(std::vector& backends) +{ + std::vector inputShape { 2, 2 }; + std::vector expectedOutputShape { 6, 2 }; + + std::vector> inputValues; + std::vector 
inputValue1 { 0, 1, 2, 3 }; + std::vector inputValue2 { 125, 126, 127, 128 }; + std::vector inputValue3 { 252, 253, 254, 255 }; + inputValues.push_back(inputValue1); + inputValues.push_back(inputValue2); + inputValues.push_back(inputValue3); + + std::vector expectedOutputValues { 0, 1, 2, 3, 125, 126, 127, 128, 252, 253, 254, 255 }; + + ConcatenationTest(tflite::BuiltinOperator_CONCATENATION, + ::tflite::TensorType_UINT8, + backends, + inputShape, + expectedOutputShape, + inputValues, + expectedOutputValues); +} + +void ConcatAxisTest(std::vector& backends) +{ + std::vector inputShape { 1, 2, 2 }; + std::vector expectedOutputShape { 1, 2, 4 }; + + std::vector> inputValues; + std::vector inputValue1 { 0, 1, 2, 3 }; + std::vector inputValue3 { 252, 253, 254, 255 }; + inputValues.push_back(inputValue1); + inputValues.push_back(inputValue3); + + std::vector expectedOutputValues { 0, 1, 252, 253, 2, 3, 254, 255 }; + + ConcatenationTest(tflite::BuiltinOperator_CONCATENATION, + ::tflite::TensorType_UINT8, + backends, + inputShape, + expectedOutputShape, + inputValues, + expectedOutputValues, + 2); +} + +// MEAN Operator +void MeanUint8KeepDimsTest(std::vector& backends) +{ + std::vector input0Shape { 1, 3 }; + std::vector input1Shape { 1 }; + std::vector expectedOutputShape { 1, 1 }; + + std::vector input0Values { 5, 10, 15 }; // Inputs + std::vector input1Values { 1 }; // Axis + + std::vector expectedOutputValues { 10 }; + + MeanTest(tflite::BuiltinOperator_MEAN, + ::tflite::TensorType_UINT8, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues, + true); +} + +void MeanUint8Test(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2 }; + std::vector input1Shape { 1 }; + std::vector expectedOutputShape { 2, 2 }; + + std::vector input0Values { 5, 10, 15, 20 }; // Inputs + std::vector input1Values { 0 }; // Axis + + std::vector expectedOutputValues { 5, 10, 15, 20 }; + + MeanTest(tflite::BuiltinOperator_MEAN, + ::tflite::TensorType_UINT8, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues, + false); +} + +void MeanFp32KeepDimsTest(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2 }; + std::vector input1Shape { 1 }; + std::vector expectedOutputShape { 1, 1, 2 }; + + std::vector input0Values { 1.0f, 1.5f, 2.0f, 2.5f }; // Inputs + std::vector input1Values { 1 }; // Axis + + std::vector expectedOutputValues { 1.5f, 2.0f }; + + MeanTest(tflite::BuiltinOperator_MEAN, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues, + true); +} + +void MeanFp32Test(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 1 }; + std::vector input1Shape { 1 }; + std::vector expectedOutputShape { 1, 2, 1 }; + + std::vector input0Values { 1.0f, 1.5f, 2.0f, 2.5f }; // Inputs + std::vector input1Values { 2 }; // Axis + + std::vector expectedOutputValues { 1.25f, 2.25f }; + + MeanTest(tflite::BuiltinOperator_MEAN, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues, + false); +} + +// CONCATENATION Tests. 
+TEST_SUITE("Concatenation_CpuAccTests") +{ + +TEST_CASE ("Concatenation_Uint8_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + ConcatUint8TwoInputsTest(backends); +} + +TEST_CASE ("Concatenation_Int16_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + ConcatInt16TwoInputsTest(backends); +} + +TEST_CASE ("Concatenation_Float32_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + ConcatFloat32TwoInputsTest(backends); +} + +TEST_CASE ("Concatenation_Three_Inputs_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + ConcatThreeInputsTest(backends); +} + +TEST_CASE ("Concatenation_Axis_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + ConcatAxisTest(backends); +} + +} + +TEST_SUITE("Concatenation_GpuAccTests") +{ + +TEST_CASE ("Concatenation_Uint8_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + ConcatUint8TwoInputsTest(backends); +} + +TEST_CASE ("Concatenation_Int16_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + ConcatInt16TwoInputsTest(backends); +} + +TEST_CASE ("Concatenation_Float32_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + ConcatFloat32TwoInputsTest(backends); +} + +TEST_CASE ("Concatenation_Three_Inputs_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + ConcatThreeInputsTest(backends); +} + +TEST_CASE ("Concatenation_Axis_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + ConcatAxisTest(backends); +} + +} + +TEST_SUITE("Concatenation_CpuRefTests") +{ + +TEST_CASE ("Concatenation_Uint8_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + ConcatUint8TwoInputsTest(backends); +} + +TEST_CASE ("Concatenation_Int16_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + ConcatInt16TwoInputsTest(backends); +} + +TEST_CASE ("Concatenation_Float32_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + ConcatFloat32TwoInputsTest(backends); +} + +TEST_CASE ("Concatenation_Three_Inputs_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + ConcatThreeInputsTest(backends); +} + +TEST_CASE ("Concatenation_Axis_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + ConcatAxisTest(backends); +} + +} + +// MEAN Tests +TEST_SUITE("Mean_CpuAccTests") +{ + +TEST_CASE ("Mean_Uint8_KeepDims_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + MeanUint8KeepDimsTest(backends); +} + +TEST_CASE ("Mean_Uint8_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + MeanUint8Test(backends); +} + +TEST_CASE ("Mean_Fp32_KeepDims_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + MeanFp32KeepDimsTest(backends); +} + +TEST_CASE ("Mean_Fp32_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + MeanFp32Test(backends); +} + +} + +TEST_SUITE("Mean_GpuAccTests") +{ + +TEST_CASE ("Mean_Uint8_KeepDims_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + MeanUint8KeepDimsTest(backends); +} + +TEST_CASE ("Mean_Uint8_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + MeanUint8Test(backends); +} + +TEST_CASE ("Mean_Fp32_KeepDims_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + MeanFp32KeepDimsTest(backends); +} + +TEST_CASE ("Mean_Fp32_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + MeanFp32Test(backends); +} + +} + +TEST_SUITE("Mean_CpuRefTests") +{ + +TEST_CASE ("Mean_Uint8_KeepDims_CpuRef_Test") +{ + 
std::vector backends = {armnn::Compute::CpuRef}; + MeanUint8KeepDimsTest(backends); +} + +TEST_CASE ("Mean_Uint8_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + MeanUint8Test(backends); +} + +TEST_CASE ("Mean_Fp32_KeepDims_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + MeanFp32KeepDimsTest(backends); +} + +TEST_CASE ("Mean_Fp32_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + MeanFp32Test(backends); +} + +} + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/ControlTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/ControlTestHelper.hpp new file mode 100644 index 0000000000..0c9796170d --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/ControlTestHelper.hpp @@ -0,0 +1,344 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +namespace +{ + +std::vector CreateConcatTfLiteModel(tflite::BuiltinOperator controlOperatorCode, + tflite::TensorType tensorType, + std::vector& inputTensorShape, + const std::vector & outputTensorShape, + const int32_t inputTensorNum, + int32_t axis = 0, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::vector> buffers; + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + + std::vector operatorInputs{}; + const std::vector operatorOutputs{inputTensorNum}; + std::vector subgraphInputs{}; + const std::vector subgraphOutputs{inputTensorNum}; + + std::vector> tensors(inputTensorNum + 1); + for (int i = 0; i < inputTensorNum; ++i) + { + tensors[i] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputTensorShape.data(), + inputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input" + std::to_string(i)), + quantizationParameters); + + // Add number of inputs to vector. 
+ operatorInputs.push_back(i); + subgraphInputs.push_back(i); + } + + // Create output tensor + tensors[inputTensorNum] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output"), + quantizationParameters); + + // create operator + tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_ConcatenationOptions; + flatbuffers::Offset operatorBuiltinOptions = CreateConcatenationOptions(flatBufferBuilder, axis).Union(); + + flatbuffers::Offset controlOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&controlOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Concatenation Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, controlOperatorCode); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +std::vector CreateMeanTfLiteModel(tflite::BuiltinOperator controlOperatorCode, + tflite::TensorType tensorType, + std::vector& input0TensorShape, + std::vector& input1TensorShape, + const std::vector & outputTensorShape, + std::vector& axisData, + const bool keepDims, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::array, 2> buffers; + buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})); + buffers[1] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(axisData.data()), + sizeof(int32_t) * axisData.size())); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + + std::array, 3> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(input0TensorShape.data(), + input0TensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input"), + quantizationParameters); + + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(input1TensorShape.data(), + input1TensorShape.size()), + ::tflite::TensorType_INT32, + 1, + flatBufferBuilder.CreateString("axis"), + quantizationParameters); + + // Create output tensor + tensors[2] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output"), + quantizationParameters); + + // create operator. 
Mean uses ReducerOptions. + tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_ReducerOptions; + flatbuffers::Offset operatorBuiltinOptions = CreateReducerOptions(flatBufferBuilder, keepDims).Union(); + + const std::vector operatorInputs{ {0, 1} }; + const std::vector operatorOutputs{ 2 }; + flatbuffers::Offset controlOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + const std::vector subgraphInputs{ {0, 1} }; + const std::vector subgraphOutputs{ 2 }; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&controlOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Mean Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, controlOperatorCode); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void ConcatenationTest(tflite::BuiltinOperator controlOperatorCode, + tflite::TensorType tensorType, + std::vector& backends, + std::vector& inputShapes, + std::vector& expectedOutputShape, + std::vector>& inputValues, + std::vector& expectedOutputValues, + int32_t axis = 0, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + std::vector modelBuffer = CreateConcatTfLiteModel(controlOperatorCode, + tensorType, + inputShapes, + expectedOutputShape, + inputValues.size(), + axis, + quantScale, + quantOffset); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data for all input tensors. + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + // Get single input tensor and assign to interpreters. 
+ auto inputTensorValues = inputValues[i]; + armnnDelegate::FillInput(tfLiteInterpreter, i, inputTensorValues); + armnnDelegate::FillInput(armnnDelegateInterpreter, i, inputTensorValues); + } + + // Run EnqueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + armnnDelegate::CompareOutputData(tfLiteInterpreter, + armnnDelegateInterpreter, + expectedOutputShape, + expectedOutputValues); + + armnnDelegateInterpreter.reset(nullptr); +} + +template +void MeanTest(tflite::BuiltinOperator controlOperatorCode, + tflite::TensorType tensorType, + std::vector& backends, + std::vector& input0Shape, + std::vector& input1Shape, + std::vector& expectedOutputShape, + std::vector& input0Values, + std::vector& input1Values, + std::vector& expectedOutputValues, + const bool keepDims, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + std::vector modelBuffer = CreateMeanTfLiteModel(controlOperatorCode, + tensorType, + input0Shape, + input1Shape, + expectedOutputShape, + input1Values, + keepDims, + quantScale, + quantOffset); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(tfLiteInterpreter, 0, input0Values); + armnnDelegate::FillInput(armnnDelegateInterpreter, 0, input0Values); + + // Run EnqueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + armnnDelegate::CompareOutputData(tfLiteInterpreter, + armnnDelegateInterpreter, + expectedOutputShape, + expectedOutputValues); + + armnnDelegateInterpreter.reset(nullptr); +} + +} // anonymous namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/Convolution2dTest.cpp b/arch/arm/ARMnn/delegate/src/test/Convolution2dTest.cpp new file mode 100644 index 0000000000..b2e5fad8df --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/Convolution2dTest.cpp @@ -0,0 +1,489 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ConvolutionTestHelper.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace armnnDelegate +{ + +void Conv2DWithBiasesFp32Test(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 5, 5, 1 }; + std::vector filterShape { 1, 3, 3, 1 }; + std::vector biasShape { 1 }; + std::vector outputShape { 1, 3, 3, 1 }; + + static std::vector inputValues = + { + 1, 5, 2, 3, 5, + 8, 7, 3, 6, 3, + 3, 3, 9, 1, 9, + 4, 1, 8, 1, 3, + 6, 8, 1, 9, 2 + }; + + std::vector filterValues = + { + 4, 5, 6, + 0, 0, 0, + 3, 2, 1 + }; + + std::vector biasValues = { 0 }; + + std::vector expectedOutputValues = + { + 23, 33, 24, + 91, 99, 48, + 26, 50, 19 + }; + + tflite::Padding padding = tflite::Padding_SAME; + + ConvolutionTest(tflite::BuiltinOperator_CONV_2D, + ::tflite::TensorType_FLOAT32, + 2, // strideX + 2, // strideY + 1, // dilationX + 1, // dilationY + padding, + tflite::ActivationFunctionType_NONE, + backends, + inputShape, + filterShape, + outputShape, + inputValues, + filterValues, + expectedOutputValues, + biasShape, + biasValues); +} + +void Conv2DWithBiasesInt8Test(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 2, 2, 1 }; + std::vector filterShape { 1, 2, 2, 1 }; + std::vector biasShape { 1 }; + std::vector outputShape { 1, 2, 2, 1 }; + + static std::vector inputValues = { 1, 2, 3, 4 }; + + std::vector filterValues = { 2, 1, 0, 6 }; + + std::vector biasValues = { 10 }; + + std::vector expectedOutputValues = + { + (1 * 2 + 2 * 1 + 3 * 0 + 4 * 6 + 10) / 2, // 19 + (2 * 2 + 0 * 1 + 4 * 0 + 0 * 6 + 10) / 2, // 7 + (3 * 2 + 4 * 1 + 0 * 0 + 0 * 6 + 10) / 2, // 10 + (4 * 2 + 0 * 1 + 0 * 0 + 0 * 6 + 10) / 2, // 9 + }; + + tflite::Padding padding = tflite::Padding_SAME; + + ConvolutionTest(tflite::BuiltinOperator_CONV_2D, + ::tflite::TensorType_INT8, + 1, // strideX + 1, // strideY + 1, // dilationX + 1, // dilationY + padding, + tflite::ActivationFunctionType_NONE, + backends, + inputShape, + filterShape, + outputShape, + inputValues, + filterValues, + expectedOutputValues, + biasShape, + biasValues); +} + +void Conv2DWithBiasesReluUint8Test(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 2, 2, 1 }; + std::vector filterShape { 1, 2, 2, 1 }; + std::vector biasShape { 1 }; + std::vector outputShape { 1, 2, 2, 1 }; + + static std::vector inputValues = { 1, 2, 4, 8 }; + + std::vector filterValues = { 2, 1, 0, 6 }; + + std::vector biasValues = { 16 }; + + // factors to consider: + // - the filter zero point is non zero, hence the (x-fz) + // - the output scale is 2 hence the /2 + // - output zero point is non zero, hence the +outZero + // - RELU cuts negative values and then we add the output zero point + uint8_t bias = 16; + uint8_t outZero = 20; + uint8_t fz = 4; // filter zero point + + std::vector expectedOutputValues = + { + std::max(outZero, static_cast((1*(2-fz) + 2*(1-fz) + 4*(0-fz) + 8*(6-fz) + bias)/2 + outZero)), + std::max(outZero, static_cast((2*(2-fz) + 0*(1-fz) + 8*(0-fz) + 0*(6-fz) + bias)/2 + outZero)), + std::max(outZero, static_cast((4*(2-fz) + 8*(1-fz) + 0*(0-fz) + 0*(6-fz) + bias)/2 + outZero)), + std::max(outZero, static_cast((8*(2-fz) + 0*(1-fz) + 0*(0-fz) + 0*(6-fz) + bias)/2 + outZero)) + }; + + tflite::Padding padding = tflite::Padding_SAME; + + ConvolutionTest(tflite::BuiltinOperator_CONV_2D, + ::tflite::TensorType_UINT8, + 1, // strideX + 1, // strideY + 1, // dilationX + 1, // dilationY + padding, + 
tflite::ActivationFunctionType_RELU, + backends, + inputShape, + filterShape, + outputShape, + inputValues, + filterValues, + expectedOutputValues, + biasShape, + biasValues, + {1.0f}, // biasScale + {0}, // biasOffset + {1.0f}, // filterScale + {4}, // filterOffsets + 2, // output scale + 20); // output offset +} + +void Conv2DWithBiasesRelu6Uint8Test(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 2, 2, 1 }; + std::vector filterShape { 1, 2, 2, 1 }; + std::vector biasShape { 1 }; + std::vector outputShape { 1, 2, 2, 1 }; + + static std::vector inputValues = { 1, 2, 4, 1 }; + + std::vector filterValues = { 2, 1, 0, 6 }; + + std::vector biasValues = { 0 }; + + // factors to consider: + // - the output scale is 2 hence the /2 + // - RELU6 cuts output values at +6 + uint8_t relu6Min = 6 / 2; // divide by output scale + + std::vector expectedOutputValues = + { + std::min(relu6Min, static_cast((1 * 2 + 2 * 1 + 4 * 0 + 1 * 6) / 2)), + std::min(relu6Min, static_cast((2 * 2 + 0 * 1 + 1 * 0 + 0 * 6) / 2)), + std::min(relu6Min, static_cast((4 * 2 + 1 * 1 + 0 * 0 + 0 * 6) / 2)), + std::min(relu6Min, static_cast((1 * 2 + 0 * 1 + 0 * 0 + 0 * 6) / 2)) + }; + + tflite::Padding padding = tflite::Padding_SAME; + + ConvolutionTest(tflite::BuiltinOperator_CONV_2D, + ::tflite::TensorType_UINT8, + 1, // strideX + 1, // strideY + 1, // dilationX + 1, // dilationY + padding, + tflite::ActivationFunctionType_RELU6, + backends, + inputShape, + filterShape, + outputShape, + inputValues, + filterValues, + expectedOutputValues, + biasShape, + biasValues); +} + + +void Conv2DPerChannelInt8Test(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1,4,4,2 }; + std::vector filterShape { 4,2,2,2 }; + std::vector biasShape { 4 }; + std::vector outputShape { 1,4,4,4 }; + + static std::vector inputValues = + { + -11, 40,-26, 11,-28, 8, 0, -8, + -10, 34, 47, 0,-33,-14, 28, 35, + 6,-28,-26, 8, 13, 33,-31,-41, + 31,-20,-31,-16, 8,-18,-44, 0 + }; + + std::vector filterScales = { 1.858268, 2.0, 1.992126, 1.905512 }; + int32_t filterQuantizationDim = 0; + std::vector filterValues = + { + 13,-44, 5,-14, 21,-45, 36,-25, + -42, -2, 24,-30,-31, 35, 43,-30, + -20, -5, 25, 17, 18, 20, 4,-46, + -49, 9, -3,-20, 46, 5, 7,-15 + }; + + std::vector biasValues = { 0,0,0,0 }; + std::vector biasScales = { 0.721445, 0.7764700055, 0.773414, 0.739787 }; + + std::vector expectedOutputValues = + { + -1, 9, 3, 5, 1, -1, 5, 9, + 2, 7, -1, 2, 2, 4, 5, 6, + 1, 1, 4, 4, 2, 0, -4, -3, + 0, 6, 12, 6, 3, 0, -1, -2, + 7, -4, 4, 4, 3, 6, 6, 2, + 0, -3, -1, 4, 4, 8, 3, 1, + 5, 0, 0, 1, 4, 7, 4, 6, + 4, 0, 1, 2, 2, 7, 5, 7 + }; + float outputQuantScale = 401.960785f; + int outputQuantOffset = 3; + float inputQuantScale = 0.388235f; + int inputQuantOffset = 1; + + tflite::Padding padding = tflite::Padding_SAME; + + ConvolutionTest(tflite::BuiltinOperator_CONV_2D, + ::tflite::TensorType_INT8, + 1, // strideX + 1, // strideY + 1, // dilationX + 1, // dilationY + padding, + tflite::ActivationFunctionType_NONE, + backends, + inputShape, + filterShape, + outputShape, + inputValues, + filterValues, + expectedOutputValues, + biasShape, + biasValues, + biasScales, + {0,0,0,0}, + filterScales, + {0,0,0,0}, + outputQuantScale, + outputQuantOffset, + inputQuantScale, + inputQuantOffset, + 1, // depth_multiplier is ignored for conv2d value doesn't matter + filterQuantizationDim); +} + +TEST_SUITE("Convolution2dTest_CpuRefTests") +{ + +TEST_CASE ("Conv2DWithBiases_Fp32_CpuRef_Test") +{ + std::vector backends = 
{armnn::Compute::CpuRef}; + Conv2DWithBiasesFp32Test(backends); +} + +TEST_CASE ("Conv2DWithBiases_Int8_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + Conv2DWithBiasesInt8Test(backends); +} + +TEST_CASE ("Conv2DPerChannel_Int8_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + Conv2DPerChannelInt8Test(backends); +} + +} //End of TEST_SUITE("Convolution2dTest_CpuRef") + +TEST_SUITE("Convolution2dTest_CpuAccTests") +{ + +TEST_CASE ("Conv2DWithBiases_Fp32_CpuAcc_Test") +{ +std::vector backends = {armnn::Compute::CpuAcc}; +Conv2DWithBiasesFp32Test(backends); +} + +TEST_CASE ("Conv2DWithBiases_Int8_CpuAcc_Test") +{ +std::vector backends = {armnn::Compute::CpuAcc}; +Conv2DWithBiasesInt8Test(backends); +} + +TEST_CASE ("Conv2DPerChannel_Int8_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + Conv2DPerChannelInt8Test(backends); +} + +} //End of TEST_SUITE("Convolution2dTest_CpuAcc") + +TEST_SUITE("Convolution2dTest_GpuAccTests") +{ + +TEST_CASE ("Conv2DWithBiases_Fp32_GpuAcc_Test") +{ +std::vector backends = {armnn::Compute::GpuAcc}; +Conv2DWithBiasesFp32Test(backends); +} + +TEST_CASE ("Conv2DWithBiases_Int8_GpuAcc_Test") +{ +std::vector backends = {armnn::Compute::GpuAcc}; +Conv2DWithBiasesInt8Test(backends); +} + +TEST_CASE ("Conv2DPerChannel_Int8_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + Conv2DPerChannelInt8Test(backends); +} + +} //End of TEST_SUITE("Convolution2dTest_GpuAcc") + +void TransposeConvInt8Test(std::vector& backends) +{ + // Set input data + std::vector transposeTensorShape { 4 }; + std::vector filterShape { 1, 2, 2, 1 }; + std::vector inputShape { 1, 2, 2, 1 }; + std::vector outputShape { 1, 3, 3, 1 }; + + std::vector transposeData = { 1, 3, 3, 1 }; + static std::vector inputValues = { 1, 2, 3, 4 }; + std::vector filterValues = { 0, 1, 2, 4 }; + std::vector expectedOutputValues = + { + 0, 1, 2, + 2, 11, 12, + 6, 20, 16 + }; + + tflite::Padding padding = tflite::Padding_VALID; + TransposeConvTest(backends, + ::tflite::TensorType_INT8, + 1, // strideX + 1, // strideY + padding, + transposeTensorShape, + filterShape, + inputShape, + outputShape, + transposeData, + filterValues, + inputValues, + expectedOutputValues); +} + +void TransposeConvFp32Test(std::vector& backends) +{ + std::vector transposeTensorShape { 4 }; + std::vector filterShape { 1, 2, 2, 1 }; + std::vector inputShape { 1, 2, 2, 1 }; + std::vector outputShape { 1, 3, 3, 1 }; + + std::vector transposeData = { 1, 3, 3, 1 }; + static std::vector inputValues = { 1, 2, 3, 4 }; + std::vector filterValues = { 0, 1, 2, 4 }; + std::vector expectedOutputValues = + { + 0, 1, 2, + 2, 11, 12, + 6, 20, 16 + }; + + tflite::Padding padding = tflite::Padding_VALID; + TransposeConvTest(backends, + ::tflite::TensorType_FLOAT32, + 1, // strideX + 1, // strideY + padding, + transposeTensorShape, + filterShape, + inputShape, + outputShape, + transposeData, + filterValues, + inputValues, + expectedOutputValues); +} + +TEST_SUITE("TransposeConv_CpuRef_Test") +{ + +TEST_CASE ("TransposeConv_Fp32_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + TransposeConvFp32Test(backends); +} + +TEST_CASE ("TransposeConv_Int8_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + TransposeConvInt8Test(backends); +} + +} // End of TEST_SUITE(TransposeConv_CpuRef_Test) + +TEST_SUITE("TransposeConv_CpuAcc_Test") +{ + +TEST_CASE ("TransposeConv_Fp32_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + TransposeConvFp32Test(backends); +} + 
+TEST_CASE ("TransposeConv_Int8_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + TransposeConvInt8Test(backends); +} + +} // End of TEST_SUITE(TransposeConv_CpuAcc_Test) + +TEST_SUITE("TransposeConv_GpuAcc_Test") +{ + +TEST_CASE ("TransposeConv_Fp32_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + TransposeConvFp32Test(backends); +} + +TEST_CASE ("TransposeConv_Int8_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + TransposeConvInt8Test(backends); +} + +} // End of TEST_SUITE(TransposeConv_GpuAcc_Test) + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/Convolution3dTest.cpp b/arch/arm/ARMnn/delegate/src/test/Convolution3dTest.cpp new file mode 100644 index 0000000000..06883f186d --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/Convolution3dTest.cpp @@ -0,0 +1,318 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ConvolutionTestHelper.hpp" + +#include + +#include +#include +#include +#include +#include + +#include + +namespace armnnDelegate +{ + +// Conv3d is currently only supports Float32 inputs, filter, bias and outputs in TFLite. +// Conv3d is only correctly supported for external delegates from TF Lite v2.6, as there was a breaking bug in v2.5. +#if defined(ARMNN_POST_TFLITE_2_5) + +// Create a vector from 0 to size divided to create smaller floating point values. +template +std::vector CreateFloatData(int32_t size, float divisor) +{ + std::vector data; + for (int32_t i = 0; i < size; ++i) + { + float value = static_cast(i); + data.push_back(value/divisor); + } + return data; +} + +void Conv3DWithBiasesSimpleWithPaddingFp32Test(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 2, 2, 2, 1 }; + std::vector filterShape { 2, 2, 2, 1, 1 }; + std::vector biasShape { 1 }; + std::vector outputShape { 1, 2, 2, 2, 1 }; + + static std::vector inputValues = + { + 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f + }; + + std::vector filterValues = + { + 2.f,1.f, 1.f,0.f, 0.f,1.f, 1.f,1.f + }; + + std::vector biasValues = { 5.f }; + + std::vector expectedOutputValues = + { + 33.f, 21.f, 23.f, 13.f, 28.f, 25.f, 27.f, 21.f + }; + + Convolution3dTest(tflite::BuiltinOperator_CONV_3D, + ::tflite::TensorType_FLOAT32, + { 1, 1, 1 }, // strideX, strideY, strideZ + { 1, 1, 1 }, // dilationX, dilationY, dilationZ + tflite::Padding_SAME, + tflite::ActivationFunctionType_NONE, + backends, + inputShape, + filterShape, + outputShape, + inputValues, + filterValues, + expectedOutputValues, + biasShape, + biasValues); +} + +void Conv3DWithBiasesStridesFp32Test(std::vector& backends) +{ + std::vector inputShape { 1, 3, 10, 10, 1 }; + std::vector filterShape { 3, 5, 5, 1, 1 }; + std::vector biasShape { 1 }; + std::vector outputShape { 1, 1, 3, 3, 1 }; + + std::vector inputValues = CreateFloatData(300, 1.0f); + + std::vector filterValues = + { + 1.f, 1.f, 1.f, 1.f, 1.f, + 1.f, 1.f, 1.f, 1.f, 1.f, + 1.f, 1.f, 1.f, 1.f, 1.f, + 1.f, 1.f, 1.f, 1.f, 1.f, + 1.f, 1.f, 1.f, 1.f, 1.f, + + 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, + + 2.f, 2.f, 2.f, 2.f, 2.f, + 2.f, 2.f, 2.f, 2.f, 2.f, + 2.f, 2.f, 2.f, 2.f, 2.f, + 2.f, 2.f, 2.f, 2.f, 2.f, + 2.f, 2.f, 2.f, 2.f, 2.f + }; + + std::vector biasValues = { 10.f }; + + std::vector expectedOutputValues = + { + 11660.f, 11810.f, 11960.f, + + 13160.f, 13310.f, 13460.f, + + 14660.f, 14810.f, 14960.f + }; + + 
Convolution3dTest(tflite::BuiltinOperator_CONV_3D, + ::tflite::TensorType_FLOAT32, + { 2, 2, 2 }, // strideX, strideY, strideZ + { 1, 1, 1 }, // dilationX, dilationY, dilationZ + tflite::Padding_VALID, + tflite::ActivationFunctionType_NONE, + backends, + inputShape, + filterShape, + outputShape, + inputValues, + filterValues, + expectedOutputValues, + biasShape, + biasValues); +} + + +void Conv3DWithBiasesDilationFp32Test(std::vector& backends) +{ + std::vector inputShape { 1, 5, 5, 5, 2 }; + std::vector filterShape { 2, 2, 2, 2, 2 }; + std::vector biasShape { 2 }; + std::vector outputShape { 1, 2, 2, 2, 2 }; + + std::vector inputValues = CreateFloatData(250, 1.0f); + + std::vector filterValues = + { + -1.f, -1.f, -1.f, -1.f, -1.f, -1.f, -1.f, -1.f, -1.f, -1.f, -1.f, 1.f, 1.f, 1.f, -1.f, -1.f, + 1.f, 1.f, -1.f, 1.f, -1.f, 1.f, -1.f, 1.f, -1.f, -1.f, -1.f, 1.f, -1.f, 1.f, -1.f, 1.f, + }; + + std::vector biasValues = { 0.f, 2.f }; + + // Since the dilation rate is 3 this will dilate the kernel to be 4x4, + // therefore the output will be 2x2 + std::vector expectedOutputValues = + { + -1124.f, 976.f, + -1148.f, 980.f, + + -1244.f, 996.f, + -1268.f, 1000.f, + + -1724.f, 1076.f, + -1748.f, 1080.f, + + -1844.f, 1096.f, + -1868.f, 1100.f + }; + + Convolution3dTest(tflite::BuiltinOperator_CONV_3D, + ::tflite::TensorType_FLOAT32, + { 1, 1, 1 }, // strideX, strideY, strideZ + { 3, 3, 3 }, // dilationX, dilationY, dilationZ + tflite::Padding_VALID, + tflite::ActivationFunctionType_NONE, + backends, + inputShape, + filterShape, + outputShape, + inputValues, + filterValues, + expectedOutputValues, + biasShape, + biasValues); +} + +void Conv3DFp32SmallTest(std::vector& backends) +{ + std::vector inputShape { 1, 3, 10, 10, 1 }; + std::vector filterShape { 3, 3, 3, 1, 1 }; + std::vector biasShape { 1 }; + std::vector outputShape { 1, 1, 4, 4, 1 }; + + std::vector inputValues = CreateFloatData(300, 100.0f); + + std::vector filterValues = + { + 0.125977f, 0.150391f, 0.101562f, + 0.0585938f, 0.0864258f, 0.043457f, + 0.034668f, 0.0322266f, 0.0385742f, + + 0.125977f, 0.150391f, -0.101562f, + -0.0585938f,-0.0864258f,-0.043457f, + -0.0104630f, 0.0154114f, 0.0013768f, + + 0.0344238f, 0.035644f, 0.0495605f, + 0.0683594f, 0.099121f, -0.0461426f, + -0.0996094f,-0.126953f, -0.043457f, + }; + + std::vector biasValues = { 0 }; + + std::vector expectedOutputValues = + { + -0.08156067f, -0.06891209f, -0.05589598f, -0.04310101f, + 0.04584253f, 0.05855697f, 0.07129729f, 0.08325434f, + 0.17304349f, 0.18521416f, 0.19818866f, 0.21096253f, + 0.29965734f, 0.312698f, 0.32547557f, 0.33818722f + }; + + Convolution3dTest(tflite::BuiltinOperator_CONV_3D, + ::tflite::TensorType_FLOAT32, + { 2, 2, 2 }, // strideX, strideY, strideZ + { 1, 1, 1 }, // dilationX, dilationY, dilationZ + tflite::Padding_VALID, + tflite::ActivationFunctionType_NONE, + backends, + inputShape, + filterShape, + outputShape, + inputValues, + filterValues, + expectedOutputValues, + biasShape, + biasValues); +} + +TEST_SUITE("Convolution3dTest_CpuRefTests") +{ + +TEST_CASE ("Conv3DWithBiasesSimpleWithPadding_Fp32_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + Conv3DWithBiasesSimpleWithPaddingFp32Test(backends); +} + +TEST_CASE ("Conv3DWithBiasesStrides_Fp32_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + Conv3DWithBiasesStridesFp32Test(backends); +} + +TEST_CASE ("Conv3DWithBiasesDilation_Fp32_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + Conv3DWithBiasesDilationFp32Test(backends); +} + 
+TEST_CASE ("Conv3DFp32Small_Fp32_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + Conv3DFp32SmallTest(backends); +} + +} //End of TEST_SUITE("Convolution3dTest_CpuRefTests") + +TEST_SUITE("Convolution3dTest_CpuAccTests") +{ + +TEST_CASE ("Conv3DWithBiasesSimpleWithPadding_Fp32_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + Conv3DWithBiasesSimpleWithPaddingFp32Test(backends); +} + +TEST_CASE ("Conv3DWithBiasesStrides_Fp32_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + Conv3DWithBiasesStridesFp32Test(backends); +} + +TEST_CASE ("Conv3DFp32Small_Fp32_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + Conv3DFp32SmallTest(backends); +} + +} //End of TEST_SUITE("Convolution3dTest_CpuAccTests") + +TEST_SUITE("Convolution3dTest_GpuAccTests") +{ + +TEST_CASE ("Conv3DWithBiasesSimpleWithPadding_Fp32_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + Conv3DWithBiasesSimpleWithPaddingFp32Test(backends); +} + +TEST_CASE ("Conv3DWithBiasesStrides_Fp32_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + Conv3DWithBiasesStridesFp32Test(backends); +} + +TEST_CASE ("Conv3DFp32Small_Fp32_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + Conv3DFp32SmallTest(backends); +} + +} //End of TEST_SUITE("Convolution3dTest_GpuAccTests") + +#endif + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/ConvolutionTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/ConvolutionTestHelper.hpp new file mode 100644 index 0000000000..ce1f951d21 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/ConvolutionTestHelper.hpp @@ -0,0 +1,782 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ + +template +std::vector CreateConv2dTfLiteModel(tflite::BuiltinOperator convolutionOperatorCode, + tflite::TensorType tensorType, + uint32_t strideX, + uint32_t strideY, + uint32_t dilationX, + uint32_t dilationY, + tflite::Padding padding, + tflite::ActivationFunctionType fused_activation_function, + const std::vector & inputTensorShape, + const std::vector & filterTensorShape, + const std::vector & biasTensorShape, + const std::vector & outputTensorShape, + const std::vector & filterData, + const std::vector & biasData, + const std::vector biasScales = {1.0f}, + const std::vector biasOffsets = {0}, + const std::vector filterScales = {1.0f}, + const std::vector filterOffsets = {0}, + float outputQuantScale = 2.0f, + int outputQuantOffset = 0, + float quantScale = 1.0f, + int quantOffset = 0, + int32_t depth_multiplier = 1, + int32_t filterQuantizationDim = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::array, 3> buffers; + buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})); + buffers[1] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(filterData.data()), + sizeof(T) * filterData.size())); + + buffers[2] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(biasData.data()), + sizeof(B) * biasData.size())); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + auto outputQuantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ outputQuantScale }), + flatBufferBuilder.CreateVector({ outputQuantOffset })); + + auto filterQuantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector(filterScales), + flatBufferBuilder.CreateVector(filterOffsets), + tflite::QuantizationDetails_NONE, + 0, + filterQuantizationDim); + + auto biasQuantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector(biasScales), + flatBufferBuilder.CreateVector(biasOffsets)); + + std::array, 4> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputTensorShape.data(), + inputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input"), + quantizationParameters); + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(filterTensorShape.data(), + filterTensorShape.size()), + tensorType, + 1, + flatBufferBuilder.CreateString("filter"), + filterQuantizationParameters); + + auto biasTensorType = ::tflite::TensorType_FLOAT32; + if (tensorType == ::tflite::TensorType_INT8 || tensorType == ::tflite::TensorType_UINT8) + { + biasTensorType = ::tflite::TensorType_INT32; + } + tensors[2] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(biasTensorShape.data(), biasTensorShape.size()), + biasTensorType, + 2, + flatBufferBuilder.CreateString("bias"), + biasQuantizationParameters); + tensors[3] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output"), + outputQuantizationParameters); 
+ + flatbuffers::Offset operatorBuiltinOptions; + tflite::BuiltinOptions operatorBuiltinOptionsType; + + if(convolutionOperatorCode == tflite::BuiltinOperator_DEPTHWISE_CONV_2D) + { + operatorBuiltinOptionsType = tflite::BuiltinOptions_DepthwiseConv2DOptions; + operatorBuiltinOptions = CreateDepthwiseConv2DOptions(flatBufferBuilder, + padding, + strideX, + strideY, + depth_multiplier, + fused_activation_function, + dilationX, + dilationY).Union(); + } + if(convolutionOperatorCode == tflite::BuiltinOperator_CONV_2D) + { + operatorBuiltinOptionsType = tflite::BuiltinOptions_Conv2DOptions; + operatorBuiltinOptions = CreateConv2DOptions(flatBufferBuilder, + padding, + strideX, + strideY, + fused_activation_function, + dilationX, + dilationY).Union(); + } + + // create operator + const std::vector operatorInputs{0, 1, 2}; + const std::vector operatorOutputs{3}; + flatbuffers::Offset convolutionOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + const std::vector subgraphInputs{0, 1, 2}; + const std::vector subgraphOutputs{3}; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&convolutionOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Convolution2d Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, convolutionOperatorCode); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void ConvolutionTest(tflite::BuiltinOperator convolutionOperatorCode, + tflite::TensorType tensorType, + uint32_t strideX, + uint32_t strideY, + uint32_t dilationX, + uint32_t dilationY, + tflite::Padding padding, + tflite::ActivationFunctionType fused_activation_function, + std::vector& backends, + std::vector& inputShape, + std::vector& filterShape, + std::vector& outputShape, + std::vector& inputValues, + std::vector& filterValues, + std::vector& expectedOutputValues, + const std::vector& biasShape = {}, + const std::vector& biasValues = {}, + const std::vector biasScales = {1.0f}, + const std::vector biasOffsets = {0}, + const std::vector filterScales = {1.0f}, + const std::vector filterOffsets = {0}, + float outputQuantScale = 2.0f, + int outputQuantOffset = 0, + float quantScale = 1.0f, + int quantOffset = 0, + int32_t depth_multiplier = 1, + int32_t filterQuantizationDim = 3) + +{ + using namespace tflite; + + std::vector modelBuffer; + + modelBuffer = CreateConv2dTfLiteModel(convolutionOperatorCode, + tensorType, + strideX, + strideY, + dilationX, + dilationY, + padding, + fused_activation_function, + inputShape, + filterShape, + biasShape, + outputShape, + filterValues, + biasValues, + biasScales, + 
biasOffsets, + filterScales, + filterOffsets, + outputQuantScale, + outputQuantOffset, + quantScale, + quantOffset, + depth_multiplier, + filterQuantizationDim); + + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + auto tfLiteDelegateInputId = tfLiteInterpreter->inputs()[0]; + auto tfLiteDelageInputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateInputId); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + tfLiteDelageInputData[i] = inputValues[i]; + } + + auto armnnDelegateInputId = armnnDelegateInterpreter->inputs()[0]; + auto armnnDelegateInputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateInputId); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + armnnDelegateInputData[i] = inputValues[i]; + } + // Run EnqueueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0]; + auto tfLiteDelagateOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0]; + auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateOutputId); + for (size_t i = 0; i < expectedOutputValues.size(); i++) + { + CHECK(tfLiteDelagateOutputData[i] == armnnDelegateOutputData[i]); + CHECK(doctest::Approx(tfLiteDelagateOutputData[i]).epsilon(0.000001f) == expectedOutputValues[i]); + CHECK(doctest::Approx(armnnDelegateOutputData[i]).epsilon(0.000001f) == expectedOutputValues[i]); + } +} + +// Conv3d is only correctly supported for external delegates from TF Lite v2.6, as there was a breaking bug in v2.5. 
+#if defined(ARMNN_POST_TFLITE_2_5) +template +std::vector CreateConv3dTfLiteModel(tflite::BuiltinOperator convolutionOperatorCode, + tflite::TensorType tensorType, + std::vector strides, + std::vector dilation, + tflite::Padding padding, + tflite::ActivationFunctionType fused_activation_function, + const std::vector& inputTensorShape, + const std::vector& filterTensorShape, + const std::vector& biasTensorShape, + const std::vector& outputTensorShape, + const std::vector& filterData, + const std::vector& biasData, + const std::vector biasScales = {1.0f}, + const std::vector biasOffsets = {0}, + const std::vector filterScales = {1.0f}, + const std::vector filterOffsets = {0}, + float outputQuantScale = 2.0f, + int outputQuantOffset = 0, + float quantScale = 1.0f, + int quantOffset = 0, + int32_t depth_multiplier = 1, + int32_t filterQuantizationDim = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::array, 3> buffers; + buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})); + buffers[1] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(filterData.data()), + sizeof(T) * filterData.size())); + + buffers[2] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(biasData.data()), + sizeof(B) * biasData.size())); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + auto outputQuantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ outputQuantScale }), + flatBufferBuilder.CreateVector({ outputQuantOffset })); + + auto filterQuantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector(filterScales), + flatBufferBuilder.CreateVector(filterOffsets), + tflite::QuantizationDetails_NONE, + 0, + filterQuantizationDim); + + auto biasQuantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector(biasScales), + flatBufferBuilder.CreateVector(biasOffsets)); + + std::array, 4> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputTensorShape.data(), + inputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input"), + quantizationParameters); + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(filterTensorShape.data(), + filterTensorShape.size()), + tensorType, + 1, + flatBufferBuilder.CreateString("filter"), + filterQuantizationParameters); + + auto biasTensorType = ::tflite::TensorType_FLOAT32; + if (tensorType == ::tflite::TensorType_INT8 || tensorType == ::tflite::TensorType_UINT8) + { + biasTensorType = ::tflite::TensorType_INT32; + } + tensors[2] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(biasTensorShape.data(), biasTensorShape.size()), + biasTensorType, + 2, + flatBufferBuilder.CreateString("bias"), + biasQuantizationParameters); + tensors[3] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output"), + outputQuantizationParameters); + + tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_Conv3DOptions; + flatbuffers::Offset operatorBuiltinOptions = CreateConv3DOptions(flatBufferBuilder, + padding, 
+ strides[2], // Depth + strides[0], // Width + strides[1], // Height + fused_activation_function, + dilation[2], + dilation[0], + dilation[1]).Union(); + + // Create operator + const std::vector operatorInputs{0, 1, 2}; + const std::vector operatorOutputs{3}; + flatbuffers::Offset convolutionOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + const std::vector subgraphInputs{0, 1, 2}; + const std::vector subgraphOutputs{3}; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&convolutionOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Convolution 3d Operator Model"); + + // If using an operator with a code greater than 127 then the enum value should be passed as the fifth + // parameter rather than the second like in other tests. + flatbuffers::Offset operatorCode = + CreateOperatorCode(flatBufferBuilder, 0, 0, 1, tflite::BuiltinOperator_CONV_3D); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void Convolution3dTest(tflite::BuiltinOperator convolutionOperatorCode, + tflite::TensorType tensorType, + std::vector strides, + std::vector dilation, + tflite::Padding padding, + tflite::ActivationFunctionType fused_activation_function, + std::vector& backends, + std::vector& inputShape, + std::vector& filterShape, + std::vector& outputShape, + std::vector& inputValues, + std::vector& filterValues, + std::vector& expectedOutputValues, + const std::vector& biasShape = {}, + const std::vector& biasValues = {}, + const std::vector biasScales = {1.0f}, + const std::vector biasOffsets = {0}, + const std::vector filterScales = {1.0f}, + const std::vector filterOffsets = {0}, + float outputQuantScale = 2.0f, + int outputQuantOffset = 0, + float quantScale = 1.0f, + int quantOffset = 0, + int32_t depth_multiplier = 1, + int32_t filterQuantizationDim = 3) +{ + using namespace tflite; + + std::vector modelBuffer; + modelBuffer = CreateConv3dTfLiteModel(convolutionOperatorCode, + tensorType, + strides, + dilation, + padding, + fused_activation_function, + inputShape, + filterShape, + biasShape, + outputShape, + filterValues, + biasValues, + biasScales, + biasOffsets, + filterScales, + filterOffsets, + outputQuantScale, + outputQuantOffset, + quantScale, + quantOffset, + depth_multiplier, + filterQuantizationDim); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + 
CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(tfLiteInterpreter, 0, inputValues); + armnnDelegate::FillInput(armnnDelegateInterpreter, 0, inputValues); + + // Run EnqueueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0]; + auto tfLiteDelagateOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0]; + auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateOutputId); + + armnnDelegate::CompareData(expectedOutputValues.data(), armnnDelegateOutputData, expectedOutputValues.size(), 1); + armnnDelegate::CompareData(expectedOutputValues.data(), tfLiteDelagateOutputData, expectedOutputValues.size(), 1); + armnnDelegate::CompareData(tfLiteDelagateOutputData, armnnDelegateOutputData, expectedOutputValues.size(), 1); +} +#endif + +template +std::vector CreateTransposeConvTfLiteModel(tflite::TensorType tensorType, + uint32_t strideX, + uint32_t strideY, + tflite::Padding padding, + const std::vector & transposeTensorShape, + const std::vector & filterTensorShape, + const std::vector & inputTensorShape, + const std::vector & outputTensorShape, + const std::vector & transposeData, + const std::vector & filterData, + float filterScale = 1.0f, + int filterOffset = 0, + float outputQuantScale = 2.0f, + int outputQuantOffset = 0, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::array, 3> buffers; + buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})); + buffers[1] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(transposeData.data()), + sizeof(int32_t) * transposeData.size())); + buffers[2] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(filterData.data()), + sizeof(T) * filterData.size())); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + auto outputQuantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ outputQuantScale }), + flatBufferBuilder.CreateVector({ outputQuantOffset })); + auto filterQuantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ filterScale }), + flatBufferBuilder.CreateVector({ filterOffset })); + + std::array, 4> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + 
flatBufferBuilder.CreateVector(transposeTensorShape.data(), + transposeTensorShape.size()), + tflite::TensorType_INT32, + 1); + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(filterTensorShape.data(), + filterTensorShape.size()), + tensorType, + 2, + flatBufferBuilder.CreateString("filter"), + filterQuantizationParameters); + tensors[2] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputTensorShape.data(), + inputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input"), + quantizationParameters); + tensors[3] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output"), + outputQuantizationParameters); + + tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_TransposeConvOptions; + flatbuffers::Offset operatorBuiltinOptions = + CreateTransposeConvOptions(flatBufferBuilder, padding, strideX, strideY).Union(); + + // create operator + const std::vector operatorInputs{0, 1, 2}; + const std::vector operatorOutputs{3}; + flatbuffers::Offset convolutionOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + const std::vector subgraphInputs{0, 1, 2}; + const std::vector subgraphOutputs{3}; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&convolutionOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: TransposeConv Operator Model"); + flatbuffers::Offset operatorCode = + CreateOperatorCode(flatBufferBuilder, tflite::BuiltinOperator_TRANSPOSE_CONV); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void TransposeConvTest(std::vector& backends, + tflite::TensorType tensorType, + uint32_t strideX, + uint32_t strideY, + tflite::Padding padding, + const std::vector & transposeTensorShape, + const std::vector & filterTensorShape, + const std::vector & inputTensorShape, + const std::vector & outputTensorShape, + const std::vector & transposeData, + const std::vector & filterData, + std::vector& inputValues, + std::vector& expectedOutputValues, + float filterScale = 1.0f, + int filterOffset = 0, + float outputQuantScale = 1.0f, + int outputQuantOffset = 0, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + + std::vector modelBuffer; + modelBuffer = CreateTransposeConvTfLiteModel(tensorType, + strideX, + strideY, + padding, + transposeTensorShape, + filterTensorShape, + inputTensorShape, + outputTensorShape, + transposeData, + filterData, + filterScale, + filterOffset, + outputQuantScale, + 
outputQuantOffset, + quantScale, + quantOffset); + + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + auto tfLiteDelegateInputId = tfLiteInterpreter->inputs()[2]; + auto tfLiteDelageInputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateInputId); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + tfLiteDelageInputData[i] = inputValues[i]; + } + + auto armnnDelegateInputId = armnnDelegateInterpreter->inputs()[2]; + auto armnnDelegateInputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateInputId); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + armnnDelegateInputData[i] = inputValues[i]; + } + // Run EnqueueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0]; + auto tfLiteDelagateOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0]; + auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateOutputId); + for (size_t i = 0; i < expectedOutputValues.size(); i++) + { + CHECK(armnnDelegateOutputData[i] == expectedOutputValues[i]); + CHECK(tfLiteDelagateOutputData[i] == expectedOutputValues[i]); + CHECK(tfLiteDelagateOutputData[i] == armnnDelegateOutputData[i]); + } +} + +} // anonymous namespace + + + + diff --git a/arch/arm/ARMnn/delegate/src/test/DelegateOptionsTest.cpp b/arch/arm/ARMnn/delegate/src/test/DelegateOptionsTest.cpp new file mode 100644 index 0000000000..54f9c8f0e3 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/DelegateOptionsTest.cpp @@ -0,0 +1,313 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "DelegateOptionsTestHelper.hpp" +#include +#include + +namespace armnnDelegate +{ + +TEST_SUITE("DelegateOptions") +{ + +TEST_CASE ("ArmnnDelegateOptimizerOptionsReduceFp32ToFp16") +{ + std::stringstream ss; + { + StreamRedirector redirect(std::cout, ss.rdbuf()); + + std::vector backends = { armnn::Compute::CpuRef }; + std::vector tensorShape { 1, 2, 2, 1 }; + std::vector inputData = { 1, 2, 3, 4 }; + std::vector divData = { 2, 2, 3, 4 }; + std::vector expectedResult = { 1, 2, 2, 2 }; + + // Enable ReduceFp32ToFp16 + armnn::OptimizerOptions optimizerOptions(true, true, false, false); + armnnDelegate::DelegateOptions delegateOptions(backends, optimizerOptions); + + DelegateOptionTest(::tflite::TensorType_FLOAT32, + backends, + tensorShape, + inputData, + inputData, + divData, + expectedResult, + delegateOptions); + } + // ReduceFp32ToFp16 option is enabled + CHECK(ss.str().find("convert_fp32_to_fp16") != std::string::npos); + CHECK(ss.str().find("convert_fp16_to_fp32") != std::string::npos); +} + +TEST_CASE ("ArmnnDelegateOptimizerOptionsDebug") +{ + std::stringstream ss; + { + StreamRedirector redirect(std::cout, ss.rdbuf()); + + std::vector backends = { armnn::Compute::CpuRef }; + std::vector tensorShape { 1, 2, 2, 1 }; + std::vector inputData = { 1, 2, 3, 4 }; + std::vector divData = { 2, 2, 3, 4 }; + std::vector expectedResult = { 1, 2, 2, 2 }; + + // Enable Debug + armnn::OptimizerOptions optimizerOptions(false, true, false, false); + armnnDelegate::DelegateOptions delegateOptions(backends, optimizerOptions); + + DelegateOptionTest(::tflite::TensorType_FLOAT32, + backends, + tensorShape, + inputData, + inputData, + divData, + expectedResult, + delegateOptions); + } + // Debug option triggered. + CHECK(ss.str().find("layerGuid") != std::string::npos); + CHECK(ss.str().find("layerName") != std::string::npos); + CHECK(ss.str().find("outputSlot") != std::string::npos); + CHECK(ss.str().find("shape") != std::string::npos); + CHECK(ss.str().find("data") != std::string::npos); +} + +TEST_CASE ("ArmnnDelegateOptimizerOptionsDebugFunction") +{ + std::vector backends = { armnn::Compute::CpuRef }; + std::vector tensorShape { 1, 2, 2, 1 }; + std::vector inputData = { 1, 2, 3, 4 }; + std::vector divData = { 2, 2, 3, 4 }; + std::vector expectedResult = { 1, 2, 2, 2 }; + + // Enable debug with debug callback function + armnn::OptimizerOptions optimizerOptions(false, true, false, false); + bool callback = false; + auto mockCallback = [&](armnn::LayerGuid guid, unsigned int slotIndex, armnn::ITensorHandle* tensor) + { + armnn::IgnoreUnused(guid); + armnn::IgnoreUnused(slotIndex); + armnn::IgnoreUnused(tensor); + callback = true; + }; + + armnn::INetworkProperties networkProperties(false, armnn::MemorySource::Undefined, armnn::MemorySource::Undefined); + armnnDelegate::DelegateOptions delegateOptions(backends, + optimizerOptions, + armnn::EmptyOptional(), + armnn::Optional(mockCallback)); + + CHECK(!callback); + + DelegateOptionTest(::tflite::TensorType_FLOAT32, + backends, + tensorShape, + inputData, + inputData, + divData, + expectedResult, + delegateOptions); + + // Check that the debug callback function was called. 
+ CHECK(callback); +} + +TEST_CASE ("ArmnnDelegateOptimizerOptionsReduceFp32ToBf16") +{ + std::stringstream ss; + { + StreamRedirector redirect(std::cout, ss.rdbuf()); + + ReduceFp32ToBf16TestImpl(); + } + + // ReduceFp32ToBf16 option is enabled + CHECK(ss.str().find("convert_fp32_to_bf16") != std::string::npos); +} + +TEST_CASE ("ArmnnDelegateOptimizerOptionsImport") +{ + std::vector backends = { armnn::Compute::CpuAcc, armnn::Compute::CpuRef }; + std::vector tensorShape { 1, 2, 2, 1 }; + std::vector inputData = { 1, 2, 3, 4 }; + std::vector divData = { 2, 2, 3, 4 }; + std::vector expectedResult = { 1, 2, 2, 2 }; + + armnn::OptimizerOptions optimizerOptions(false, false, false, true); + armnnDelegate::DelegateOptions delegateOptions(backends, optimizerOptions); + + DelegateOptionTest(::tflite::TensorType_UINT8, + backends, + tensorShape, + inputData, + inputData, + divData, + expectedResult, + delegateOptions); +} + +} + +TEST_SUITE("DelegateOptions_CpuAccTests") +{ + +TEST_CASE ("ArmnnDelegateModelOptions_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + std::vector tensorShape { 1, 2, 2, 1 }; + std::vector inputData = { 1, 2, 3, 4 }; + std::vector divData = { 2, 2, 3, 4 }; + std::vector expectedResult = { 1, 2, 2, 2 }; + + unsigned int numberOfThreads = 2; + + armnn::ModelOptions modelOptions; + armnn::BackendOptions cpuAcc("CpuAcc", + { + { "FastMathEnabled", true }, + { "NumberOfThreads", numberOfThreads } + }); + modelOptions.push_back(cpuAcc); + + armnn::OptimizerOptions optimizerOptions(false, false, false, false, modelOptions); + armnnDelegate::DelegateOptions delegateOptions(backends, optimizerOptions); + + DelegateOptionTest(::tflite::TensorType_FLOAT32, + backends, + tensorShape, + inputData, + inputData, + divData, + expectedResult, + delegateOptions); +} + +TEST_CASE ("ArmnnDelegateSerializeToDot") +{ + const fs::path filename(fs::temp_directory_path() / "ArmnnDelegateSerializeToDot.dot"); + if ( fs::exists(filename) ) + { + fs::remove(filename); + } + std::stringstream ss; + { + StreamRedirector redirect(std::cout, ss.rdbuf()); + + std::vector backends = { armnn::Compute::CpuRef }; + std::vector tensorShape { 1, 2, 2, 1 }; + std::vector inputData = { 1, 2, 3, 4 }; + std::vector divData = { 2, 2, 3, 4 }; + std::vector expectedResult = { 1, 2, 2, 2 }; + + armnn::OptimizerOptions optimizerOptions(false, false, false, false); + armnnDelegate::DelegateOptions delegateOptions(backends, optimizerOptions); + // Enable serialize to dot by specifying the target file name. + delegateOptions.SetSerializeToDot(filename); + DelegateOptionTest(::tflite::TensorType_FLOAT32, + backends, + tensorShape, + inputData, + inputData, + divData, + expectedResult, + delegateOptions); + } + CHECK(fs::exists(filename)); + // The file should have a size greater than 0 bytes. + CHECK(fs::file_size(filename) > 0); + // Clean up. 
+ fs::remove(filename); +} + +void CreateFp16StringParsingTestRun(std::vector& keys, + std::vector& values, + std::stringstream& ss) +{ + StreamRedirector redirect(std::cout, ss.rdbuf()); + + std::vector backends = { armnn::Compute::CpuRef }; + std::vector tensorShape { 1, 2, 2, 1 }; + std::vector inputData = { 1, 2, 3, 4 }; + std::vector divData = { 2, 2, 3, 4 }; + std::vector expectedResult = { 1, 2, 2, 2 }; + + // Create options_keys and options_values char array + size_t num_options = keys.size(); + std::unique_ptr options_keys = + std::unique_ptr(new const char*[num_options + 1]); + std::unique_ptr options_values = + std::unique_ptr(new const char*[num_options + 1]); + for (size_t i=0; i(::tflite::TensorType_FLOAT32, + backends, + tensorShape, + inputData, + inputData, + divData, + expectedResult, + delegateOptions); +} + +TEST_CASE ("ArmnnDelegateStringParsingOptionReduceFp32ToFp16") +{ + SUBCASE("Fp16=1") + { + std::stringstream ss; + std::vector keys { "backends", "debug-data", "reduce-fp32-to-fp16", "logging-severity"}; + std::vector values { "CpuRef", "1", "1", "info"}; + CreateFp16StringParsingTestRun(keys, values, ss); + CHECK(ss.str().find("convert_fp32_to_fp16") != std::string::npos); + CHECK(ss.str().find("convert_fp16_to_fp32") != std::string::npos); + } + SUBCASE("Fp16=true") + { + std::stringstream ss; + std::vector keys { "backends", "debug-data", "reduce-fp32-to-fp16"}; + std::vector values { "CpuRef", "TRUE", "true"}; + CreateFp16StringParsingTestRun(keys, values, ss); + CHECK(ss.str().find("convert_fp32_to_fp16") != std::string::npos); + CHECK(ss.str().find("convert_fp16_to_fp32") != std::string::npos); + } + SUBCASE("Fp16=True") + { + std::stringstream ss; + std::vector keys { "backends", "debug-data", "reduce-fp32-to-fp16"}; + std::vector values { "CpuRef", "true", "True"}; + CreateFp16StringParsingTestRun(keys, values, ss); + CHECK(ss.str().find("convert_fp32_to_fp16") != std::string::npos); + CHECK(ss.str().find("convert_fp16_to_fp32") != std::string::npos); + } + SUBCASE("Fp16=0") + { + std::stringstream ss; + std::vector keys { "backends", "debug-data", "reduce-fp32-to-fp16"}; + std::vector values { "CpuRef", "true", "0"}; + CreateFp16StringParsingTestRun(keys, values, ss); + CHECK(ss.str().find("convert_fp32_to_fp16") == std::string::npos); + CHECK(ss.str().find("convert_fp16_to_fp32") == std::string::npos); + } + SUBCASE("Fp16=false") + { + std::stringstream ss; + std::vector keys { "backends", "debug-data", "reduce-fp32-to-fp16"}; + std::vector values { "CpuRef", "1", "false"}; + CreateFp16StringParsingTestRun(keys, values, ss); + CHECK(ss.str().find("convert_fp32_to_fp16") == std::string::npos); + CHECK(ss.str().find("convert_fp16_to_fp32") == std::string::npos); + } +} + + +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/test/DelegateOptionsTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/DelegateOptionsTestHelper.hpp new file mode 100644 index 0000000000..6e0cc3154c --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/DelegateOptionsTestHelper.hpp @@ -0,0 +1,298 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include "ConvolutionTestHelper.hpp" +#include "TestUtils.hpp" + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ + +struct StreamRedirector +{ +public: + StreamRedirector(std::ostream &stream, std::streambuf *newStreamBuffer) + : m_Stream(stream), m_BackupBuffer(m_Stream.rdbuf(newStreamBuffer)) {} + + ~StreamRedirector() { m_Stream.rdbuf(m_BackupBuffer); } + +private: + std::ostream &m_Stream; + std::streambuf *m_BackupBuffer; +}; + +std::vector CreateAddDivTfLiteModel(tflite::TensorType tensorType, + const std::vector& tensorShape, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::vector> buffers; + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + + + std::array, 5> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorShape.data(), + tensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input_0"), + quantizationParameters); + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorShape.data(), + tensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input_1"), + quantizationParameters); + tensors[2] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorShape.data(), + tensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input_2"), + quantizationParameters); + tensors[3] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorShape.data(), + tensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("add"), + quantizationParameters); + tensors[4] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorShape.data(), + tensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output"), + quantizationParameters); + + // create operator + tflite::BuiltinOptions addBuiltinOptionsType = tflite::BuiltinOptions_AddOptions; + flatbuffers::Offset addBuiltinOptions = + CreateAddOptions(flatBufferBuilder, ActivationFunctionType_NONE).Union(); + + tflite::BuiltinOptions divBuiltinOptionsType = tflite::BuiltinOptions_DivOptions; + flatbuffers::Offset divBuiltinOptions = + CreateAddOptions(flatBufferBuilder, ActivationFunctionType_NONE).Union(); + + std::array, 2> operators; + const std::vector addInputs{0, 1}; + const std::vector addOutputs{3}; + operators[0] = CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(addInputs.data(), addInputs.size()), + flatBufferBuilder.CreateVector(addOutputs.data(), addOutputs.size()), + addBuiltinOptionsType, + addBuiltinOptions); + const std::vector divInputs{3, 2}; + const std::vector divOutputs{4}; + operators[1] = CreateOperator(flatBufferBuilder, + 1, + flatBufferBuilder.CreateVector(divInputs.data(), divInputs.size()), + flatBufferBuilder.CreateVector(divOutputs.data(), divOutputs.size()), + divBuiltinOptionsType, + divBuiltinOptions); + + const std::vector subgraphInputs{0, 1, 2}; + const std::vector subgraphOutputs{4}; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + 
flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(operators.data(), operators.size())); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Add and Div Operator Model"); + + std::array, 2> codes; + codes[0] = CreateOperatorCode(flatBufferBuilder, tflite::BuiltinOperator_ADD); + codes[1] = CreateOperatorCode(flatBufferBuilder, tflite::BuiltinOperator_DIV); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(codes.data(), codes.size()), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +void ReduceFp32ToBf16TestImpl() +{ + using namespace tflite; + // Set input data + std::vector inputShape{ 1, 5, 5, 1 }; + std::vector filterShape{ 1, 3, 3, 1 }; + std::vector biasShape{ 1 }; + std::vector outputShape{ 1, 3, 3, 1 }; + + std::vector inputValues = + { + 1, 5, 2, 3, 5, + 8, 7, 3, 6, 3, + 3, 3, 9, 1, 9, + 4, 1, 8, 1, 3, + 6, 8, 1, 9, 2 + }; + + std::vector filterValues = + { + 4, 5, 6, + 0, 0, 0, + 3, 2, 1 + }; + + std::vector biasValues = { 5 }; + + std::vector expectedResult = + { + 28, 38, 29, + 96, 104, 53, + 31, 55, 24 + }; + + tflite::Padding padding = Padding_SAME; + + std::vector modelBuffer; + modelBuffer = CreateConv2dTfLiteModel(BuiltinOperator_CONV_2D, + ::tflite::TensorType_FLOAT32, + 2, + 2, + 1, + 1, + padding, + ActivationFunctionType_NONE, + inputShape, + filterShape, + biasShape, + outputShape, + filterValues, + biasValues); + + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the Armnn Delegate + std::vector backends = {armnn::Compute::CpuRef}; + std::vector backendOptions; + + // Enable debug with BF16 enabled + armnn::OptimizerOptions optimizerOptions(false, true, true, false); + + armnnDelegate::DelegateOptions delegateOptions(backends, optimizerOptions); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(armnnDelegateInterpreter, 0, inputValues); + + // Run EnqueueWorkload + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0]; + auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateOutputId); + armnnDelegate::CompareData(expectedResult.data(), armnnDelegateOutputData, expectedResult.size()); + armnnDelegateInterpreter.reset(nullptr); +} + +template +void DelegateOptionTest(tflite::TensorType tensorType, + const std::vector& backends, + std::vector& tensorShape, 
+ std::vector& input0Values, + std::vector& input1Values, + std::vector& input2Values, + std::vector& expectedOutputValues, + const armnnDelegate::DelegateOptions& delegateOptions, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + std::vector modelBuffer = CreateAddDivTfLiteModel(tensorType, + tensorShape, + quantScale, + quantOffset); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(tfLiteInterpreter, 0, input0Values); + armnnDelegate::FillInput(tfLiteInterpreter, 1, input1Values); + armnnDelegate::FillInput(tfLiteInterpreter, 2, input2Values); + + armnnDelegate::FillInput(armnnDelegateInterpreter, 0, input0Values); + armnnDelegate::FillInput(armnnDelegateInterpreter, 1, input1Values); + armnnDelegate::FillInput(armnnDelegateInterpreter, 2, input2Values); + + // Run EnqueueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + armnnDelegate::CompareOutputData(tfLiteInterpreter, armnnDelegateInterpreter, tensorShape, expectedOutputValues); + + armnnDelegateInterpreter.reset(nullptr); +} + +} // anonymous namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/DepthwiseConvolution2dTest.cpp b/arch/arm/ARMnn/delegate/src/test/DepthwiseConvolution2dTest.cpp new file mode 100644 index 0000000000..ca10f2c0cb --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/DepthwiseConvolution2dTest.cpp @@ -0,0 +1,282 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ConvolutionTestHelper.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace armnnDelegate +{ + +void DepthwiseConv2dValidReluFp32Test(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 2, 2 }; + std::vector filterShape { 1, 2, 2, 4 }; + std::vector biasShape { 4 }; + std::vector outputShape { 1, 3, 3, 1 }; + + static std::vector inputValues = + { + 1, 2, 7, 8, + 3, 4, 9, 10, + 5, 6, 11, 12 + }; + + std::vector filterValues = + { + 1, 2, 3, 4, + -9, 10, -11, 12, + 5, 6, 7, 8, + 13, -14, 15, -16 + }; + + std::vector biasValues = { 1, 2, 3, 4 }; + + std::vector expectedOutputValues = + { + 71, 0, 99, 0, + 91, 0, 127, 0 + }; + + tflite::Padding padding = tflite::Padding_VALID; + int32_t depth_multiplier = 2; + + ConvolutionTest(tflite::BuiltinOperator_DEPTHWISE_CONV_2D, + ::tflite::TensorType_FLOAT32, + 1, // strideX + 1, // strideY + 1, // dilationX + 1, // dilationY + padding, + tflite::ActivationFunctionType_RELU, + backends, + inputShape, + filterShape, + outputShape, + inputValues, + filterValues, + expectedOutputValues, + biasShape, + biasValues, + {1.0f}, // biasScale + {0}, // biasOffset + {1.0f}, // filterScale + {0}, // filterOffsets + 2.0f, // outputQuantScale + 0, // outputQuantOffset + 1.0f, // quantScale + 0, // quantOffset + depth_multiplier); +} + +void DepthwiseConv2dSameUint8Test(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 3, 1 }; + std::vector filterShape { 1, 3, 3, 1 }; + std::vector biasShape { 1 } ; + std::vector outputShape { 1, 3, 3, 1 }; + + static std::vector inputValues = + { + 0, 1, 2, + 3, 4, 5, + 6, 7, 8 + }; + + std::vector filterValues = { 9, 8, 7, 6, 5, 4, 3, 2, 1 }; + + std::vector biasValues = { 10 }; + + std::vector expectedOutputValues = + { + 12, 23, 24, // ( 14+10)/2, ( 35+10)/2, ( 38+10)/2, + 34, 65, 61, // ( 57+10)/2, (120+10)/2, (111+10)/2, + 60, 104, 84 // (110+10)/2, (197+10)/2, (158+10)/2 + }; + + tflite::Padding padding = tflite::Padding_SAME; + + ConvolutionTest(tflite::BuiltinOperator_DEPTHWISE_CONV_2D, + ::tflite::TensorType_UINT8, + 1, // strideX + 1, // strideY + 1, // dilationX + 1, // dilationY + padding, + tflite::ActivationFunctionType_NONE, + backends, + inputShape, + filterShape, + outputShape, + inputValues, + filterValues, + expectedOutputValues, + biasShape, + biasValues); +} + +void DepthwiseConv2dSameInt8PerChannelTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 4, 4, 4 }; + std::vector filterShape { 1, 2, 2, 16 }; + std::vector biasShape {16} ; + std::vector outputShape { 1, 4, 4, 16 }; + + static std::vector inputValues = + { + 3,3,3,4, 4,4,0,0, 0,3,4,3, 0,2,2,3, + 3,0,3,0, 0,3,2,1, 4,1,2,2, 0,0,0,4, + 3,2,2,2, 2,1,0,4, 4,3,2,4, 3,2,0,0, + 4,1,4,4, 1,0,4,3, 3,2,0,3, 1,1,0,2 + }; + + std::vector filterValues = { 12,20,10, 3, 2,24, 9,10, 5,16,30,12, 3,10, 4,32, + 8, 0,30, 3, 0,16,12,15,20,12, 0, 3, 9,20, 8, 8, + 12,15,20, 0, 0, 0, 3,15,15, 8,40,12, 9, 5, 2,24, + 4, 0, 0, 6, 6, 0, 3, 5,20, 8,20, 3, 6,15, 4, 0 }; + std::vector filterScales = { 0.25, 0.2, 0.1, 0.3333333333, + 0.5, 0.125, 0.33333333, 0.2, + 0.2, 0.25, 0.1, 0.333333333, + 0.3333333333, 0.2, 0.5, 0.125 }; + + int32_t filterQuantizationDim = 3; + + int32_t depth_multiplier = 4; + + std::vector biasValues = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + + float inputScale = 1.0f; + std::vector biasScales {}; + std::vector biasOffsets {}; + std::vector filterOffsets {}; + for 
(const auto& filterScale: filterScales) + { + biasScales.push_back(inputScale * filterScale); + // filter and bias offset always needs to be zero for per channel. We don't support anything else + biasOffsets.push_back(0); + filterOffsets.push_back(0); + } + + std::vector expectedOutputValues = + { + 26,21,21, 7,12,17,28,21,20,22,25,26, 6,11,10,16, + 16,16, 4,12, 7,18,28,27,30,20,12,14,16,19,17, 6, + 12,12, 8, 0, 3,13,18,15,18,26,20,26,26,32,28,21, + 0, 0, 0, 0, 2, 6, 6, 4, 2, 8, 6, 8,15,10,10,24, + 20,21, 9, 7, 3, 6,15,16,17,22,17,22,17,18,14, 7, + 18, 6,16,12,12,11,17,15,18,18,10,12,27,26,22,18, + 27,28,12,10, 7, 3, 8,13, 8,12,14,16,26,24,24,24, + 9, 9, 6, 0, 0, 0, 2, 6, 0, 0, 0, 0, 4, 8, 8,16, + 26,24,17, 7, 2, 8,11,10,30,24,30,28,32,33,30,24, + 20,11,16,12, 7, 9,17,13,20,14,16,18,31,36,33,29, + 28,25,19, 9, 6,13,20,19, 2, 8, 6, 8,17,17,15,25, + 12,15, 5, 3, 2, 6, 7, 7, 0, 0, 0, 0, 6, 2, 2, 6, + 14,16, 7, 5, 1, 3, 3, 2,20,28,12,20,13,20,20,19, + 9, 4,10, 4, 0, 4, 8, 6, 4,16,12,16,12,18,18,15, + 11,12, 6, 4, 2, 8,10, 7, 0, 0, 0, 0, 9,14,14,14, + 3, 4, 1, 1, 1, 3, 3, 2, 0, 0, 0, 0, 2, 4, 4, 8 + }; + + tflite::Padding padding = tflite::Padding_SAME; + + ConvolutionTest(tflite::BuiltinOperator_DEPTHWISE_CONV_2D, + ::tflite::TensorType_INT8, + 1, // strideX + 1, // strideY + 1, // dilationX + 1, // dilationY + padding, + tflite::ActivationFunctionType_NONE, + backends, + inputShape, + filterShape, + outputShape, + inputValues, + filterValues, + expectedOutputValues, + biasShape, + biasValues, + biasScales, + biasOffsets, + filterScales, + filterOffsets, + 1.0f, + 0, + inputScale, + 0, + depth_multiplier, + filterQuantizationDim); +} + +TEST_SUITE("DepthwiseConv2d_CpuRef_Tests") +{ + +TEST_CASE ("DepthwiseConv2d_Valid_Relu_Fp32_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + DepthwiseConv2dValidReluFp32Test(backends); +} + +TEST_CASE ("DepthwiseConv2d_Same_Uint8_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + DepthwiseConv2dSameUint8Test(backends); +} + +TEST_CASE ("DepthwiseConv2d_Same_Int8_PerChannelQuantization_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + DepthwiseConv2dSameInt8PerChannelTest(backends); +} + +}//End of TEST_SUITE("DepthwiseConv2d_CpuRef_Tests") + +TEST_SUITE("DepthwiseConv2d_CpuAcc_Tests") +{ + +TEST_CASE ("DepthwiseConv2d_Valid_Relu_Fp32_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + DepthwiseConv2dValidReluFp32Test(backends); +} + +TEST_CASE ("DepthwiseConv2d_Same_Uint8_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + DepthwiseConv2dSameUint8Test(backends); +} + +}//End of TEST_SUITE("DepthwiseConv2d_CpuAcc_Tests") + +TEST_SUITE("DepthwiseConv2d_GpuAcc_Tests") +{ + +TEST_CASE ("DepthwiseConv2d_Valid_Relu_Fp32_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + DepthwiseConv2dValidReluFp32Test(backends); +} + +TEST_CASE ("DepthwiseConv2d_Same_Uint8_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + DepthwiseConv2dSameUint8Test(backends); +} + +}//End of TEST_SUITE("DepthwiseConv2d_GpuAcc_Tests") + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/ElementwiseBinaryTest.cpp b/arch/arm/ARMnn/delegate/src/test/ElementwiseBinaryTest.cpp new file mode 100644 index 0000000000..9d03204263 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/ElementwiseBinaryTest.cpp @@ -0,0 +1,1136 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ElementwiseBinaryTestHelper.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace armnnDelegate +{ + +void AddFP32Test(std::vector& backends) +{ + std::vector input0Shape { 2, 2, 2, 3 }; + std::vector input1Shape { 2, 2, 2, 3 }; + std::vector expectedOutputShape { 2, 2, 2, 3 }; + + std::vector input0Values = + { + 0.0f, 2.0f, 1.0f, + 0.2f, 1.0f, 2.0f, + + 1.0f, 2.0f, 1.0f, + 0.2f, 1.0f, 2.0f, + + 0.0f, 2.0f, 1.0f, + 4.2f, 1.0f, 2.0f, + + 0.0f, 0.0f, 1.0f, + 0.2f, 1.0f, 2.0f, + }; + + std::vector input1Values = + { + 1.0f, 2.0f, 1.0f, + 0.0f, 1.0f, 2.0f, + + 1.0f, 2.0f, -2.0f, + 0.2f, 1.0f, 2.0f, + + 0.0f, 2.0f, 1.0f, + 4.2f, 0.0f, -3.0f, + + 0.0f, 0.0f, 1.0f, + 0.7f, 1.0f, 5.0f, + }; + + std::vector expectedOutputValues = + { + 1.0f, 4.0f, 2.0f, + 0.2f, 2.0f, 4.0f, + + 2.0f, 4.0f, -1.0f, + 0.4f, 2.0f, 4.0f, + + 0.0f, 4.0f, 2.0f, + 8.4f, 1.0f, -1.0f, + + 0.0f, 0.0f, 2.0f, + 0.9f, 2.0f, 7.0f, + }; + + ElementwiseBinaryTest(tflite::BuiltinOperator_ADD, + tflite::ActivationFunctionType_NONE, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void AddBroadcastTest(std::vector& backends) +{ + std::vector input0Shape { 1, 3, 2, 1 }; + std::vector input1Shape { 1, 1, 2, 3 }; + std::vector expectedOutputShape { 1, 3, 2, 3 }; + + std::vector input0Values + { + 0.0f, + 1.0f, + + 2.0f, + 3.0f, + + 4.0f, + 5.0f, + }; + std::vector input1Values + { + 0.5f, 1.5f, 2.5f, + 3.5f, 4.5f, 5.5f, + }; + // Set output data + std::vector expectedOutputValues + { + 0.5f, 1.5f, 2.5f, + 4.5f, 5.5f, 6.5f, + + 2.5f, 3.5f, 4.5f, + 6.5f, 7.5f, 8.5f, + + 4.5f, 5.5f, 6.5f, + 8.5f, 9.5f, 10.5f, + }; + + ElementwiseBinaryTest(tflite::BuiltinOperator_ADD, + tflite::ActivationFunctionType_NONE, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void AddConstInputTest(std::vector& backends) +{ + std::vector input0Shape { 1, 3, 2, 1 }; + std::vector input1Shape { 1 }; + std::vector expectedOutputShape { 1, 3, 2, 1 }; + + std::vector input0Values + { + 0.0f, + 1.0f, + + 2.0f, + 3.0f, + + 4.0f, + 5.0f, + }; + std::vector input1Values + { + 0.5f + }; + // Set output data + std::vector expectedOutputValues + { + 0.5f, + 1.5f, + + 2.5f, + 3.5f, + + 4.5f, + 5.5f, + }; + + ElementwiseBinaryTest(tflite::BuiltinOperator_ADD, + tflite::ActivationFunctionType_NONE, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues, + 1.0f, + 0, + true); +} + +void AddActivationTest(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 1 }; + std::vector input1Shape { 1, 2, 2, 1 }; + std::vector expectedOutputShape { 1, 2, 2, 1 }; + + std::vector input0Values { 4.0f, 0.8f, 0.7f, -0.8f }; + std::vector input1Values { 0.7f, -1.2f, 0.8f, 0.5f }; + std::vector expectedOutputValues { 4.7f, 0.0f, 1.5f, 0.0f }; + + ElementwiseBinaryTest(tflite::BuiltinOperator_ADD, + tflite::ActivationFunctionType_RELU, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void AddUint8Test(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 3 }; + std::vector input1Shape { 1, 2, 2, 3 }; + std::vector expectedOutputShape { 1, 2, 2, 3 }; + + 
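+    // Inputs below are already quantized uint8 values; with the quantization parameters passed to
+    // ElementwiseBinaryTest (scale 7.0f, offset 3), any sum above the representable range saturates to 255
+    // in the expected output.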
std::vector input0Values = + { + 63, 35, 77, 70, 56, 112, + 203, 28, 252, 168, 245, 91 + }; + + std::vector input1Values = + { + 21, 7, 175, 231, 175, 210, + 126, 161, 63, 21, 105, 126 + }; + + std::vector expectedOutputValues = + { + 81, 39, 249, 255, 228, 255, + 255, 186, 255, 186, 255, 214, + }; + + ElementwiseBinaryTest(tflite::BuiltinOperator_ADD, + tflite::ActivationFunctionType_NONE, + ::tflite::TensorType_UINT8, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues, 7.0f, 3); +} + +void DivFP32Test(std::vector& backends) +{ + std::vector input0Shape { 2, 2, 2, 2 }; + std::vector input1Shape { 2, 2, 2, 2 }; + std::vector expectedOutputShape { 2, 2, 2, 2 }; + + std::vector input0Values = + { + 2.f, 2.f, 2.f, 2.f, 3.f, 3.f, 3.f, 3.f, + 4.f, 4.f, 4.f, 4.f, 5.f, 5.f, 5.f, 5.f + + }; + + std::vector input1Values = + { + 1.f, 1.f, 1.f, 1.f, 2.f, 2.f, 2.f, 2.f, + 4.f, 4.f, 4.f, 4.f, 4.f, 4.f, 4.f, 4.f + }; + + std::vector expectedOutputValues = + { + 2.f, 2.f, 2.f, 2.f, 1.50f, 1.50f, 1.50f, 1.50f, + 1.f, 1.f, 1.f, 1.f, 1.25f, 1.25f, 1.25f, 1.25f + }; + + ElementwiseBinaryTest(tflite::BuiltinOperator_DIV, + tflite::ActivationFunctionType_NONE, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void DivBroadcastTest(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 2 }; + std::vector input1Shape { 1, 1, 1, 1 }; + std::vector expectedOutputShape { 1, 2, 2, 2 }; + + std::vector input0Values = { 2, 4, 6, 8, 10, 12, 14, 16 }; + std::vector input1Values = { 2 }; + std::vector expectedOutputValues = { 1, 2, 3, 4, 5, 6, 7, 8 }; + + ElementwiseBinaryTest(tflite::BuiltinOperator_DIV, + tflite::ActivationFunctionType_NONE, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void DivUint8Test(std::vector& backends) +{ + std::vector input0Shape { 2, 2, 2, 2 }; + std::vector input1Shape { 2, 2, 2, 2 }; + std::vector expectedOutputShape { 2, 2, 2, 2 }; + + std::vector input0Values = + { + 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5 + + }; + + std::vector input1Values = + { + 1, 1, 1, 1, 2, 2, 2, 2, + 4, 4, 4, 4, 4, 4, 4, 4 + }; + + std::vector expectedOutputValues = + { + 8, 8, 8, 8, 6, 6, 6, 6, + 4, 4, 4, 4, 5, 5, 5, 5 + }; + + ElementwiseBinaryTest(tflite::BuiltinOperator_DIV, + tflite::ActivationFunctionType_NONE, + ::tflite::TensorType_UINT8, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues, 0.25f, 0); +} + +void FloorDivFP32Test(std::vector& backends) +{ + std::vector input0Shape { 2, 2, 2, 2 }; + std::vector input1Shape { 2, 2, 2, 2 }; + std::vector expectedOutputShape { 2, 2, 2, 2 }; + + std::vector input0Values = + { + -37.5f, -15.2f, -8.76f, -2.0f, -2.6f, -1.0f, -0.8f, 0.0f, + 4.0f, 1.6f, 2.0f, 5.2f, 6.0f, 35.04f, 60.8f, 150.0f + }; + + std::vector input1Values = + { + 1.f, 1.f, 1.f, 1.f, 2.f, 2.f, 2.f, 2.f, + 4.f, 4.f, 4.f, 4.f, 4.f, 4.f, 4.f, 4.f + }; + + std::vector expectedOutputValues = + { + -38.0f, -16.0f, -9.0f, -2.0f, -2.0f, -1.0f, -1.0f, 0.0f, + 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 8.0f, 15.0f, 37.0f + }; + + ElementwiseBinaryTest(tflite::BuiltinOperator_FLOOR_DIV, + tflite::ActivationFunctionType_NONE, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + 
input1Values, + expectedOutputValues); + +} + +void MaxFP32Test(std::vector& backends) +{ + std::vector input0Shape { 2, 2, 2, 2 }; + std::vector input1Shape { 2, 2, 2, 2 }; + std::vector expectedOutputShape { 2, 2, 2, 2 }; + + std::vector input0Values = + { + 1.f, 1.f, 5.f, 1.f, 2.f, 2.f, 7.f, 2.f, + 3.f, 3.f, 3.f, 3.f, 4.f, 4.f, 4.f, 4.f + + }; + + std::vector input1Values = + { + 2.f, 2.f, 2.f, 2.f, 3.f, 3.f, 3.f, 3.f, + 4.f, 4.f, 4.f, 4.f, 5.f, 5.f, 5.f, 5.f + }; + + std::vector expectedOutputValues = + { + 2.f, 2.f, 5.f, 2.f, 3.f, 3.f, 7.f, 3.f, + 4.f, 4.f, 4.f, 4.f, 5.f, 5.f, 5.f, 5.f + }; + + ElementwiseBinaryTest(tflite::BuiltinOperator_MAXIMUM, + tflite::ActivationFunctionType_NONE, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void MaxBroadcastTest(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 2 }; + std::vector input1Shape { 1, 1, 1, 1 }; + std::vector expectedOutputShape { 1, 2, 2, 2 }; + + std::vector input0Values = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f }; + std::vector input1Values = { 4.f }; + std::vector expectedOutputValues = { 4.f, 4.f, 4.f, 4.f, 5.f, 6.f, 7.f, 8.f }; + + ElementwiseBinaryTest(tflite::BuiltinOperator_MAXIMUM, + tflite::ActivationFunctionType_NONE, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void MaxUint8Test(std::vector& backends) +{ + std::vector input0Shape { 2, 2, 2, 2 }; + std::vector input1Shape { 2, 2, 2, 2 }; + std::vector expectedOutputShape { 2, 2, 2, 2 }; + + std::vector input0Values = + { + 1, 1, 1, 1, 7, 8, 9, 9, + 3, 3, 3, 3, 4, 4, 4, 4 + + }; + + std::vector input1Values = + { + 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5 + }; + + std::vector expectedOutputValues = + { + 2, 2, 2, 2, 7, 8, 9, 9, + 4, 4, 4, 4, 5, 5, 5, 5 + }; + + ElementwiseBinaryTest(tflite::BuiltinOperator_MAXIMUM, + tflite::ActivationFunctionType_NONE, + ::tflite::TensorType_UINT8, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues, 1.0f, 0); +} + +void MinFP32Test(std::vector& backends) +{ + std::vector input0Shape { 2, 2, 2, 2 }; + std::vector input1Shape { 2, 2, 2, 2 }; + std::vector expectedOutputShape { 2, 2, 2, 2 }; + + std::vector input0Values = + { + 1.f, 1.f, 5.f, 1.f, 2.f, 2.f, 7.f, 2.f, + 3.f, 3.f, 3.f, 3.f, 4.f, 4.f, 4.f, 4.f + + }; + + std::vector input1Values = + { + 2.f, 2.f, 2.f, 2.f, 3.f, 3.f, 3.f, 3.f, + 1.f, 1.f, 1.f, 1.f, 5.f, 5.f, 5.f, 5.f + }; + + std::vector expectedOutputValues = + { + 1.f, 1.f, 2.f, 1.f, 2.f, 2.f, 3.f, 2.f, + 1.f, 1.f, 1.f, 1.f, 4.f, 4.f, 4.f, 4.f + }; + + ElementwiseBinaryTest(tflite::BuiltinOperator_MINIMUM, + tflite::ActivationFunctionType_NONE, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void MinBroadcastTest(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 2 }; + std::vector input1Shape { 1, 1, 1, 1 }; + std::vector expectedOutputShape { 1, 2, 2, 2 }; + + std::vector input0Values = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f }; + + std::vector input1Values = { 4.f }; + + std::vector expectedOutputValues = { 1.f, 2.f, 3.f, 4.f, 4.f, 4.f, 4.f, 4.f }; + + ElementwiseBinaryTest(tflite::BuiltinOperator_MINIMUM, + tflite::ActivationFunctionType_NONE, + ::tflite::TensorType_FLOAT32, + 
backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void MinUint8Test(std::vector& backends) +{ + std::vector input0Shape { 2, 2, 2, 2 }; + std::vector input1Shape { 2, 2, 2, 2 }; + std::vector expectedOutputShape { 2, 2, 2, 2 }; + + std::vector input0Values = + { + 1, 1, 1, 1, 7, 8, 9, 9, + 3, 3, 3, 3, 4, 4, 4, 4 + + }; + + std::vector input1Values = + { + 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5 + }; + + std::vector expectedOutputValues = + { + 1, 1, 1, 1, 3, 3, 3, 3, + 3, 3, 3, 3, 4, 4, 4, 4 + }; + + ElementwiseBinaryTest(tflite::BuiltinOperator_MINIMUM, + tflite::ActivationFunctionType_NONE, + ::tflite::TensorType_UINT8, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues, 1.0f, 0); +} + +void MulFP32Test(std::vector& backends) +{ + std::vector input0Shape { 2, 2, 2, 2 }; + std::vector input1Shape { 2, 2, 2, 2 }; + std::vector expectedOutputShape { 2, 2, 2, 2 }; + + std::vector input0Values = + { + 1.f, 1.f, 1.f, 1.f, 2.f, 2.f, 2.f, 2.f, + 3.f, 3.f, 3.f, 3.f, 4.f, 4.f, 4.f, 4.f + + }; + + std::vector input1Values = + { + 2.f, 2.f, 2.f, 2.f, 3.f, 3.f, 3.f, 3.f, + 4.f, 4.f, 4.f, 4.f, 5.f, 5.f, 5.f, 5.f + }; + + std::vector expectedOutputValues = + { + 2.f, 2.f, 2.f, 2.f, 6.f, 6.f, 6.f, 6.f, + 12.f, 12.f, 12.f, 12.f, 20.f, 20.f, 20.f, 20.f + }; + + ElementwiseBinaryTest(tflite::BuiltinOperator_MUL, + tflite::ActivationFunctionType_NONE, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void MulBroadcastTest(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 2 }; + std::vector input1Shape { 1, 1, 1, 1 }; + std::vector expectedOutputShape { 1, 2, 2, 2 }; + + std::vector input0Values = { 2, 4, 6, 8, 10, 12, 14, 16 }; + std::vector input1Values = { 2 }; + std::vector expectedOutputValues = { 4, 8, 12, 16, 20, 24, 28, 32 }; + + ElementwiseBinaryTest(tflite::BuiltinOperator_MUL, + tflite::ActivationFunctionType_NONE, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void MulUint8Test(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 3 }; + std::vector input1Shape { 1, 1, 1, 3 }; + std::vector expectedOutputShape { 1, 2, 2, 3 }; + + std::vector input0Values = + { + 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12 + + }; + + std::vector input1Values = { 1, 2, 3 }; + + std::vector expectedOutputValues = + { + 1, 4, 9, 4, 10, 18, + 7, 16, 27, 10, 22, 36 + }; + + ElementwiseBinaryTest(tflite::BuiltinOperator_MUL, + tflite::ActivationFunctionType_NONE, + ::tflite::TensorType_UINT8, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues, 1.0f, 0); +} + +void MulActivationTest(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2, 1 }; + std::vector input1Shape { 1, 2, 2, 1 }; + std::vector expectedOutputShape { 1, 2, 2, 1 }; + + std::vector input0Values { 4.0f, 0.0f, 1.0f, 0.5f }; + std::vector input1Values { -2.0f, -1.2f, 2.5f, 2.0f }; + std::vector expectedOutputValues { 0.0f, 0.0f, 2.5f, 1.0f }; + + ElementwiseBinaryTest(tflite::BuiltinOperator_MUL, + tflite::ActivationFunctionType_RELU, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + 
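+// Elementwise SUB reference data: FP32, broadcast, and quantized UINT8 variants used by the backend test suites below.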
+void SubFP32Test(std::vector& backends) +{ + std::vector input0Shape { 1, 1, 2, 2 }; + std::vector input1Shape { 1, 1, 2, 2 }; + std::vector expectedOutputShape { 1, 1, 2, 2 }; + + std::vector input0Values = { 1, 3, 3, -7 }; + std::vector input1Values = { 1, -1, 0, -2 }; + std::vector expectedOutputValues = { 0, 4, 3, -5 }; + + ElementwiseBinaryTest(tflite::BuiltinOperator_SUB, + tflite::ActivationFunctionType_NONE, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void SubBroadcastTest(std::vector& backends) +{ + std::vector input0Shape { 1, 1, 2, 2 }; + std::vector input1Shape { 1, 1, 1, 1 }; + std::vector expectedOutputShape { 1, 1, 2, 2 }; + + std::vector input0Values = { 2, 3, 4, 5}; + std::vector input1Values = { 10 }; + std::vector expectedOutputValues = { -8, -7, -6, -5 }; + + ElementwiseBinaryTest(tflite::BuiltinOperator_SUB, + tflite::ActivationFunctionType_NONE, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void SubUint8Test(std::vector& backends) +{ + std::vector input0Shape { 1, 1, 2, 2 }; + std::vector input1Shape { 1, 1, 1, 1 }; + std::vector expectedOutputShape { 1, 1, 2, 2 }; + + std::vector input0Values = { 10, 12, 14, 16 }; + std::vector input1Values = { 2 }; + std::vector expectedOutputValues = { 8, 10, 12, 14 }; + + ElementwiseBinaryTest(tflite::BuiltinOperator_SUB, + tflite::ActivationFunctionType_NONE, + ::tflite::TensorType_UINT8, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues, 1.0f, 0); +} + +TEST_SUITE("ElementwiseBinary_GpuAccTests") +{ + +TEST_CASE ("ADD_FP32_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + AddFP32Test(backends); +} + +TEST_CASE ("ADD_Broadcast_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + AddBroadcastTest(backends); +} + +TEST_CASE ("ADD_Activation_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + AddActivationTest(backends); +} + +TEST_CASE ("ADD_UINT8_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + AddUint8Test(backends); +} + +TEST_CASE ("DIV_FP32_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + DivFP32Test(backends); +} + +TEST_CASE ("DIV_Broadcast_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + DivBroadcastTest(backends); +} + +TEST_CASE ("FLOORDIV_FP32_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + FloorDivFP32Test(backends); +} + +TEST_CASE ("MAX_FP32_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + MaxFP32Test(backends); +} + +TEST_CASE ("MAX_Broadcast_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + MaxBroadcastTest(backends); +} + +TEST_CASE ("MAX_UINT8_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + MaxUint8Test(backends); +} + +TEST_CASE ("MIN_FP32_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + MinFP32Test(backends); +} + +TEST_CASE ("MIN_Broadcast_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + MinBroadcastTest(backends); +} + +TEST_CASE ("MIN_UINT8_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + MinUint8Test(backends); +} + +TEST_CASE ("MUL_FP32_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + 
MulFP32Test(backends); +} + +TEST_CASE ("MUL_Broadcast_GpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc }; + MulBroadcastTest(backends); +} + +TEST_CASE ("MUL_Activation_GpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc }; + MulActivationTest(backends); +} + +TEST_CASE ("MUL_UINT8_GpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc }; + MulUint8Test(backends); +} + +TEST_CASE ("SUB_FP32_GpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc }; + SubFP32Test(backends); +} + +TEST_CASE ("SUB_Broadcast_GpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc }; + SubBroadcastTest(backends); +} + +TEST_CASE ("SUB_UINT8_GpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc }; + SubUint8Test(backends); +} + +} //TEST_SUITE("ElementwiseBinary_GpuAccTests") + + + +TEST_SUITE("ElementwiseBinary_CpuAccTests") +{ + +TEST_CASE ("ADD_FP32_CpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc }; + AddFP32Test(backends); +} + +TEST_CASE ("ADD_Broadcast_CpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc }; + AddBroadcastTest(backends); +} + +TEST_CASE ("ADD_Activation_CpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc }; + AddActivationTest(backends); +} + +TEST_CASE ("ADD_UINT8_CpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc }; + AddUint8Test(backends); +} + +TEST_CASE ("DIV_FP32_CpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc }; + DivFP32Test(backends); +} + +TEST_CASE ("DIV_Broadcast_CpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc }; + DivBroadcastTest(backends); +} + +TEST_CASE ("FLOORDIV_FP32_CpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc }; + FloorDivFP32Test(backends); +} + +TEST_CASE ("MAX_FP32_CpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc }; + MaxFP32Test(backends); +} + +TEST_CASE ("MAX_Broadcast_CpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc }; + MaxBroadcastTest(backends); +} + +TEST_CASE ("MAX_UINT8_CpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc }; + MaxUint8Test(backends); +} + +TEST_CASE ("MIN_FP32_CpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc }; + MinFP32Test(backends); +} + +TEST_CASE ("MIN_Broadcast_CpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc }; + MinBroadcastTest(backends); +} + +TEST_CASE ("MIN_UINT8_CpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc }; + MinUint8Test(backends); +} + +TEST_CASE ("MUL_FP32_CpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc }; + MulFP32Test(backends); +} + +TEST_CASE ("MUL_Broadcast_CpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc }; + MulBroadcastTest(backends); +} + +TEST_CASE ("MUL_Activation_CpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc }; + MulActivationTest(backends); +} + +TEST_CASE ("MUL_UINT8_CpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc }; + MulUint8Test(backends); +} + +TEST_CASE ("SUB_FP32_CpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc }; + SubFP32Test(backends); +} + +TEST_CASE ("SUB_Broadcast_CpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc }; + SubBroadcastTest(backends); +} + +TEST_CASE ("SUB_UINT8_CpuAcc_Test") +{ + std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc }; + SubUint8Test(backends); +} + +} // TEST_SUITE("ElementwiseBinary_CpuAccTests") + + +TEST_SUITE("ElementwiseBinary_CpuRefTests") +{ + 
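// The reference-backend suite below repeats the cases run on GpuAcc and CpuAcc, and additionally
// exercises the constant-input ADD path and the UINT8 DIV case, which are only run against CpuRef.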
+TEST_CASE ("ADD_FP32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + AddFP32Test(backends); +} + +TEST_CASE ("ADD_Broadcast_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + AddBroadcastTest(backends); +} + +TEST_CASE ("ADD_Constant_Input_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + AddConstInputTest(backends); +} + +TEST_CASE ("ADD_Actiation_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + AddActivationTest(backends); +} + +TEST_CASE ("ADD_UINT8_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + AddUint8Test(backends); +} + +TEST_CASE ("DIV_FP32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + DivFP32Test(backends); +} + +TEST_CASE ("DIV_Broadcast_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + DivBroadcastTest(backends); +} + +TEST_CASE ("FLOORDIV_FP32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + FloorDivFP32Test(backends); +} + +TEST_CASE ("DIV_UINT8_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + DivUint8Test(backends); +} + +TEST_CASE ("MAX_FP32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MaxFP32Test(backends); +} + +TEST_CASE ("MAX_Broadcast_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MaxBroadcastTest(backends); +} + +TEST_CASE ("MAX_UINT8_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MaxUint8Test(backends); +} + +TEST_CASE ("MIN_FP32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MinFP32Test(backends); +} + +TEST_CASE ("MIN_Broadcast_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MinBroadcastTest(backends); +} + +TEST_CASE ("MIN_UINT8_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MinUint8Test(backends); +} + +TEST_CASE ("MUL_FP32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MulFP32Test(backends); +} + +TEST_CASE ("MUL_Broadcast_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MulBroadcastTest(backends); +} + +TEST_CASE ("MUL_Actiation_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MulActivationTest(backends); +} + +TEST_CASE ("MUL_UINT8_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MulUint8Test(backends); +} + +TEST_CASE ("SUB_FP32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + SubFP32Test(backends); +} + +TEST_CASE ("SUB_Broadcast_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + SubBroadcastTest(backends); +} + +TEST_CASE ("SUB_UINT8_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + SubUint8Test(backends); +} + +} // TEST_SUITE("ElementwiseBinary_CpuRefTests") + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/test/ElementwiseBinaryTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/ElementwiseBinaryTestHelper.hpp new file mode 100644 index 0000000000..69b0c88dc8 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/ElementwiseBinaryTestHelper.hpp @@ -0,0 +1,242 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ + +template +std::vector CreateElementwiseBinaryTfLiteModel(tflite::BuiltinOperator binaryOperatorCode, + tflite::ActivationFunctionType activationType, + tflite::TensorType tensorType, + const std::vector & input0TensorShape, + const std::vector & input1TensorShape, + const std::vector & outputTensorShape, + std::vector& input1Values, + bool constantInput = false, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::vector> buffers; + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + if (constantInput) + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(input1Values.data()), + sizeof(T) * input1Values.size()))); + } + else + { + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + } + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + + + std::array, 3> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(input0TensorShape.data(), + input0TensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input_0"), + quantizationParameters); + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(input1TensorShape.data(), + input1TensorShape.size()), + tensorType, + 1, + flatBufferBuilder.CreateString("input_1"), + quantizationParameters); + tensors[2] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + tensorType, + 2, + flatBufferBuilder.CreateString("output"), + quantizationParameters); + + // create operator + tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_NONE; + flatbuffers::Offset operatorBuiltinOptions = 0; + switch (binaryOperatorCode) + { + case BuiltinOperator_ADD: + { + operatorBuiltinOptionsType = BuiltinOptions_AddOptions; + operatorBuiltinOptions = CreateAddOptions(flatBufferBuilder, activationType).Union(); + break; + } + case BuiltinOperator_DIV: + { + operatorBuiltinOptionsType = BuiltinOptions_DivOptions; + operatorBuiltinOptions = CreateDivOptions(flatBufferBuilder, activationType).Union(); + break; + } + case BuiltinOperator_MAXIMUM: + { + operatorBuiltinOptionsType = BuiltinOptions_MaximumMinimumOptions; + operatorBuiltinOptions = CreateMaximumMinimumOptions(flatBufferBuilder).Union(); + break; + } + case BuiltinOperator_MINIMUM: + { + operatorBuiltinOptionsType = BuiltinOptions_MaximumMinimumOptions; + operatorBuiltinOptions = CreateMaximumMinimumOptions(flatBufferBuilder).Union(); + break; + } + case BuiltinOperator_MUL: + { + operatorBuiltinOptionsType = BuiltinOptions_MulOptions; + operatorBuiltinOptions = CreateMulOptions(flatBufferBuilder, activationType).Union(); + break; + } + case BuiltinOperator_SUB: + { + operatorBuiltinOptionsType = BuiltinOptions_SubOptions; + operatorBuiltinOptions = CreateSubOptions(flatBufferBuilder, activationType).Union(); + break; + } + case BuiltinOperator_FLOOR_DIV: + { + operatorBuiltinOptionsType = tflite::BuiltinOptions_FloorDivOptions; 
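// Note: FloorDivOptions is an empty table in the TFLite schema, so the SubOptions table written
// on the next line still deserializes under BuiltinOptions_FloorDivOptions; using
// CreateFloorDivOptions(flatBufferBuilder).Union() here would arguably state the intent better.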
+ operatorBuiltinOptions = CreateSubOptions(flatBufferBuilder, activationType).Union(); + break; + } + default: + break; + } + const std::vector operatorInputs{0, 1}; + const std::vector operatorOutputs{2}; + flatbuffers::Offset elementwiseBinaryOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + const std::vector subgraphInputs{0, 1}; + const std::vector subgraphOutputs{2}; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&elementwiseBinaryOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Elementwise Binary Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, binaryOperatorCode); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void ElementwiseBinaryTest(tflite::BuiltinOperator binaryOperatorCode, + tflite::ActivationFunctionType activationType, + tflite::TensorType tensorType, + std::vector& backends, + std::vector& input0Shape, + std::vector& input1Shape, + std::vector& outputShape, + std::vector& input0Values, + std::vector& input1Values, + std::vector& expectedOutputValues, + float quantScale = 1.0f, + int quantOffset = 0, + bool constantInput = false) +{ + using namespace tflite; + std::vector modelBuffer = CreateElementwiseBinaryTfLiteModel(binaryOperatorCode, + activationType, + tensorType, + input0Shape, + input1Shape, + outputShape, + input1Values, + constantInput, + quantScale, + quantOffset); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(tfLiteInterpreter, 0, input0Values); + 
armnnDelegate::FillInput(armnnDelegateInterpreter, 0, input0Values); + if (!constantInput) + { + armnnDelegate::FillInput(tfLiteInterpreter, 1, input1Values); + armnnDelegate::FillInput(armnnDelegateInterpreter, 1, input1Values); + } + // Run EnqueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + armnnDelegate::CompareOutputData(tfLiteInterpreter, + armnnDelegateInterpreter, + outputShape, + expectedOutputValues); + armnnDelegateInterpreter.reset(nullptr); +} + +} // anonymous namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/ElementwiseUnaryTest.cpp b/arch/arm/ARMnn/delegate/src/test/ElementwiseUnaryTest.cpp new file mode 100644 index 0000000000..3200423b10 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/ElementwiseUnaryTest.cpp @@ -0,0 +1,303 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ElementwiseUnaryTestHelper.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace armnnDelegate +{ + +TEST_SUITE("ElementwiseUnary_GpuAccTests") +{ + +TEST_CASE ("Abs_Float32_GpuAcc_Test") +{ + // Create the ArmNN Delegate + std::vector backends = { armnn::Compute::GpuAcc }; + // Set input data + std::vector inputValues + { + -0.1f, -0.2f, -0.3f, + 0.1f, 0.2f, 0.3f + }; + // Calculate output data + std::vector expectedOutputValues(inputValues.size()); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + expectedOutputValues[i] = std::abs(inputValues[i]); + } + ElementwiseUnaryFP32Test(tflite::BuiltinOperator_ABS, backends, inputValues, expectedOutputValues); +} + +TEST_CASE ("Exp_Float32_GpuAcc_Test") +{ + // Create the ArmNN Delegate + std::vector backends = { armnn::Compute::GpuAcc }; + // Set input data + std::vector inputValues + { + 5.0f, 4.0f, + 3.0f, 2.0f, + 1.0f, 1.1f + }; + // Set output data + std::vector expectedOutputValues + { + 148.413159102577f, 54.598150033144f, + 20.085536923188f, 7.389056098931f, + 2.718281828459f, 3.004166023946f + }; + + ElementwiseUnaryFP32Test(tflite::BuiltinOperator_EXP, backends, inputValues, expectedOutputValues); +} + +TEST_CASE ("Neg_Float32_GpuAcc_Test") +{ + // Create the ArmNN Delegate + std::vector backends = { armnn::Compute::GpuAcc }; + // Set input data + std::vector inputValues + { + 1.f, 0.f, 3.f, + 25.f, 64.f, 100.f + }; + // Set output data + std::vector expectedOutputValues + { + -1.f, 0.f, -3.f, + -25.f, -64.f, -100.f + }; + + ElementwiseUnaryFP32Test(tflite::BuiltinOperator_NEG, backends, inputValues, expectedOutputValues); +} + +TEST_CASE ("Rsqrt_Float32_GpuAcc_Test") +{ + // Create the ArmNN Delegate + std::vector backends = { armnn::Compute::GpuAcc }; + // Set input data + std::vector inputValues + { + 1.f, 4.f, 16.f, + 25.f, 64.f, 100.f + }; + // Set output data + std::vector expectedOutputValues + { + 1.f, 0.5f, 0.25f, + 0.2f, 0.125f, 0.1f + }; + + ElementwiseUnaryFP32Test(tflite::BuiltinOperator_RSQRT, backends, inputValues, expectedOutputValues); +} + +} // TEST_SUITE("ElementwiseUnary_GpuAccTests") + + + +TEST_SUITE("ElementwiseUnary_CpuAccTests") +{ + +TEST_CASE ("Abs_Float32_CpuAcc_Test") +{ + // Create the ArmNN Delegate + std::vector backends = { armnn::Compute::CpuAcc }; + // Set input data + std::vector inputValues + { + -0.1f, -0.2f, -0.3f, + 0.1f, 0.2f, 0.3f + }; + // Calculate output data + std::vector expectedOutputValues(inputValues.size()); + for (unsigned int 
i = 0; i < inputValues.size(); ++i) + { + expectedOutputValues[i] = std::abs(inputValues[i]); + } + + ElementwiseUnaryFP32Test(tflite::BuiltinOperator_ABS, backends, inputValues, expectedOutputValues); +} + +TEST_CASE ("Exp_Float32_CpuAcc_Test") +{ + // Create the ArmNN Delegate + std::vector backends = { armnn::Compute::CpuAcc }; + // Set input data + std::vector inputValues + { + 5.0f, 4.0f, + 3.0f, 2.0f, + 1.0f, 1.1f + }; + // Set output data + std::vector expectedOutputValues + { + 148.413159102577f, 54.598150033144f, + 20.085536923188f, 7.389056098931f, + 2.718281828459f, 3.004166023946f + }; + + ElementwiseUnaryFP32Test(tflite::BuiltinOperator_EXP, backends, inputValues, expectedOutputValues); +} + +TEST_CASE ("Neg_Float32_CpuAcc_Test") +{ + // Create the ArmNN Delegate + std::vector backends = { armnn::Compute::CpuAcc }; + // Set input data + std::vector inputValues + { + 1.f, 0.f, 3.f, + 25.f, 64.f, 100.f + }; + // Set output data + std::vector expectedOutputValues + { + -1.f, 0.f, -3.f, + -25.f, -64.f, -100.f + }; + + ElementwiseUnaryFP32Test(tflite::BuiltinOperator_NEG, backends, inputValues, expectedOutputValues); +} + +TEST_CASE ("Rsqrt_Float32_CpuAcc_Test") +{ + // Create the ArmNN Delegate + std::vector backends = { armnn::Compute::CpuAcc }; + // Set input data + std::vector inputValues + { + 1.f, 4.f, 16.f, + 25.f, 64.f, 100.f + }; + // Set output data + std::vector expectedOutputValues + { + 1.f, 0.5f, 0.25f, + 0.2f, 0.125f, 0.1f + }; + + ElementwiseUnaryFP32Test(tflite::BuiltinOperator_RSQRT, backends, inputValues, expectedOutputValues); +} + +} // TEST_SUITE("ElementwiseUnary_CpuAccTests") + +TEST_SUITE("ElementwiseUnary_CpuRefTests") +{ + +TEST_CASE ("Abs_Float32_CpuRef_Test") +{ + // Create the ArmNN Delegate + std::vector backends = { armnn::Compute::CpuRef }; + // Set input data + std::vector inputValues + { + -0.1f, -0.2f, -0.3f, + 0.1f, 0.2f, 0.3f + }; + // Calculate output data + std::vector expectedOutputValues(inputValues.size()); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + expectedOutputValues[i] = std::abs(inputValues[i]); + } + + ElementwiseUnaryFP32Test(tflite::BuiltinOperator_ABS, backends, inputValues, expectedOutputValues); +} + +TEST_CASE ("Exp_Float32_CpuRef_Test") +{ + // Create the ArmNN Delegate + std::vector backends = { armnn::Compute::CpuRef }; + // Set input data + std::vector inputValues + { + 5.0f, 4.0f, + 3.0f, 2.0f, + 1.0f, 1.1f + }; + // Set output data + std::vector expectedOutputValues + { + 148.413159102577f, 54.598150033144f, + 20.085536923188f, 7.389056098931f, + 2.718281828459f, 3.004166023946f + }; + + ElementwiseUnaryFP32Test(tflite::BuiltinOperator_EXP, backends, inputValues, expectedOutputValues); +} + +TEST_CASE ("Neg_Float32_CpuRef_Test") +{ + // Create the ArmNN Delegate + std::vector backends = { armnn::Compute::CpuRef }; + // Set input data + std::vector inputValues + { + 1.f, 0.f, 3.f, + 25.f, 64.f, 100.f + }; + // Set output data + std::vector expectedOutputValues + { + -1.f, 0.f, -3.f, + -25.f, -64.f, -100.f + }; + + ElementwiseUnaryFP32Test(tflite::BuiltinOperator_NEG, backends, inputValues, expectedOutputValues); +} + +TEST_CASE ("Rsqrt_Float32_CpuRef_Test") +{ + // Create the ArmNN Delegate + std::vector backends = { armnn::Compute::CpuRef }; + // Set input data + std::vector inputValues + { + 1.f, 4.f, 16.f, + 25.f, 64.f, 100.f + }; + // Set output data + std::vector expectedOutputValues + { + 1.f, 0.5f, 0.25f, + 0.2f, 0.125f, 0.1f + }; + + ElementwiseUnaryFP32Test(tflite::BuiltinOperator_RSQRT, 
backends, inputValues, expectedOutputValues); +} + +TEST_CASE ("Sqrt_Float32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + // Set input data + std::vector inputValues + { + 9.0f, 4.25f, 81.9f, + 0.1f, 0.9f, 169.0f + }; + // Calculate output data + std::vector expectedOutputValues(inputValues.size()); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + expectedOutputValues[i] = std::sqrt(inputValues[i]); + } + + ElementwiseUnaryFP32Test(tflite::BuiltinOperator_SQRT, backends, inputValues, expectedOutputValues); +} + +} // TEST_SUITE("ElementwiseUnary_CpuRefTests") + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/ElementwiseUnaryTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/ElementwiseUnaryTestHelper.hpp new file mode 100644 index 0000000000..dcc7074753 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/ElementwiseUnaryTestHelper.hpp @@ -0,0 +1,189 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ + +std::vector CreateElementwiseUnaryTfLiteModel(tflite::BuiltinOperator unaryOperatorCode, + tflite::TensorType tensorType, + const std::vector & tensorShape) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::array, 1> buffers; + buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})); + + std::array, 2> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorShape.data(), tensorShape.size()), + tensorType); + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorShape.data(), tensorShape.size()), + tensorType); + + // create operator + const std::vector operatorInputs{0}; + const std::vector operatorOutputs{1}; + flatbuffers::Offset unaryOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size())); + + const std::vector subgraphInputs{0}; + const std::vector subgraphOutputs{1}; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&unaryOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Elementwise Unary Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, unaryOperatorCode); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +void ElementwiseUnaryFP32Test(tflite::BuiltinOperator unaryOperatorCode, + std::vector& backends, + std::vector& inputValues, + std::vector& expectedOutputValues) +{ + using namespace tflite; + std::vector 
inputShape { { 3, 1, 2} }; + std::vector modelBuffer = CreateElementwiseUnaryTfLiteModel(unaryOperatorCode, + ::tflite::TensorType_FLOAT32, + inputShape); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(armnnDelegateInterpreter, 0, inputValues); + armnnDelegate::FillInput(tfLiteInterpreter, 0, inputValues); + + // Run EnqueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + armnnDelegate::CompareOutputData(tfLiteInterpreter, armnnDelegateInterpreter, inputShape, expectedOutputValues); + + armnnDelegateInterpreter.reset(nullptr); + tfLiteInterpreter.reset(nullptr); +} + +void ElementwiseUnaryBoolTest(tflite::BuiltinOperator unaryOperatorCode, + std::vector& backends, + std::vector& inputShape, + std::vector& inputValues, + std::vector& expectedOutputValues) +{ + using namespace tflite; + std::vector modelBuffer = CreateElementwiseUnaryTfLiteModel(unaryOperatorCode, + ::tflite::TensorType_BOOL, + inputShape); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(armnnDelegateInterpreter, 0, inputValues); + armnnDelegate::FillInput(tfLiteInterpreter, 0, inputValues); + + // Run EnqueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data, comparing Boolean values is handled differently and needs to call the CompareData 
function + // directly instead. This is because Boolean types get converted to a bit representation in a vector. + auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0]; + auto tfLiteDelegateOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0]; + auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateOutputId); + + armnnDelegate::CompareData(expectedOutputValues, armnnDelegateOutputData, expectedOutputValues.size()); + armnnDelegate::CompareData(expectedOutputValues, tfLiteDelegateOutputData, expectedOutputValues.size()); + armnnDelegate::CompareData(tfLiteDelegateOutputData, armnnDelegateOutputData, expectedOutputValues.size()); + + armnnDelegateInterpreter.reset(nullptr); + tfLiteInterpreter.reset(nullptr); +} + +} // anonymous namespace + + + + diff --git a/arch/arm/ARMnn/delegate/src/test/FillTest.cpp b/arch/arm/ARMnn/delegate/src/test/FillTest.cpp new file mode 100644 index 0000000000..50f7f53d56 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/FillTest.cpp @@ -0,0 +1,221 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "FillTestHelper.hpp" + +#include + +#include +#include + +#include + +namespace armnnDelegate +{ + +void Fill2dTest(std::vector& backends, + tflite::BuiltinOperator fillOperatorCode = tflite::BuiltinOperator_FILL, + float fill = 2.0f ) +{ + std::vector inputShape { 2 }; + std::vector tensorShape { 2, 2 }; + std::vector expectedOutputValues = { fill, fill, + fill, fill }; + + FillTest(fillOperatorCode, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + tensorShape, + expectedOutputValues, + fill); +} + +void Fill3dTest(std::vector& backends, + tflite::BuiltinOperator fillOperatorCode = tflite::BuiltinOperator_FILL, + float fill = 5.0f ) +{ + std::vector inputShape { 3 }; + std::vector tensorShape { 3, 3, 3 }; + std::vector expectedOutputValues = { fill, fill, fill, + fill, fill, fill, + fill, fill, fill, + + fill, fill, fill, + fill, fill, fill, + fill, fill, fill, + + fill, fill, fill, + fill, fill, fill, + fill, fill, fill }; + + FillTest(fillOperatorCode, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + tensorShape, + expectedOutputValues, + fill); +} + +void Fill4dTest(std::vector& backends, + tflite::BuiltinOperator fillOperatorCode = tflite::BuiltinOperator_FILL, + float fill = 3.0f ) +{ + std::vector inputShape { 4 }; + std::vector tensorShape { 2, 2, 4, 4 }; + std::vector expectedOutputValues = { fill, fill, fill, fill, + fill, fill, fill, fill, + fill, fill, fill, fill, + fill, fill, fill, fill, + + fill, fill, fill, fill, + fill, fill, fill, fill, + fill, fill, fill, fill, + fill, fill, fill, fill, + + fill, fill, fill, fill, + fill, fill, fill, fill, + fill, fill, fill, fill, + fill, fill, fill, fill, + + fill, fill, fill, fill, + fill, fill, fill, fill, + fill, fill, fill, fill, + fill, fill, fill, fill }; + + FillTest(fillOperatorCode, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + tensorShape, + expectedOutputValues, + fill); +} + +void FillInt32Test(std::vector& backends, + tflite::BuiltinOperator fillOperatorCode = tflite::BuiltinOperator_FILL, + int32_t fill = 2 ) +{ + std::vector inputShape { 2 }; + std::vector tensorShape { 2, 2 }; + std::vector expectedOutputValues = { fill, fill, + fill, fill }; + + FillTest(fillOperatorCode, + ::tflite::TensorType_INT32, + backends, + inputShape, + tensorShape, + expectedOutputValues, 
+ fill); +} + +TEST_SUITE("Fill_CpuRefTests") +{ + +TEST_CASE ("Fill2d_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + Fill2dTest(backends); +} + +TEST_CASE ("Fill3d_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + Fill3dTest(backends); +} + +TEST_CASE ("Fill3d_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + Fill3dTest(backends); +} + +TEST_CASE ("Fill4d_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + Fill4dTest(backends); +} + +TEST_CASE ("FillInt32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + FillInt32Test(backends); +} + +} + +TEST_SUITE("Fill_CpuAccTests") +{ + +TEST_CASE ("Fill2d_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + Fill2dTest(backends); +} + +TEST_CASE ("Fill3d_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + Fill3dTest(backends); +} + +TEST_CASE ("Fill3d_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + Fill3dTest(backends); +} + +TEST_CASE ("Fill4d_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + Fill4dTest(backends); +} + +TEST_CASE ("FillInt32_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + FillInt32Test(backends); +} + +} + +TEST_SUITE("Fill_GpuAccTests") +{ + +TEST_CASE ("Fill2d_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + Fill2dTest(backends); +} + +TEST_CASE ("Fill3d_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + Fill3dTest(backends); +} + +TEST_CASE ("Fill3d_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + Fill3dTest(backends); +} + +TEST_CASE ("Fill4d_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + Fill4dTest(backends); +} + +TEST_CASE ("FillInt32_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + FillInt32Test(backends); +} + +} + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/FillTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/FillTestHelper.hpp new file mode 100644 index 0000000000..e6890a2b2d --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/FillTestHelper.hpp @@ -0,0 +1,160 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ + +template +std::vector CreateFillTfLiteModel(tflite::BuiltinOperator fillOperatorCode, + tflite::TensorType tensorType, + const std::vector& inputShape, + const std::vector & tensorShape, + const std::vector fillValue) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::vector> buffers; + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector({}))); + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(tensorShape.data()), + sizeof(int32_t) * tensorShape.size()))); + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(fillValue.data()), + sizeof(T) * fillValue.size()))); + + std::array, 3> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputShape.data(), + inputShape.size()), + tflite::TensorType_INT32, + 1, + flatBufferBuilder.CreateString("dims")); + + std::vector fillShape = {}; + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(fillShape.data(), + fillShape.size()), + tensorType, + 2, + flatBufferBuilder.CreateString("value")); + + tensors[2] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorShape.data(), + tensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output")); + + tflite::BuiltinOptions operatorBuiltinOptionsType = BuiltinOptions_FillOptions; + flatbuffers::Offset operatorBuiltinOptions = CreateFillOptions(flatBufferBuilder).Union(); + + // create operator + const std::vector operatorInputs{ {0, 1} }; + const std::vector operatorOutputs{ 2 }; + flatbuffers::Offset fillOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + const std::vector subgraphInputs{ {0, 1} }; + const std::vector subgraphOutputs{ 2 }; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&fillOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Fill Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, + fillOperatorCode); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); + +} + +template +void FillTest(tflite::BuiltinOperator fillOperatorCode, + tflite::TensorType tensorType, + const std::vector& backends, + std::vector& inputShape, + std::vector& tensorShape, + std::vector& expectedOutputValues, + T fillValue) +{ + using namespace tflite; + std::vector 
modelBuffer = CreateFillTfLiteModel(fillOperatorCode, + tensorType, + inputShape, + tensorShape, + {fillValue}); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + CHECK(tfLiteModel != nullptr); + + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Run EnqueueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + armnnDelegate::CompareOutputData(tfLiteInterpreter, armnnDelegateInterpreter, tensorShape, expectedOutputValues); +} + +} // anonymous namespace diff --git a/arch/arm/ARMnn/delegate/src/test/FullyConnectedTest.cpp b/arch/arm/ARMnn/delegate/src/test/FullyConnectedTest.cpp new file mode 100644 index 0000000000..c300bc72bf --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/FullyConnectedTest.cpp @@ -0,0 +1,178 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "FullyConnectedTestHelper.hpp" + +namespace +{ + +void FullyConnectedFp32Test(std::vector& backends, bool constantWeights = true) +{ + std::vector inputTensorShape { 1, 4, 1, 1 }; + std::vector weightsTensorShape { 1, 4 }; + std::vector biasTensorShape { 1 }; + std::vector outputTensorShape { 1, 1 }; + + std::vector inputValues = { 10, 20, 30, 40 }; + std::vector weightsData = { 2, 3, 4, 5 }; + + std::vector expectedOutputValues = { (400 + 10) }; + + // bias is set std::vector biasData = { 10 } in the model + FullyConnectedTest(backends, + ::tflite::TensorType_FLOAT32, + tflite::ActivationFunctionType_NONE, + inputTensorShape, + weightsTensorShape, + biasTensorShape, + outputTensorShape, + inputValues, + expectedOutputValues, + weightsData, + constantWeights); +} + +void FullyConnectedActivationTest(std::vector& backends, bool constantWeights = true) +{ + std::vector inputTensorShape { 1, 4, 1, 1 }; + std::vector weightsTensorShape { 1, 4 }; + std::vector biasTensorShape { 1 }; + std::vector outputTensorShape { 1, 1 }; + + std::vector inputValues = { -10, 20, 30, 40 }; + std::vector weightsData = { 2, 3, 4, -5 }; + + std::vector expectedOutputValues = { 0 }; + + // bias is set std::vector biasData = { 10 } in the model + FullyConnectedTest(backends, + ::tflite::TensorType_FLOAT32, + tflite::ActivationFunctionType_RELU, + inputTensorShape, + weightsTensorShape, + biasTensorShape, + outputTensorShape, + inputValues, + expectedOutputValues, + weightsData, + constantWeights); +} + +void FullyConnectedInt8Test(std::vector& backends, bool constantWeights = true) +{ + std::vector inputTensorShape { 1, 4, 2, 1 }; + std::vector weightsTensorShape { 1, 4 }; + std::vector biasTensorShape { 1 }; + std::vector outputTensorShape { 2, 1 }; + + std::vector inputValues = { 1, 2, 3, 4, 5, 10, 15, 20 }; + std::vector weightsData = { 2, 3, 4, 5 }; + + std::vector expectedOutputValues = { 25, 105 }; // (40 + 10) / 2, (200 + 10) / 2 + + // bias is set std::vector biasData = { 10 } in the model + // input and weights quantization scale 1.0f and offset 0 in the model + // output quantization scale 2.0f and offset 0 in the model + FullyConnectedTest(backends, + ::tflite::TensorType_INT8, + tflite::ActivationFunctionType_NONE, + inputTensorShape, + weightsTensorShape, + biasTensorShape, + outputTensorShape, + inputValues, + expectedOutputValues, + weightsData, + constantWeights); +} + +TEST_SUITE("FullyConnected_GpuAccTests") +{ + +TEST_CASE ("FullyConnected_FP32_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + FullyConnectedFp32Test(backends); +} + +TEST_CASE ("FullyConnected_Int8_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + FullyConnectedInt8Test(backends); +} + +TEST_CASE ("FullyConnected_Activation_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + FullyConnectedActivationTest(backends); +} + +} // End of TEST_SUITE("FullyConnected_GpuAccTests") + +TEST_SUITE("FullyConnected_CpuAccTests") +{ + +TEST_CASE ("FullyConnected_FP32_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + FullyConnectedFp32Test(backends); +} + +TEST_CASE ("FullyConnected_Int8_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + FullyConnectedInt8Test(backends); +} + +TEST_CASE ("FullyConnected_Activation_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + FullyConnectedActivationTest(backends); +} + +} // End of TEST_SUITE("FullyConnected_CpuAccTests") + 
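// For reference, the expected values in the helpers above follow dot(input, weights) + bias:
// the FP32 case is 10*2 + 20*3 + 30*4 + 40*5 + 10 = 410, i.e. the (400 + 10) literal, and the
// INT8 case gives (40 + 10) / 2 = 25 and (200 + 10) / 2 = 105 once the output scale of 2.0f is applied.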
+TEST_SUITE("FullyConnected_CpuRefTests") +{ + +TEST_CASE ("FullyConnected_FP32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + FullyConnectedFp32Test(backends); +} + +TEST_CASE ("FullyConnected_Int8_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + FullyConnectedInt8Test(backends); +} + +TEST_CASE ("FullyConnected_Activation_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + FullyConnectedActivationTest(backends); +} + +TEST_CASE ("FullyConnected_Weights_As_Inputs_FP32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + FullyConnectedFp32Test(backends, false); +} + +TEST_CASE ("FullyConnected_Weights_As_Inputs_Int8_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + FullyConnectedInt8Test(backends, false); +} + +TEST_CASE ("FullyConnected_Weights_As_Inputs_Activation_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + FullyConnectedActivationTest(backends, false); +} + +} // End of TEST_SUITE("FullyConnected_CpuRefTests") + +} // anonymous namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/FullyConnectedTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/FullyConnectedTestHelper.hpp new file mode 100644 index 0000000000..37062c3400 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/FullyConnectedTestHelper.hpp @@ -0,0 +1,253 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ + +template +std::vector CreateFullyConnectedTfLiteModel(tflite::TensorType tensorType, + tflite::ActivationFunctionType activationType, + const std::vector & inputTensorShape, + const std::vector & weightsTensorShape, + const std::vector & biasTensorShape, + std::vector & outputTensorShape, + std::vector & weightsData, + bool constantWeights = true, + float quantScale = 1.0f, + int quantOffset = 0, + float outputQuantScale = 2.0f, + int outputQuantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + std::array, 3> buffers; + buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})); + + auto biasTensorType = ::tflite::TensorType_FLOAT32; + if (tensorType == ::tflite::TensorType_INT8) + { + biasTensorType = ::tflite::TensorType_INT32; + } + if (constantWeights) + { + buffers[1] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(weightsData.data()), + sizeof(T) * weightsData.size())); + + if (tensorType == ::tflite::TensorType_INT8) + { + std::vector biasData = { 10 }; + buffers[2] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(biasData.data()), + sizeof(int32_t) * biasData.size())); + + } + else + { + std::vector biasData = { 10 }; + buffers[2] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(biasData.data()), + sizeof(float) * biasData.size())); + } + } + else + { + buffers[1] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})); + buffers[2] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})); + } + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + + auto outputQuantizationParameters = + 
CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ outputQuantScale }), + flatBufferBuilder.CreateVector({ outputQuantOffset })); + + std::array, 4> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputTensorShape.data(), + inputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input_0"), + quantizationParameters); + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(weightsTensorShape.data(), + weightsTensorShape.size()), + tensorType, + 1, + flatBufferBuilder.CreateString("weights"), + quantizationParameters); + tensors[2] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(biasTensorShape.data(), + biasTensorShape.size()), + biasTensorType, + 2, + flatBufferBuilder.CreateString("bias"), + quantizationParameters); + + tensors[3] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output"), + outputQuantizationParameters); + + + // create operator + tflite::BuiltinOptions operatorBuiltinOptionsType = BuiltinOptions_FullyConnectedOptions; + flatbuffers::Offset operatorBuiltinOptions = + CreateFullyConnectedOptions(flatBufferBuilder, + activationType, + FullyConnectedOptionsWeightsFormat_DEFAULT, false).Union(); + + const std::vector operatorInputs{0, 1, 2}; + const std::vector operatorOutputs{3}; + flatbuffers::Offset fullyConnectedOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, operatorBuiltinOptions); + + const std::vector subgraphInputs{0, 1, 2}; + const std::vector subgraphOutputs{3}; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&fullyConnectedOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: FullyConnected Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, + tflite::BuiltinOperator_FULLY_CONNECTED); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void FullyConnectedTest(std::vector& backends, + tflite::TensorType tensorType, + tflite::ActivationFunctionType activationType, + const std::vector & inputTensorShape, + const std::vector & weightsTensorShape, + const std::vector & biasTensorShape, + std::vector & outputTensorShape, + std::vector & inputValues, + std::vector & expectedOutputValues, + std::vector & weightsData, + bool constantWeights = true, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + + std::vector modelBuffer = CreateFullyConnectedTfLiteModel(tensorType, + activationType, + 
inputTensorShape, + weightsTensorShape, + biasTensorShape, + outputTensorShape, + weightsData, + constantWeights, + quantScale, + quantOffset); + const Model* tfLiteModel = GetModel(modelBuffer.data()); + + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(tfLiteInterpreter, 0, inputValues); + armnnDelegate::FillInput(armnnDelegateInterpreter, 0, inputValues); + + if (!constantWeights) + { + armnnDelegate::FillInput(tfLiteInterpreter, 1, weightsData); + armnnDelegate::FillInput(armnnDelegateInterpreter, 1, weightsData); + + if (tensorType == ::tflite::TensorType_INT8) + { + std::vector biasData = {10}; + armnnDelegate::FillInput(tfLiteInterpreter, 2, biasData); + armnnDelegate::FillInput(armnnDelegateInterpreter, 2, biasData); + } + else + { + std::vector biasData = {10}; + armnnDelegate::FillInput(tfLiteInterpreter, 2, biasData); + armnnDelegate::FillInput(armnnDelegateInterpreter, 2, biasData); + } + } + + // Run EnqueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + armnnDelegate::CompareOutputData(tfLiteInterpreter, + armnnDelegateInterpreter, + outputTensorShape, + expectedOutputValues); + armnnDelegateInterpreter.reset(nullptr); +} + +} // anonymous namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/GatherTest.cpp b/arch/arm/ARMnn/delegate/src/test/GatherTest.cpp new file mode 100644 index 0000000000..6dd015173c --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/GatherTest.cpp @@ -0,0 +1,117 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "GatherTestHelper.hpp" + +#include + +#include +#include + +#include + +namespace armnnDelegate +{ + +// GATHER Operator +void GatherUint8Test(std::vector& backends) +{ + + std::vector paramsShape{8}; + std::vector indicesShape{3}; + std::vector expectedOutputShape{3}; + + int32_t axis = 0; + std::vector paramsValues{1, 2, 3, 4, 5, 6, 7, 8}; + std::vector indicesValues{7, 6, 5}; + std::vector expectedOutputValues{8, 7, 6}; + + GatherTest(::tflite::TensorType_UINT8, + backends, + paramsShape, + indicesShape, + expectedOutputShape, + axis, + paramsValues, + indicesValues, + expectedOutputValues); +} + +void GatherFp32Test(std::vector& backends) +{ + std::vector paramsShape{8}; + std::vector indicesShape{3}; + std::vector expectedOutputShape{3}; + + int32_t axis = 0; + std::vector paramsValues{1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f}; + std::vector indicesValues{7, 6, 5}; + std::vector expectedOutputValues{8.8f, 7.7f, 6.6f}; + + GatherTest(::tflite::TensorType_FLOAT32, + backends, + paramsShape, + indicesShape, + expectedOutputShape, + axis, + paramsValues, + indicesValues, + expectedOutputValues); +} + +// GATHER Test Suite +TEST_SUITE("GATHER_CpuRefTests") +{ + +TEST_CASE ("GATHER_Uint8_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + GatherUint8Test(backends); +} + +TEST_CASE ("GATHER_Fp32_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + GatherFp32Test(backends); +} + +} + +TEST_SUITE("GATHER_CpuAccTests") +{ + +TEST_CASE ("GATHER_Uint8_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + GatherUint8Test(backends); +} + +TEST_CASE ("GATHER_Fp32_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + GatherFp32Test(backends); +} + +} + +TEST_SUITE("GATHER_GpuAccTests") +{ + +TEST_CASE ("GATHER_Uint8_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + GatherUint8Test(backends); +} + +TEST_CASE ("GATHER_Fp32_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + GatherFp32Test(backends); +} + +} +// End of GATHER Test Suite + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/GatherTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/GatherTestHelper.hpp new file mode 100644 index 0000000000..fcacf04134 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/GatherTestHelper.hpp @@ -0,0 +1,181 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ + +std::vector CreateGatherTfLiteModel(tflite::TensorType tensorType, + std::vector& paramsShape, + std::vector& indicesShape, + const std::vector& expectedOutputShape, + int32_t axis, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::vector> buffers; + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({quantScale}), + flatBufferBuilder.CreateVector({quantOffset})); + + std::array, 3> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(paramsShape.data(), + paramsShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("params"), + quantizationParameters); + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(indicesShape.data(), + indicesShape.size()), + ::tflite::TensorType_INT32, + 0, + flatBufferBuilder.CreateString("indices"), + quantizationParameters); + tensors[2] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(expectedOutputShape.data(), + expectedOutputShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output"), + quantizationParameters); + + + // create operator + tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_GatherOptions; + flatbuffers::Offset operatorBuiltinOptions = CreateGatherOptions(flatBufferBuilder).Union(); + + const std::vector operatorInputs{{0, 1}}; + const std::vector operatorOutputs{2}; + flatbuffers::Offset controlOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), + operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), + operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + const std::vector subgraphInputs{{0, 1}}; + const std::vector subgraphOutputs{2}; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), + subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), + subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&controlOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: GATHER Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, + BuiltinOperator_GATHER); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void GatherTest(tflite::TensorType tensorType, + std::vector& backends, + std::vector& paramsShape, + std::vector& indicesShape, + std::vector& expectedOutputShape, + int32_t axis, + std::vector& paramsValues, + std::vector& indicesValues, + std::vector& expectedOutputValues, + float quantScale = 
1.0f, + int quantOffset = 0) +{ + using namespace tflite; + std::vector modelBuffer = CreateGatherTfLiteModel(tensorType, + paramsShape, + indicesShape, + expectedOutputShape, + axis, + quantScale, + quantOffset); + const Model* tfLiteModel = GetModel(modelBuffer.data()); + + // Create TfLite Interpreters + std::unique_ptr armnnDelegate; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegate) == kTfLiteOk); + CHECK(armnnDelegate != nullptr); + CHECK(armnnDelegate->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteDelegate; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteDelegate) == kTfLiteOk); + CHECK(tfLiteDelegate != nullptr); + CHECK(tfLiteDelegate->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegate->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(tfLiteDelegate, 0, paramsValues); + armnnDelegate::FillInput(armnnDelegate, 0, paramsValues); + armnnDelegate::FillInput(tfLiteDelegate, 1, indicesValues); + armnnDelegate::FillInput(armnnDelegate, 1, indicesValues); + + // Run EnqueWorkload + CHECK(tfLiteDelegate->Invoke() == kTfLiteOk); + CHECK(armnnDelegate->Invoke() == kTfLiteOk); + + // Compare output data + armnnDelegate::CompareOutputData(tfLiteDelegate, + armnnDelegate, + expectedOutputShape, + expectedOutputValues, + 0); + + tfLiteDelegate.reset(nullptr); + armnnDelegate.reset(nullptr); +} +} // anonymous namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/LogicalTest.cpp b/arch/arm/ARMnn/delegate/src/test/LogicalTest.cpp new file mode 100644 index 0000000000..9fa2d3dde0 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/LogicalTest.cpp @@ -0,0 +1,226 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ElementwiseUnaryTestHelper.hpp" +#include "LogicalTestHelper.hpp" + +#include + +#include +#include + +#include + +namespace armnnDelegate +{ + +void LogicalBinaryAndBoolTest(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2 }; + std::vector input1Shape { 1, 2, 2 }; + std::vector expectedOutputShape { 1, 2, 2 }; + + // Set input and output values + std::vector input0Values { 0, 0, 1, 1 }; + std::vector input1Values { 0, 1, 0, 1 }; + std::vector expectedOutputValues { 0, 0, 0, 1 }; + + LogicalBinaryTest(tflite::BuiltinOperator_LOGICAL_AND, + ::tflite::TensorType_BOOL, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void LogicalBinaryAndBroadcastTest(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2 }; + std::vector input1Shape { 1, 1, 1 }; + std::vector expectedOutputShape { 1, 2, 2 }; + + std::vector input0Values { 0, 1, 0, 1 }; + std::vector input1Values { 1 }; + std::vector expectedOutputValues { 0, 1, 0, 1 }; + + LogicalBinaryTest(tflite::BuiltinOperator_LOGICAL_AND, + ::tflite::TensorType_BOOL, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void LogicalBinaryOrBoolTest(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2 }; + std::vector input1Shape { 1, 2, 2 }; + std::vector expectedOutputShape { 1, 2, 2 }; + + std::vector input0Values { 0, 0, 1, 1 }; + std::vector input1Values { 0, 1, 0, 1 }; + std::vector expectedOutputValues { 0, 1, 1, 1 }; + + LogicalBinaryTest(tflite::BuiltinOperator_LOGICAL_OR, + ::tflite::TensorType_BOOL, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +void LogicalBinaryOrBroadcastTest(std::vector& backends) +{ + std::vector input0Shape { 1, 2, 2 }; + std::vector input1Shape { 1, 1, 1 }; + std::vector expectedOutputShape { 1, 2, 2 }; + + std::vector input0Values { 0, 1, 0, 1 }; + std::vector input1Values { 1 }; + std::vector expectedOutputValues { 1, 1, 1, 1 }; + + LogicalBinaryTest(tflite::BuiltinOperator_LOGICAL_OR, + ::tflite::TensorType_BOOL, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues); +} + +// LogicalNot operator uses ElementwiseUnary unary layer and descriptor but is still classed as logical operator. 
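+// Because LOGICAL_NOT takes a single input tensor, the test below reuses the
+// ElementwiseUnaryBoolTest helper from ElementwiseUnaryTestHelper.hpp (included above)
+// rather than the two-input LogicalBinaryTest helper used for the AND and OR cases.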
+void LogicalNotBoolTest(std::vector& backends) +{ + std::vector inputShape { 1, 2, 2 }; + + std::vector inputValues { 0, 1, 0, 1 }; + std::vector expectedOutputValues { 1, 0, 1, 0 }; + + ElementwiseUnaryBoolTest(tflite::BuiltinOperator_LOGICAL_NOT, + backends, + inputShape, + inputValues, + expectedOutputValues); +} + +TEST_SUITE("LogicalBinaryTests_GpuAccTests") +{ + +TEST_CASE ("LogicalBinary_AND_Bool_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + LogicalBinaryAndBoolTest(backends); +} + +TEST_CASE ("LogicalBinary_AND_Broadcast_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + LogicalBinaryAndBroadcastTest(backends); +} + +TEST_CASE ("Logical_NOT_Bool_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + LogicalNotBoolTest(backends); +} + +TEST_CASE ("LogicalBinary_OR_Bool_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + LogicalBinaryOrBoolTest(backends); +} + +TEST_CASE ("LogicalBinary_OR_Broadcast_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + LogicalBinaryOrBroadcastTest(backends); +} + +} + + +TEST_SUITE("LogicalBinaryTests_CpuAccTests") +{ + +TEST_CASE ("LogicalBinary_AND_Bool_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + LogicalBinaryAndBoolTest(backends); +} + +TEST_CASE ("LogicalBinary_AND_Broadcast_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + LogicalBinaryAndBroadcastTest(backends); +} + +TEST_CASE ("Logical_NOT_Bool_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + LogicalNotBoolTest(backends); +} + +TEST_CASE ("LogicalBinary_OR_Bool_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + LogicalBinaryOrBoolTest(backends); +} + +TEST_CASE ("LogicalBinary_OR_Broadcast_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + LogicalBinaryOrBroadcastTest(backends); +} + +} + + +TEST_SUITE("LogicalBinaryTests_CpuRefTests") +{ + +TEST_CASE ("LogicalBinary_AND_Bool_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + LogicalBinaryAndBoolTest(backends); +} + +TEST_CASE ("LogicalBinary_AND_Broadcast_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + LogicalBinaryAndBroadcastTest(backends); +} + +TEST_CASE ("Logical_NOT_Bool_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + LogicalNotBoolTest(backends); +} + +TEST_CASE ("LogicalBinary_OR_Bool_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + LogicalBinaryOrBoolTest(backends); +} + +TEST_CASE ("LogicalBinary_OR_Broadcast_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + LogicalBinaryOrBroadcastTest(backends); +} + +} + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/LogicalTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/LogicalTestHelper.hpp new file mode 100644 index 0000000000..d08a1af388 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/LogicalTestHelper.hpp @@ -0,0 +1,198 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ + +std::vector CreateLogicalBinaryTfLiteModel(tflite::BuiltinOperator logicalOperatorCode, + tflite::TensorType tensorType, + const std::vector & input0TensorShape, + const std::vector & input1TensorShape, + const std::vector & outputTensorShape, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::vector> buffers; + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + + + std::array, 3> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(input0TensorShape.data(), + input0TensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input_0"), + quantizationParameters); + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(input1TensorShape.data(), + input1TensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input_1"), + quantizationParameters); + tensors[2] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output"), + quantizationParameters); + + // create operator + tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_NONE; + flatbuffers::Offset operatorBuiltinOptions = 0; + switch (logicalOperatorCode) + { + case BuiltinOperator_LOGICAL_AND: + { + operatorBuiltinOptionsType = BuiltinOptions_LogicalAndOptions; + operatorBuiltinOptions = CreateLogicalAndOptions(flatBufferBuilder).Union(); + break; + } + case BuiltinOperator_LOGICAL_OR: + { + operatorBuiltinOptionsType = BuiltinOptions_LogicalOrOptions; + operatorBuiltinOptions = CreateLogicalOrOptions(flatBufferBuilder).Union(); + break; + } + default: + break; + } + const std::vector operatorInputs{ {0, 1} }; + const std::vector operatorOutputs{ 2 }; + flatbuffers::Offset logicalBinaryOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + const std::vector subgraphInputs{ {0, 1} }; + const std::vector subgraphOutputs{ 2 }; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&logicalBinaryOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Logical Binary Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, logicalOperatorCode); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + 
flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void LogicalBinaryTest(tflite::BuiltinOperator logicalOperatorCode, + tflite::TensorType tensorType, + std::vector& backends, + std::vector& input0Shape, + std::vector& input1Shape, + std::vector& expectedOutputShape, + std::vector& input0Values, + std::vector& input1Values, + std::vector& expectedOutputValues, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + std::vector modelBuffer = CreateLogicalBinaryTfLiteModel(logicalOperatorCode, + tensorType, + input0Shape, + input1Shape, + expectedOutputShape, + quantScale, + quantOffset); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data for the armnn interpreter + armnnDelegate::FillInput(armnnDelegateInterpreter, 0, input0Values); + armnnDelegate::FillInput(armnnDelegateInterpreter, 1, input1Values); + + // Set input data for the tflite interpreter + armnnDelegate::FillInput(tfLiteInterpreter, 0, input0Values); + armnnDelegate::FillInput(tfLiteInterpreter, 1, input1Values); + + // Run EnqueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data, comparing Boolean values is handled differently and needs to call the CompareData function + // directly. This is because Boolean types get converted to a bit representation in a vector. 
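+// Instead of CompareOutputData, the raw output buffers are read back via typed_tensor and
+// compared element by element: the expected values against each interpreter's output, and
+// the TfLite reference output against the ArmNN delegate output.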
+ auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0]; + auto tfLiteDelegateOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0]; + auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateOutputId); + + armnnDelegate::CompareData(expectedOutputValues, armnnDelegateOutputData, expectedOutputValues.size()); + armnnDelegate::CompareData(expectedOutputValues, tfLiteDelegateOutputData, expectedOutputValues.size()); + armnnDelegate::CompareData(tfLiteDelegateOutputData, armnnDelegateOutputData, expectedOutputValues.size()); + + armnnDelegateInterpreter.reset(nullptr); + tfLiteInterpreter.reset(nullptr); +} + +} // anonymous namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/LstmTest.cpp b/arch/arm/ARMnn/delegate/src/test/LstmTest.cpp new file mode 100644 index 0000000000..1fa9f0c8bf --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/LstmTest.cpp @@ -0,0 +1,189 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "LstmTestHelper.hpp" + +#include + +#include +#include +#include + +namespace armnnDelegate +{ + +void LstmTest(std::vector& backends) +{ + int32_t batchSize = 2; + int32_t inputSize = 2; + int32_t outputSize = 4; + // cellSize and outputSize have the same size when there is no projection. + int32_t numUnits = outputSize; + + std::vector inputShape {batchSize , inputSize}; + std::vector cellStateInTensorInfo {batchSize , numUnits}; + std::vector outputStateInTensorInfo {batchSize , outputSize}; + + std::vector scratchBufferTensorInfo {batchSize, numUnits * 4}; + std::vector cellStateOutTensorInfo {batchSize, numUnits}; + std::vector outputStateOutTensorInfo {batchSize, outputSize}; + std::vector outputTensorInfo {batchSize, outputSize}; + + std::vector tensorInfo4 {numUnits}; + std::vector tensorInfo8 {numUnits, 2}; + std::vector tensorInfo16 {numUnits, 4}; + + //tensorInfo8, + bool hasInputToInputWeights = true; + std::vector inputToInputWeights {-0.45018822f, -0.02338299f, -0.0870589f, + -0.34550029f, 0.04266912f, -0.15680569f, + -0.34856534f, 0.43890524f}; + + std::vector inputToForgetWeights {0.09701663f, 0.20334584f, -0.50592935f, + -0.31343272f, -0.40032279f, 0.44781327f, + 0.01387155f, -0.35593212f}; + + std::vector inputToCellWeights {-0.50013041f, 0.1370284f, 0.11810488f, 0.2013163f, + -0.20583314f, 0.44344562f, 0.22077113f, + -0.29909778f}; + + std::vector inputToOutputWeights {-0.25065863f, -0.28290087f, 0.04613829f, + 0.40525138f, 0.44272184f, 0.03897077f, + -0.1556896f, 0.19487578f}; + + //tensorInfo16, + bool hasRecurrentToInputWeights = true; + std::vector recurrentToInputWeights {-0.0063535f, -0.2042388f, 0.31454784f, + -0.35746509f, 0.28902304f, 0.08183324f, + -0.16555229f, 0.02286911f, -0.13566875f, + 0.03034258f, 0.48091322f, -0.12528998f, + 0.24077177f, -0.51332325f, -0.33502164f, + 0.10629296f}; + + std::vector recurrentToForgetWeights {-0.48684245f, -0.06655136f, 0.42224967f, + 0.2112639f, 0.27654213f, 0.20864892f, + -0.07646349f, 0.45877004f, 0.00141793f, + -0.14609534f, 0.36447752f, 0.09196436f, + 0.28053468f, 0.01560611f, -0.20127171f, + -0.01140004f}; + + std::vector recurrentToCellWeights {-0.3407414f, 0.24443203f, -0.2078532f, + 0.26320225f, 0.05695659f, -0.00123841f, + -0.4744786f, -0.35869038f, -0.06418842f, + -0.13502428f, -0.501764f, 0.22830659f, + -0.46367589f, 0.26016325f, -0.03894562f, + -0.16368064f}; + + std::vector 
recurrentToOutputWeights {0.43385774f, -0.17194885f, 0.2718237f, + 0.09215671f, 0.24107647f, -0.39835793f, + 0.18212086f, 0.01301402f, 0.48572797f, + -0.50656658f, 0.20047462f, -0.20607421f, + -0.51818722f, -0.15390486f, 0.0468148f, + 0.39922136f}; + // tensorInfo4 + bool hasCellToInputWeights = false; + std::vector cellToInputWeights {}; + bool hasCellToForgetWeights = false; + std::vector cellToForgetWeights {}; + bool hasCellToOutputWeights = false; + std::vector cellToOutputWeights {}; + + bool hasInputGateBias = true; + std::vector inputGateBias {0., 0., 0., 0.}; + std::vector forgetGateBias {1., 1., 1., 1.}; + std::vector cellBias {0., 0., 0., 0.}; + std::vector outputGateBias {0., 0., 0., 0.}; + + bool hasProjectionWeights = false; + std::vector projectionWeights; + bool hasProjectionBias = false; + std::vector projectionBias; + + bool hasInputLayerNormWeights = false; + std::vector inputLayerNormWeights; + bool hasForgetLayerNormWeights = false; + std::vector forgetLayerNormWeights; + bool hasCellLayerNormWeights = false; + std::vector cellLayerNormWeights; + bool hasOutputLayerNormWeights = false; + std::vector outputLayerNormWeights; + + std::vector inputValues {2., 3., 3., 4.}; + std::vector expectedOutputValues {-0.02973187f, 0.1229473f, 0.20885126f, -0.15358765f, + -0.0185422f, 0.11281417f, 0.24466537f, -0.1826292f}; + + tflite::ActivationFunctionType activationFunction = tflite::ActivationFunctionType_TANH; + float clippingThresCell = 0.f; + float clippingThresProj = 0.f; + + LstmTestImpl(backends, + ::tflite::TensorType_FLOAT32, + batchSize, + inputSize, + outputSize, + numUnits, + hasInputToInputWeights, + inputToInputWeights, + inputToForgetWeights, + inputToCellWeights, + inputToOutputWeights, + hasRecurrentToInputWeights, + recurrentToInputWeights, + recurrentToForgetWeights, + recurrentToCellWeights, + recurrentToOutputWeights, + hasCellToInputWeights, + cellToInputWeights, + hasCellToForgetWeights, + cellToForgetWeights, + hasCellToOutputWeights, + cellToOutputWeights, + hasInputGateBias, + inputGateBias, + forgetGateBias, + cellBias, + outputGateBias, + hasProjectionWeights, + projectionWeights, + hasProjectionBias, + projectionBias, + hasInputLayerNormWeights, + inputLayerNormWeights, + hasForgetLayerNormWeights, + forgetLayerNormWeights, + hasCellLayerNormWeights, + cellLayerNormWeights, + hasOutputLayerNormWeights, + outputLayerNormWeights, + inputValues, + expectedOutputValues, + activationFunction, + clippingThresCell, + clippingThresProj); +} + +TEST_SUITE("LstmTest_CpuRefTests") +{ + +TEST_CASE ("LstmTest_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + LstmTest(backends); +} + +} //End of TEST_SUITE("Convolution2dTest_CpuRef") + +TEST_SUITE("LstmTest_CpuAccTests") +{ + +TEST_CASE ("LstmTest_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + LstmTest(backends); +} + +} //End of TEST_SUITE("Convolution2dTest_CpuAcc") + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/LstmTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/LstmTestHelper.hpp new file mode 100644 index 0000000000..36a606119a --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/LstmTestHelper.hpp @@ -0,0 +1,691 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ + +template +std::vector CreateLstmTfLiteModel(tflite::TensorType tensorType, + int32_t batchSize, + int32_t inputSize, + int32_t outputSize, + int32_t numUnits, + bool hasInputToInputWeights, + const std::vector& inputToInputWeights, + const std::vector& inputToForgetWeights, + const std::vector& inputToCellWeights, + const std::vector& inputToOutputWeights, + bool hasRecurrentToInputWeights, + const std::vector& recurrentToInputWeights, + const std::vector& recurrentToForgetWeights, + const std::vector& recurrentToCellWeights, + const std::vector& recurrentToOutputWeights, + bool hasCellToInputWeights, + const std::vector& cellToInputWeights, + bool hasCellToForgetWeights, + const std::vector& cellToForgetWeights, + bool hasCellToOutputWeights, + const std::vector& cellToOutputWeights, + bool hasInputGateBias, + const std::vector& inputGateBias, + const std::vector& forgetGateBias, + const std::vector& cellBias, + const std::vector& outputGateBias, + bool hasProjectionWeights, + const std::vector& projectionWeights, + bool hasProjectionBias, + const std::vector& projectionBias, + bool hasInputLayerNormWeights, + const std::vector& inputLayerNormWeights, + bool hasForgetLayerNormWeights, + const std::vector& forgetLayerNormWeights, + bool hasCellLayerNormWeights, + const std::vector& cellLayerNormWeights, + bool hasOutputLayerNormWeights, + const std::vector& outputLayerNormWeights, + tflite::ActivationFunctionType activationFunction, + float clippingThresCell, + float clippingThresProj, + float quantScale = 1.0f, + int quantOffset = 0, + float outputQuantScale = 2.0f, + int outputQuantOffset = 0) +{ + + std::vector tensorInfo0 {}; + std::vector tensorInfo4 {numUnits}; + std::vector tensorInfo8 {numUnits, static_cast(2)}; + std::vector tensorInfo16 {numUnits, static_cast(4)}; + + std::vector inputShape {batchSize , inputSize}; + std::vector outputShape {batchSize , outputSize}; + + std::vector outputStateInDimensions{batchSize, outputSize}; + std::vector cellStateInDimensions{batchSize, numUnits}; + + std::vector operatorInputs; + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + std::vector> buffers; + std::vector> tensors; + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + + auto outputQuantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ outputQuantScale }), + flatBufferBuilder.CreateVector({ outputQuantOffset })); + + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputShape.data(), + inputShape.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("input_0"), + quantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + + if (hasInputToInputWeights) + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(inputToInputWeights.data()), + sizeof(T) * inputToInputWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfo8.data(), + tensorInfo8.size()), + tensorType, + buffers.size() - 1, + 
flatBufferBuilder.CreateString("inputToInputWeights"), + outputQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + } + else + { + operatorInputs.push_back(kTfLiteOptionalTensor); + } + + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(inputToForgetWeights.data()), + sizeof(T) * inputToForgetWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfo8.data(), + tensorInfo8.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("inputToForgetWeights"), + outputQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(inputToCellWeights.data()), + sizeof(T) * inputToCellWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfo8.data(), + tensorInfo8.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("inputToCellWeights"), + outputQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(inputToOutputWeights.data()), + sizeof(T) * inputToOutputWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfo8.data(), + tensorInfo8.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("inputToOutputWeights"), + outputQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + + if (hasRecurrentToInputWeights) + { + buffers.push_back(CreateBuffer( + flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(recurrentToInputWeights.data()), + sizeof(T) * recurrentToInputWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfo16.data(), + tensorInfo16.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("recurrentToInputWeights"), + outputQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + } + else + { + operatorInputs.push_back(kTfLiteOptionalTensor); + } + + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(recurrentToForgetWeights.data()), + sizeof(T) * recurrentToForgetWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfo16.data(), + tensorInfo16.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("recurrentToForgetWeights"), + outputQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(recurrentToCellWeights.data()), + sizeof(T) * recurrentToCellWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfo16.data(), + tensorInfo16.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("recurrentToCellWeights"), + outputQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(recurrentToOutputWeights.data()), + sizeof(T) * recurrentToOutputWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + 
flatBufferBuilder.CreateVector(tensorInfo16.data(), + tensorInfo16.size()), + tensorType, + buffers.size() - 1 , + flatBufferBuilder.CreateString("recurrentToOutputWeights"), + outputQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + + if (hasCellToInputWeights) + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(cellToInputWeights.data()), + sizeof(T) * cellToInputWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfo4.data(), + tensorInfo4.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("cellToInputWeights"), + outputQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + } + else + { + operatorInputs.push_back(kTfLiteOptionalTensor); + } + + if (hasCellToForgetWeights) + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(cellToForgetWeights.data()), + sizeof(T) * cellToForgetWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfo4.data(), + tensorInfo4.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("cellToForgetWeights"), + outputQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + } + else + { + operatorInputs.push_back(kTfLiteOptionalTensor); + } + + if (hasCellToOutputWeights) + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(cellToOutputWeights.data()), + sizeof(T) * cellToOutputWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfo4.data(), + tensorInfo4.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("cellToOutputWeights"), + outputQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + } + else + { + operatorInputs.push_back(kTfLiteOptionalTensor); + } + + if (hasInputGateBias) + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(inputGateBias.data()), + sizeof(T) * inputGateBias.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfo4.data(), + tensorInfo4.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("inputGateBias"), + outputQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + } + else + { + operatorInputs.push_back(kTfLiteOptionalTensor); + } + + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(forgetGateBias.data()), + sizeof(T) * forgetGateBias.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfo4.data(), + tensorInfo4.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("forgetGateBias"), + outputQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(cellBias.data()), + sizeof(T) * cellBias.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfo4.data(), + tensorInfo4.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("cellBias"), + outputQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + + buffers.push_back( + 
CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(outputGateBias.data()), + sizeof(T) * outputGateBias.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfo4.data(), + tensorInfo4.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("outputGateBias"), + outputQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + + if (hasProjectionWeights) + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(projectionWeights.data()), + sizeof(T) * projectionWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfo4.data(), + tensorInfo4.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("outputGateBias"), + outputQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + } + else + { + operatorInputs.push_back(kTfLiteOptionalTensor); + } + + if (hasProjectionBias) + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(projectionBias.data()), + sizeof(T) * projectionBias.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfo4.data(), + tensorInfo4.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("projectionBias"), + outputQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + } + else + { + operatorInputs.push_back(kTfLiteOptionalTensor); + } + + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputStateInDimensions.data(), + outputStateInDimensions.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("outputStateInInfo"), + outputQuantizationParameters, + true)); + operatorInputs.push_back(buffers.size() - 1); + + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(cellStateInDimensions.data(), + cellStateInDimensions.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("cellStateInInfo"), + outputQuantizationParameters, + true)); + operatorInputs.push_back(buffers.size() - 1); + + if (hasInputLayerNormWeights) + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector( + reinterpret_cast(inputLayerNormWeights.data()), + sizeof(T) * inputLayerNormWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfo4.data(), + tensorInfo4.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("inputLayerNormWeights"), + outputQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + } + else + { + operatorInputs.push_back(kTfLiteOptionalTensor); + } + + if (hasForgetLayerNormWeights) + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector( + reinterpret_cast(forgetLayerNormWeights.data()), + sizeof(T) * forgetLayerNormWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfo4.data(), + tensorInfo4.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("forgetLayerNormWeights"), + outputQuantizationParameters)); + 
operatorInputs.push_back(buffers.size() - 1); + } + else + { + operatorInputs.push_back(kTfLiteOptionalTensor); + } + + if (hasCellLayerNormWeights) + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(cellLayerNormWeights.data()), + sizeof(T) * cellLayerNormWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfo4.data(), + tensorInfo4.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("cellLayerNormWeights"), + outputQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + } + else + { + operatorInputs.push_back(kTfLiteOptionalTensor); + } + + if (hasOutputLayerNormWeights) + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector( + reinterpret_cast(outputLayerNormWeights.data()), + sizeof(T) * outputLayerNormWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfo4.data(), + tensorInfo4.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("outputLayerNormWeights"), + outputQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + } + else + { + operatorInputs.push_back(kTfLiteOptionalTensor); + } + int outputBufferId = buffers.size(); + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputShape.data(), + outputShape.size()), + tensorType, + outputBufferId, + flatBufferBuilder.CreateString("output"), + outputQuantizationParameters)); + std::vector operatorOutputs; + operatorOutputs.push_back(buffers.size() - 1); + + // create operator + tflite::BuiltinOptions operatorBuiltinOptionsType = BuiltinOptions_LSTMOptions; + flatbuffers::Offset operatorBuiltinOptions = + CreateLSTMOptions(flatBufferBuilder, + activationFunction, + clippingThresCell, + clippingThresProj).Union(); + + flatbuffers::Offset lstmOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, operatorBuiltinOptions); + + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + flatBufferBuilder.CreateVector(&lstmOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: LSTM Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, + tflite::BuiltinOperator_LSTM); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void LstmTestImpl(std::vector& backends, + tflite::TensorType tensorType, + int32_t batchSize, + int32_t inputSize, + int32_t outputSize, + int32_t numUnits, + bool 
hasInputToInputWeights, + const std::vector& inputToInputWeights, + const std::vector& inputToForgetWeights, + const std::vector& inputToCellWeights, + const std::vector& inputToOutputWeights, + bool hasRecurrentToInputWeights, + const std::vector& recurrentToInputWeights, + const std::vector& recurrentToForgetWeights, + const std::vector& recurrentToCellWeights, + const std::vector& recurrentToOutputWeights, + bool hasCellToInputWeights, + const std::vector& cellToInputWeights, + bool hasCellToForgetWeights, + const std::vector& cellToForgetWeights, + bool hasCellToOutputWeights, + const std::vector& cellToOutputWeights, + bool hasInputGateBias, + const std::vector& inputGateBias, + const std::vector& forgetGateBias, + const std::vector& cellBias, + const std::vector& outputGateBias, + bool hasProjectionWeights, + const std::vector& projectionWeights, + bool hasProjectionBias, + const std::vector& projectionBias, + bool hasInputLayerNormWeights, + const std::vector& inputLayerNormWeights, + bool hasForgetLayerNormWeights, + const std::vector& forgetLayerNormWeights, + bool hasCellLayerNormWeights, + const std::vector& cellLayerNormWeights, + bool hasOutputLayerNormWeights, + const std::vector& outputLayerNormWeights, + std::vector& inputValues, + std::vector& expectedOutputValues, + tflite::ActivationFunctionType activationFunction, + float clippingThresCell, + float clippingThresProj) +{ + using namespace tflite; + + std::vector modelBuffer = CreateLstmTfLiteModel(tensorType, + batchSize, + inputSize, + outputSize, + numUnits, + hasInputToInputWeights, + inputToInputWeights, + inputToForgetWeights, + inputToCellWeights, + inputToOutputWeights, + hasRecurrentToInputWeights, + recurrentToInputWeights, + recurrentToForgetWeights, + recurrentToCellWeights, + recurrentToOutputWeights, + hasCellToInputWeights, + cellToInputWeights, + hasCellToForgetWeights, + cellToForgetWeights, + hasCellToOutputWeights, + cellToOutputWeights, + hasInputGateBias, + inputGateBias, + forgetGateBias, + cellBias, + outputGateBias, + hasProjectionWeights, + projectionWeights, + hasProjectionBias, + projectionBias, + hasInputLayerNormWeights, + inputLayerNormWeights, + hasForgetLayerNormWeights, + forgetLayerNormWeights, + hasCellLayerNormWeights, + cellLayerNormWeights, + hasOutputLayerNormWeights, + outputLayerNormWeights, + activationFunction, + clippingThresCell, + clippingThresProj); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + auto tfLiteDelegateInputId = 
tfLiteInterpreter->inputs()[0]; + auto tfLiteDelageInputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateInputId); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + tfLiteDelageInputData[i] = inputValues[i]; + } + + auto armnnDelegateInputId = armnnDelegateInterpreter->inputs()[0]; + auto armnnDelegateInputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateInputId); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + armnnDelegateInputData[i] = inputValues[i]; + } + + // Run EnqueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0]; + auto tfLiteDelagateOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0]; + auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateOutputId); + + armnnDelegate::CompareData(expectedOutputValues.data(), armnnDelegateOutputData, expectedOutputValues.size()); + armnnDelegate::CompareData(expectedOutputValues.data(), tfLiteDelagateOutputData, expectedOutputValues.size()); + armnnDelegate::CompareData(tfLiteDelagateOutputData, armnnDelegateOutputData, expectedOutputValues.size()); +} + +} // anonymous namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/MirrorPadTest.cpp b/arch/arm/ARMnn/delegate/src/test/MirrorPadTest.cpp new file mode 100644 index 0000000000..ca66181a30 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/MirrorPadTest.cpp @@ -0,0 +1,341 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "PadTestHelper.hpp" + +#include + +#include +#include + +#include + +namespace armnnDelegate +{ + +void MirrorPadSymmetric2dTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 3, 3 }; + std::vector outputShape { 7, 7 }; + std::vector paddingShape { 2, 2 }; + + std::vector inputValues = + { + 1.0f, 2.0f, 3.0f, + 4.0f, 5.0f, 6.0f, + 7.0f, 8.0f, 9.0f + }; + + std::vector expectedOutputValues = + { + 5.0f, 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, + 2.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, + 2.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, + 5.0f, 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, + 8.0f, 7.0f, 7.0f, 8.0f, 9.0f, 9.0f, 8.0f, + 8.0f, 7.0f, 7.0f, 8.0f, 9.0f, 9.0f, 8.0f, + 5.0f, 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f + }; + + std::vector paddingDim = { 2, 2, 2, 2 }; + + PadTest(tflite::BuiltinOperator_MIRROR_PAD, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + paddingShape, + outputShape, + inputValues, + paddingDim, + expectedOutputValues, + 0, // Padding value - Not used in these tests. 
+ 1.0f, // Scale + 0, // Offset + tflite::MirrorPadMode_SYMMETRIC); +} + +void MirrorPadReflect2dTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 3, 3 }; + std::vector outputShape { 7, 7 }; + std::vector paddingShape { 2, 2 }; + + std::vector inputValues = + { + 1.0f, 2.0f, 3.0f, + 4.0f, 5.0f, 6.0f, + 7.0f, 8.0f, 9.0f + }; + + std::vector expectedOutputValues = + { + 9.0f, 8.0f, 7.0f, 8.0f, 9.0f, 8.0f, 7.0f, + 6.0f, 5.0f, 4.0f, 5.0f, 6.0f, 5.0f, 4.0f, + 3.0f, 2.0f, 1.0f, 2.0f, 3.0f, 2.0f, 1.0f, + 6.0f, 5.0f, 4.0f, 5.0f, 6.0f, 5.0f, 4.0f, + 9.0f, 8.0f, 7.0f, 8.0f, 9.0f, 8.0f, 7.0f, + 6.0f, 5.0f, 4.0f, 5.0f, 6.0f, 5.0f, 4.0f, + 3.0f, 2.0f, 1.0f, 2.0f, 3.0f, 2.0f, 1.0f + }; + + std::vector paddingDim = { 2, 2, 2, 2 }; + + PadTest(tflite::BuiltinOperator_MIRROR_PAD, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + paddingShape, + outputShape, + inputValues, + paddingDim, + expectedOutputValues, + 0, // Padding value - Not used in these tests. + 1.0f, // Scale + 0, // Offset + tflite::MirrorPadMode_REFLECT); +} + +void MirrorPadSymmetric3dTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 2, 2, 2 }; + std::vector outputShape { 4, 4, 4 }; + std::vector paddingShape { 3, 2 }; + + std::vector inputValues = + { + // Channel 0, Height (2) x Width (2) + 1.0f, 2.0f, + 3.0f, 4.0f, + + // Channel 1, Height (2) x Width (2) + 5.0f, 6.0f, + 7.0f, 8.0f + }; + + std::vector expectedOutputValues = + { + 1.0f, 1.0f, 2.0f, 2.0f, + 1.0f, 1.0f, 2.0f, 2.0f, + 3.0f, 3.0f, 4.0f, 4.0f, + 3.0f, 3.0f, 4.0f, 4.0f, + + 1.0f, 1.0f, 2.0f, 2.0f, + 1.0f, 1.0f, 2.0f, 2.0f, + 3.0f, 3.0f, 4.0f, 4.0f, + 3.0f, 3.0f, 4.0f, 4.0f, + + 5.0f, 5.0f, 6.0f, 6.0f, + 5.0f, 5.0f, 6.0f, 6.0f, + 7.0f, 7.0f, 8.0f, 8.0f, + 7.0f, 7.0f, 8.0f, 8.0f, + + 5.0f, 5.0f, 6.0f, 6.0f, + 5.0f, 5.0f, 6.0f, 6.0f, + 7.0f, 7.0f, 8.0f, 8.0f, + 7.0f, 7.0f, 8.0f, 8.0f + }; + + std::vector paddingDim = { 1, 1, 1, 1, 1, 1 }; + + PadTest(tflite::BuiltinOperator_MIRROR_PAD, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + paddingShape, + outputShape, + inputValues, + paddingDim, + expectedOutputValues, + 0, // Padding value - Not used in these tests. + 1.0f, // Scale + 0, // Offset + tflite::MirrorPadMode_SYMMETRIC); +} + +void MirrorPadReflect3dTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 2, 2, 2 }; + std::vector outputShape { 4, 4, 4 }; + std::vector paddingShape { 3, 2 }; + + std::vector inputValues = + { + // Channel 0, Height (2) x Width (2) + 1.0f, 2.0f, + 3.0f, 4.0f, + + // Channel 1, Height (2) x Width (2) + 5.0f, 6.0f, + 7.0f, 8.0f + }; + + std::vector expectedOutputValues = + { + 8.0f, 7.0f, 8.0f, 7.0f, + 6.0f, 5.0f, 6.0f, 5.0f, + 8.0f, 7.0f, 8.0f, 7.0f, + 6.0f, 5.0f, 6.0f, 5.0f, + + 4.0f, 3.0f, 4.0f, 3.0f, + 2.0f, 1.0f, 2.0f, 1.0f, + 4.0f, 3.0f, 4.0f, 3.0f, + 2.0f, 1.0f, 2.0f, 1.0f, + + 8.0f, 7.0f, 8.0f, 7.0f, + 6.0f, 5.0f, 6.0f, 5.0f, + 8.0f, 7.0f, 8.0f, 7.0f, + 6.0f, 5.0f, 6.0f, 5.0f, + + 4.0f, 3.0f, 4.0f, 3.0f, + 2.0f, 1.0f, 2.0f, 1.0f, + 4.0f, 3.0f, 4.0f, 3.0f, + 2.0f, 1.0f, 2.0f, 1.0f + }; + + std::vector paddingDim = { 1, 1, 1, 1, 1, 1 }; + + PadTest(tflite::BuiltinOperator_MIRROR_PAD, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + paddingShape, + outputShape, + inputValues, + paddingDim, + expectedOutputValues, + 0, // Padding value - Not used in these tests. 
+ 1.0f, // Scale + 0, // Offset + tflite::MirrorPadMode_REFLECT); +} + +void MirrorPadSymmetricUint8Test(std::vector& backends) +{ + // Set input data + std::vector inputShape { 3, 3 }; + std::vector outputShape { 5, 7 }; + std::vector paddingShape { 2, 2 }; + + std::vector inputValues = + { + 1, 2, 3, + 4, 5, 6, + 7, 8, 9 + }; + + std::vector expectedOutputValues = + { + 2, 1, 1, 2, 3, 3, 2, + 2, 1, 1, 2, 3, 3, 2, + 5, 4, 4, 5, 6, 6, 5, + 8, 7, 7, 8, 9, 9, 8, + 8, 7, 7, 8, 9, 9, 8, + }; + + std::vector paddingDim = { 1, 1, 2, 2 }; + + PadTest(tflite::BuiltinOperator_MIRROR_PAD, + ::tflite::TensorType_UINT8, + backends, + inputShape, + paddingShape, + outputShape, + inputValues, + paddingDim, + expectedOutputValues, + 0, // Padding value - Not used in these tests. + 1.0f, // Scale + 1, // Offset + tflite::MirrorPadMode_SYMMETRIC); +} + +void MirrorPadReflectInt8Test(std::vector& backends) +{ + // Set input data + std::vector inputShape { 3, 3 }; + std::vector outputShape { 7, 5 }; + std::vector paddingShape { 2, 2 }; + + std::vector inputValues = + { + 1, 2, 3, + 4, 5, 6, + 7, 8, 9 + }; + + std::vector expectedOutputValues = + { + 8, 7, 8, 9, 8, + 5, 4, 5, 6, 5, + 2, 1, 2, 3, 2, + 5, 4, 5, 6, 5, + 8, 7, 8, 9, 8, + 5, 4, 5, 6, 5, + 2, 1, 2, 3, 2 + }; + + std::vector paddingDim = { 2, 2, 1, 1 }; + + PadTest(tflite::BuiltinOperator_MIRROR_PAD, + ::tflite::TensorType_INT8, + backends, + inputShape, + paddingShape, + outputShape, + inputValues, + paddingDim, + expectedOutputValues, + 0, // Padding value - Not used in these tests. + 1.0f, // Scale + 1, // Offset + tflite::MirrorPadMode_REFLECT); +} + +TEST_SUITE("MirrorPad_CpuRefTests") +{ + +TEST_CASE ("MirrorPadSymmetric2d_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MirrorPadSymmetric2dTest(backends); +} + +TEST_CASE ("MirrorPadReflect2d_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MirrorPadReflect2dTest(backends); +} + +TEST_CASE ("MirrorPadSymmetric3d_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MirrorPadSymmetric3dTest(backends); +} + +TEST_CASE ("MirrorPadReflect3d_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MirrorPadReflect3dTest(backends); +} + +TEST_CASE ("MirrorPadSymmetricUint8_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MirrorPadSymmetricUint8Test(backends); +} + +TEST_CASE ("MirrorPadSymmetricInt8_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MirrorPadReflectInt8Test(backends); +} + +} // TEST_SUITE("MirrorPad_CpuRefTests") + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/NeonDelegateTests_NDK_Issue.cpp b/arch/arm/ARMnn/delegate/src/test/NeonDelegateTests_NDK_Issue.cpp new file mode 100644 index 0000000000..a437a08a49 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/NeonDelegateTests_NDK_Issue.cpp @@ -0,0 +1,63 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NormalizationTestHelper.hpp" +#include "SoftmaxTestHelper.hpp" + +#include + +#include +#include + +#include + +namespace armnnDelegate +{ +// There's a known Android NDK bug which causes this subset of Neon Tests to +// fail. We'll exclude these tests in if we're doing +// a debug build and NDK is less than r21. 
+// The exclusion takes place in test/CMakeLists.txt
+// https://github.com/android/ndk/issues/1135
+
+TEST_SUITE ("Softmax_CpuAccTests")
+{
+
+TEST_CASE ("Softmax_Standard_Beta_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
+    std::vector<float> expectedOutput = {0.00994190481, 0.0445565246, 0.0734612942, 0.329230666, 0.542809606,
+                                         0.710742831, 0.158588171, 0.0961885825, 0.0214625746, 0.0130177103};
+    SoftmaxTestCase(tflite::BuiltinOperator_SOFTMAX, backends, 1, expectedOutput);
+}
+
+TEST_CASE ("Softmax_Different_Beta_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
+    std::vector<float> expectedOutput = {
+        0.0946234912, 0.148399189, 0.172415257, 0.270400971, 0.314161092,
+        0.352414012, 0.224709094, 0.193408906, 0.123322964, 0.106145054};
+    SoftmaxTestCase(tflite::BuiltinOperator_SOFTMAX, backends, 0.3, expectedOutput);
+}
+
+TEST_CASE ("Log_Softmax_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
+    std::vector<float> expectedOutput =
+        {-4.61099672, -3.11099672, -2.61099672, -1.11099672, -0.610996664,
+         -0.341444582, -1.84144461, -2.34144449, -3.84144449, -4.34144449};
+    SoftmaxTestCase(tflite::BuiltinOperator_LOG_SOFTMAX, backends, 0, expectedOutput);
+}
+} // TEST_SUITE ("Softmax_CpuAccTests")
+
+TEST_SUITE("L2Normalization_CpuAccTests")
+{
+
+TEST_CASE ("L2NormalizationFp32Test_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
+    L2NormalizationTest(backends);
+}
+} // TEST_SUITE("L2Normalization_CpuAccTests")
+}
\ No newline at end of file
diff --git a/arch/arm/ARMnn/delegate/src/test/NormalizationTest.cpp b/arch/arm/ARMnn/delegate/src/test/NormalizationTest.cpp
new file mode 100644
index 0000000000..e33dcf056e
--- /dev/null
+++ b/arch/arm/ARMnn/delegate/src/test/NormalizationTest.cpp
@@ -0,0 +1,72 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT +// + +#include "NormalizationTestHelper.hpp" + +#include + +#include + +#include + +namespace armnnDelegate +{ + +TEST_SUITE("L2Normalization_CpuRefTests") +{ + +TEST_CASE ("L2NormalizationFp32Test_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + L2NormalizationTest(backends); +} + +} // TEST_SUITE("L2Normalization_CpuRefTests") + +TEST_SUITE("L2Normalization_GpuAccTests") +{ + +TEST_CASE ("L2NormalizationFp32Test_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + L2NormalizationTest(backends); +} + +} // TEST_SUITE("L2Normalization_GpuAccTests") + +TEST_SUITE("LocalResponseNormalization_CpuRefTests") +{ + +TEST_CASE ("LocalResponseNormalizationTest_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + LocalResponseNormalizationTest(backends, 3, 1.f, 1.f, 1.f); +} + +} // TEST_SUITE("LocalResponseNormalization_CpuRefTests") + +TEST_SUITE("LocalResponseNormalization_CpuAccTests") +{ + +TEST_CASE ("LocalResponseNormalizationTest_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + LocalResponseNormalizationTest(backends, 3, 1.f, 1.f, 1.f); +} + +} // TEST_SUITE("LocalResponseNormalization_CpuAccTests") + +TEST_SUITE("LocalResponseNormalization_GpuAccTests") +{ + +TEST_CASE ("LocalResponseNormalizationTest_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + LocalResponseNormalizationTest(backends, 3, 1.f, 1.f, 1.f); +} + +} // TEST_SUITE("LocalResponseNormalization_GpuAccTests") + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/NormalizationTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/NormalizationTestHelper.hpp new file mode 100644 index 0000000000..ebdfdc1a25 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/NormalizationTestHelper.hpp @@ -0,0 +1,262 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ + +std::vector CreateNormalizationTfLiteModel(tflite::BuiltinOperator normalizationOperatorCode, + tflite::TensorType tensorType, + const std::vector& inputTensorShape, + const std::vector& outputTensorShape, + int32_t radius, + float bias, + float alpha, + float beta, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + + auto inputTensor = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputTensorShape.data(), + inputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input"), + quantizationParameters); + + auto outputTensor = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + tensorType, + 1, + flatBufferBuilder.CreateString("output"), + quantizationParameters); + + std::vector> tensors = { inputTensor, outputTensor }; + + std::vector> buffers; + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + + std::vector operatorInputs = { 0 }; + std::vector subgraphInputs = { 0 }; + + tflite::BuiltinOptions operatorBuiltinOptionsType = BuiltinOptions_L2NormOptions; + flatbuffers::Offset operatorBuiltinOptions = CreateL2NormOptions(flatBufferBuilder, + tflite::ActivationFunctionType_NONE).Union(); + + if (normalizationOperatorCode == tflite::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION) + { + operatorBuiltinOptionsType = BuiltinOptions_LocalResponseNormalizationOptions; + operatorBuiltinOptions = + CreateLocalResponseNormalizationOptions(flatBufferBuilder, radius, bias, alpha, beta).Union(); + } + + // create operator + const std::vector operatorOutputs{ 1 }; + flatbuffers::Offset normalizationOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + const std::vector subgraphOutputs{ 1 }; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&normalizationOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Normalization Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, + normalizationOperatorCode); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + 
flatBufferBuilder.GetSize()); +} + +template +void NormalizationTest(tflite::BuiltinOperator normalizationOperatorCode, + tflite::TensorType tensorType, + const std::vector& backends, + const std::vector& inputShape, + std::vector& outputShape, + std::vector& inputValues, + std::vector& expectedOutputValues, + int32_t radius = 0, + float bias = 0.f, + float alpha = 0.f, + float beta = 0.f, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + std::vector modelBuffer = CreateNormalizationTfLiteModel(normalizationOperatorCode, + tensorType, + inputShape, + outputShape, + radius, + bias, + alpha, + beta, + quantScale, + quantOffset); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + CHECK(tfLiteModel != nullptr); + + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(tfLiteInterpreter, 0, inputValues); + armnnDelegate::FillInput(armnnDelegateInterpreter, 0, inputValues); + + // Run EnqueueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + armnnDelegate::CompareOutputData(tfLiteInterpreter, armnnDelegateInterpreter, outputShape, expectedOutputValues); +} + +void L2NormalizationTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 1, 1, 10 }; + std::vector outputShape { 1, 1, 1, 10 }; + + std::vector inputValues + { + 1.0f, + 2.0f, + 3.0f, + 4.0f, + 5.0f, + 6.0f, + 7.0f, + 8.0f, + 9.0f, + 10.0f + }; + + const float approxInvL2Norm = 0.050964719f; + std::vector expectedOutputValues + { + 1.0f * approxInvL2Norm, + 2.0f * approxInvL2Norm, + 3.0f * approxInvL2Norm, + 4.0f * approxInvL2Norm, + 5.0f * approxInvL2Norm, + 6.0f * approxInvL2Norm, + 7.0f * approxInvL2Norm, + 8.0f * approxInvL2Norm, + 9.0f * approxInvL2Norm, + 10.0f * approxInvL2Norm + }; + + NormalizationTest(tflite::BuiltinOperator_L2_NORMALIZATION, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues); +} + +void LocalResponseNormalizationTest(std::vector& backends, + int32_t radius, + float bias, + float alpha, + float beta) +{ + // Set input data + std::vector inputShape { 2, 2, 2, 1 }; + std::vector outputShape { 2, 2, 2, 1 }; + + std::vector inputValues + { + 1.0f, 2.0f, + 3.0f, 4.0f, + 5.0f, 6.0f, + 7.0f, 8.0f + }; + + std::vector expectedOutputValues + { + 0.5f, 0.400000006f, 0.300000012f, 0.235294119f, + 0.192307696f, 0.16216217f, 0.140000001f, 0.123076923f + }; + + NormalizationTest(tflite::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION, + 
::tflite::TensorType_FLOAT32, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + radius, + bias, + alpha, + beta); +} + +} // anonymous namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/PackTest.cpp b/arch/arm/ARMnn/delegate/src/test/PackTest.cpp new file mode 100644 index 0000000000..aea903bcd0 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/PackTest.cpp @@ -0,0 +1,516 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "PackTestHelper.hpp" + +#include + +#include +#include + +#include + +namespace armnnDelegate +{ + +template +void PackFp32Axis0Test(tflite::TensorType tensorType, std::vector& backends) +{ + std::vector inputShape { 3, 2, 3 }; + std::vector expectedOutputShape { 2, 3, 2, 3 }; + + std::vector> inputValues; + inputValues.push_back( + { + 1, 2, 3, + 4, 5, 6, + + 7, 8, 9, + 10, 11, 12, + + 13, 14, 15, + 16, 17, 18 + }); + + inputValues.push_back( + { + 19, 20, 21, + 22, 23, 24, + + 25, 26, 27, + 28, 29, 30, + + 31, 32, 33, + 34, 35, 36 + }); + + std::vector expectedOutputValues = + { + 1, 2, 3, + 4, 5, 6, + + 7, 8, 9, + 10, 11, 12, + + 13, 14, 15, + 16, 17, 18, + + + 19, 20, 21, + 22, 23, 24, + + 25, 26, 27, + 28, 29, 30, + + 31, 32, 33, + 34, 35, 36 + }; + + PackTest(tflite::BuiltinOperator_PACK, + tensorType, + backends, + inputShape, + expectedOutputShape, + inputValues, + expectedOutputValues, + 0); +} + +template +void PackFp32Axis1Test(tflite::TensorType tensorType, std::vector& backends) +{ + std::vector inputShape { 3, 2, 3 }; + std::vector expectedOutputShape { 3, 2, 2, 3 }; + + std::vector> inputValues; + inputValues.push_back( + { + 1, 2, 3, + 4, 5, 6, + + 7, 8, 9, + 10, 11, 12, + + 13, 14, 15, + 16, 17, 18 + }); + + inputValues.push_back( + { + 19, 20, 21, + 22, 23, 24, + + 25, 26, 27, + 28, 29, 30, + + 31, 32, 33, + 34, 35, 36 + }); + + std::vector expectedOutputValues = + { + 1, 2, 3, + 4, 5, 6, + + 19, 20, 21, + 22, 23, 24, + + + 7, 8, 9, + 10, 11, 12, + + 25, 26, 27, + 28, 29, 30, + + + 13, 14, 15, + 16, 17, 18, + + 31, 32, 33, + 34, 35, 36 + }; + + PackTest(tflite::BuiltinOperator_PACK, + tensorType, + backends, + inputShape, + expectedOutputShape, + inputValues, + expectedOutputValues, + 1); +} + +template +void PackFp32Axis2Test(tflite::TensorType tensorType, std::vector& backends) +{ + std::vector inputShape { 3, 2, 3 }; + std::vector expectedOutputShape { 3, 2, 2, 3 }; + + std::vector> inputValues; + inputValues.push_back( + { + 1, 2, 3, + 4, 5, 6, + + 7, 8, 9, + 10, 11, 12, + + 13, 14, 15, + 16, 17, 18 + }); + + inputValues.push_back( + { + 19, 20, 21, + 22, 23, 24, + + 25, 26, 27, + 28, 29, 30, + + 31, 32, 33, + 34, 35, 36 + }); + + std::vector expectedOutputValues = + { + 1, 2, 3, + 19, 20, 21, + + 4, 5, 6, + 22, 23, 24, + + 7, 8, 9, + 25, 26, 27, + + 10, 11, 12, + 28, 29, 30, + + 13, 14, 15, + 31, 32, 33, + + 16, 17, 18, + 34, 35, 36 + }; + + PackTest(tflite::BuiltinOperator_PACK, + tensorType, + backends, + inputShape, + expectedOutputShape, + inputValues, + expectedOutputValues, + 2); +} + +template +void PackFp32Axis3Test(tflite::TensorType tensorType, std::vector& backends) +{ + std::vector inputShape { 3, 2, 3 }; + std::vector expectedOutputShape { 3, 2, 3, 2 }; + + std::vector> inputValues; + inputValues.push_back( + { + 1, 2, 3, + 4, 5, 6, + + 7, 8, 9, + 10, 11, 12, + + 13, 14, 15, + 16, 17, 18 + }); + + inputValues.push_back( + { + 19, 20, 21, + 22, 23, 24, + + 25, 26, 27, + 28, 29, 30, + + 31, 32, 33, + 
34, 35, 36 + }); + + std::vector expectedOutputValues = + { + 1, 19, + 2, 20, + 3, 21, + + 4, 22, + 5, 23, + 6, 24, + + + 7, 25, + 8, 26, + 9, 27, + + 10, 28, + 11, 29, + 12, 30, + + + 13, 31, + 14, 32, + 15, 33, + + 16, 34, + 17, 35, + 18, 36 + }; + + PackTest(tflite::BuiltinOperator_PACK, + tflite::TensorType_FLOAT32, + backends, + inputShape, + expectedOutputShape, + inputValues, + expectedOutputValues, + 3); +} + +template +void PackFp32Inputs3Test(tflite::TensorType tensorType, std::vector& backends) +{ + std::vector inputShape { 3, 3 }; + std::vector expectedOutputShape { 3, 3, 3 }; + + std::vector> inputValues; + inputValues.push_back( + { + 1, 2, 3, + 4, 5, 6, + 7, 8, 9 + }); + + inputValues.push_back( + { + 10, 11, 12, + 13, 14, 15, + 16, 17, 18 + }); + + inputValues.push_back( + { + 19, 20, 21, + 22, 23, 24, + 25, 26, 27 + }); + + std::vector expectedOutputValues = + { + 1, 2, 3, + 10, 11, 12, + 19, 20, 21, + + 4, 5, 6, + 13, 14, 15, + 22, 23, 24, + + 7, 8, 9, + 16, 17, 18, + 25, 26, 27 + }; + + PackTest(tflite::BuiltinOperator_PACK, + tensorType, + backends, + inputShape, + expectedOutputShape, + inputValues, + expectedOutputValues, + 1); +} + +TEST_SUITE("Pack_CpuAccTests") +{ + +// Fp32 +TEST_CASE ("Pack_Fp32_Axis0_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + PackFp32Axis0Test(tflite::TensorType_FLOAT32, backends); +} + +TEST_CASE ("Pack_Fp32_Axis1_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + PackFp32Axis1Test(tflite::TensorType_FLOAT32, backends); +} + +TEST_CASE ("Pack_Fp32_Axis2_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + PackFp32Axis2Test(tflite::TensorType_FLOAT32, backends); +} + +TEST_CASE ("Pack_Fp32_Axis3_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + PackFp32Axis3Test(tflite::TensorType_FLOAT32, backends); +} + +TEST_CASE ("Pack_Fp32_Inputs3_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + PackFp32Inputs3Test(tflite::TensorType_FLOAT32, backends); +} + +// Uint8 +TEST_CASE ("Pack_Uint8_Axis0_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + PackFp32Axis0Test(tflite::TensorType_UINT8, backends); +} + +TEST_CASE ("Pack_Uint8_Inputs3_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + PackFp32Inputs3Test(tflite::TensorType_UINT8, backends); +} + +// Uint8 +TEST_CASE ("Pack_Int8_Axis0_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + PackFp32Axis0Test(tflite::TensorType_INT8, backends); +} + +TEST_CASE ("Pack_Int8_Inputs3_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + PackFp32Inputs3Test(tflite::TensorType_INT8, backends); +} + +} + +TEST_SUITE("Pack_GpuAccTests") +{ + +// Fp32 +TEST_CASE ("Pack_Fp32_Axis0_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + PackFp32Axis0Test(tflite::TensorType_FLOAT32, backends); +} + +TEST_CASE ("Pack_Fp32_Axis1_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + PackFp32Axis1Test(tflite::TensorType_FLOAT32, backends); +} + +TEST_CASE ("Pack_Fp32_Axis2_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + PackFp32Axis2Test(tflite::TensorType_FLOAT32, backends); +} + +TEST_CASE ("Pack_Fp32_Axis3_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + PackFp32Axis3Test(tflite::TensorType_FLOAT32, backends); +} + +TEST_CASE ("Pack_Fp32_Inputs3_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + PackFp32Inputs3Test(tflite::TensorType_FLOAT32, backends); +} + 
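+// For reference: the quantized Pack cases below drive the same helpers as the Fp32
+// cases above, only switching the tflite::TensorType written into the generated model.
+// Because the helper defaults to quantScale = 1.0f and quantOffset = 0, the same
+// 1..36 test data and expected outputs are exercised through the quantized kernels unchanged.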
+// Uint8 +TEST_CASE ("Pack_Uint8_Axis0_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + PackFp32Axis0Test(tflite::TensorType_UINT8, backends); +} + +TEST_CASE ("Pack_Uint8_Inputs3_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + PackFp32Inputs3Test(tflite::TensorType_UINT8, backends); +} + +// Int8 +TEST_CASE ("Pack_Int8_Axis0_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + PackFp32Axis0Test(tflite::TensorType_INT8, backends); +} + +TEST_CASE ("Pack_Int8_Inputs3_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + PackFp32Inputs3Test(tflite::TensorType_INT8, backends); +} + +} + +TEST_SUITE("Pack_CpuRefTests") +{ + +// Fp32 +TEST_CASE ("Pack_Fp32_Axis0_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + PackFp32Axis0Test(tflite::TensorType_FLOAT32, backends); +} + +TEST_CASE ("Pack_Fp32_Axis1_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + PackFp32Axis1Test(tflite::TensorType_FLOAT32, backends); +} + +TEST_CASE ("Pack_Fp32_Axis2_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + PackFp32Axis2Test(tflite::TensorType_FLOAT32, backends); +} + +TEST_CASE ("Pack_Fp32_Axis3_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + PackFp32Axis3Test(tflite::TensorType_FLOAT32, backends); +} + +TEST_CASE ("Pack_Fp32_Inputs3_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + PackFp32Inputs3Test(tflite::TensorType_FLOAT32, backends); +} + +// Uint8 +TEST_CASE ("Pack_Uint8_Axis0_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + PackFp32Axis0Test(tflite::TensorType_UINT8, backends); +} + +TEST_CASE ("Pack_Uint8_Inputs3_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + PackFp32Inputs3Test(tflite::TensorType_UINT8, backends); +} + +// Int8 +TEST_CASE ("Pack_Int8_Axis0_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + PackFp32Axis0Test(tflite::TensorType_INT8, backends); +} + +TEST_CASE ("Pack_Int8_Inputs3_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + PackFp32Inputs3Test(tflite::TensorType_INT8, backends); +} + +} + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/PackTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/PackTestHelper.hpp new file mode 100644 index 0000000000..0869228326 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/PackTestHelper.hpp @@ -0,0 +1,185 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +namespace +{ + +std::vector CreatePackTfLiteModel(tflite::BuiltinOperator packOperatorCode, + tflite::TensorType tensorType, + std::vector& inputTensorShape, + const std::vector & outputTensorShape, + const int32_t inputTensorNum, + unsigned int axis = 0, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::vector> buffers; + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + + std::vector operatorInputs{}; + const std::vector operatorOutputs{inputTensorNum}; + std::vector subgraphInputs{}; + const std::vector subgraphOutputs{inputTensorNum}; + + std::vector> tensors(inputTensorNum + 1); + for (int i = 0; i < inputTensorNum; ++i) + { + tensors[i] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputTensorShape.data(), + inputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input" + std::to_string(i)), + quantizationParameters); + + // Add number of inputs to vector. + operatorInputs.push_back(i); + subgraphInputs.push_back(i); + } + + // Create output tensor + tensors[inputTensorNum] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output"), + quantizationParameters); + + // create operator + tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_PackOptions; + flatbuffers::Offset operatorBuiltinOptions = + CreatePackOptions(flatBufferBuilder, inputTensorNum, axis).Union(); + + flatbuffers::Offset packOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&packOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Pack Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, packOperatorCode); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void PackTest(tflite::BuiltinOperator packOperatorCode, + tflite::TensorType tensorType, + std::vector& backends, + std::vector& inputShape, + std::vector& expectedOutputShape, + std::vector>& inputValues, + std::vector& 
expectedOutputValues, + unsigned int axis = 0, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + std::vector modelBuffer = CreatePackTfLiteModel(packOperatorCode, + tensorType, + inputShape, + expectedOutputShape, + inputValues.size(), + axis, + quantScale, + quantOffset); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data for all input tensors. + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + // Get single input tensor and assign to interpreters. + auto inputTensorValues = inputValues[i]; + armnnDelegate::FillInput(tfLiteInterpreter, i, inputTensorValues); + armnnDelegate::FillInput(armnnDelegateInterpreter, i, inputTensorValues); + } + + // Run EnqueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + armnnDelegate::CompareOutputData(tfLiteInterpreter, + armnnDelegateInterpreter, + expectedOutputShape, + expectedOutputValues); + + armnnDelegateInterpreter.reset(nullptr); +} + +} // anonymous namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/PadTest.cpp b/arch/arm/ARMnn/delegate/src/test/PadTest.cpp new file mode 100644 index 0000000000..4721b685cc --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/PadTest.cpp @@ -0,0 +1,606 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "PadTestHelper.hpp" + +#include + +#include +#include + +#include + +namespace armnnDelegate +{ + +void Pad2dTest(std::vector& backends, + tflite::BuiltinOperator padOperatorCode = tflite::BuiltinOperator_PAD, + float pad = 0.0f) +{ + // Set input data + std::vector inputShape { 2, 2, 2 }; + std::vector outputShape { 3, 5, 6 }; + std::vector paddingShape { 3, 2 }; + + std::vector inputValues = { 0.0f, 4.0f, + 2.0f, -5.0f, + 6.0f, 1.0f, + 5.0f, -2.0f }; + + std::vector expectedOutputValues = { pad, pad, pad, pad, pad, pad, + pad, pad, pad, pad, pad, pad, + pad, pad, 0.0f, 4.0f, pad, pad, + pad, pad, 2.0f, -5.0f, pad, pad, + pad, pad, pad, pad, pad, pad, + + pad, pad, pad, pad, pad, pad, + pad, pad, pad, pad, pad, pad, + pad, pad, 6.0f, 1.0f, pad, pad, + pad, pad, 5.0f, -2.0f, pad, pad, + pad, pad, pad, pad, pad, pad, + + pad, pad, pad, pad, pad, pad, + pad, pad, pad, pad, pad, pad, + pad, pad, pad, pad, pad, pad, + pad, pad, pad, pad, pad, pad, + pad, pad, pad, pad, pad, pad }; + + std::vector paddingDim = { 0, 1, 2, 1, 2, 2 }; + + PadTest(padOperatorCode, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + paddingShape, + outputShape, + inputValues, + paddingDim, + expectedOutputValues, + pad); +} + +void Pad3dTest(std::vector& backends, + tflite::BuiltinOperator padOperatorCode = tflite::BuiltinOperator_PAD, + float pad = 0.0f) +{ + // Set input data + std::vector inputShape { 2, 2, 2 }; + std::vector outputShape { 3, 5, 6 }; + std::vector paddingShape { 3, 2 }; + + std::vector inputValues = { 0.0f, 4.0f, + 2.0f, 5.0f, + 6.0f, 1.0f, + 5.0f, 2.0f }; + + std::vector expectedOutputValues = { pad, pad, pad, pad, pad, pad, + pad, pad, pad, pad, pad, pad, + pad, pad, 0.0f, 4.0f, pad, pad, + pad, pad, 2.0f, 5.0f, pad, pad, + pad, pad, pad, pad, pad, pad, + + pad, pad, pad, pad, pad, pad, + pad, pad, pad, pad, pad, pad, + pad, pad, 6.0f, 1.0f, pad, pad, + pad, pad, 5.0f, 2.0f, pad, pad, + pad, pad, pad, pad, pad, pad, + + pad, pad, pad, pad, pad, pad, + pad, pad, pad, pad, pad, pad, + pad, pad, pad, pad, pad, pad, + pad, pad, pad, pad, pad, pad, + pad, pad, pad, pad, pad, pad }; + + std::vector paddingDim = { 0, 1, 2, 1, 2, 2 }; + + PadTest(padOperatorCode, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + paddingShape, + outputShape, + inputValues, + paddingDim, + expectedOutputValues, + pad); +} + +void Pad4dTest(std::vector& backends, + tflite::BuiltinOperator padOperatorCode = tflite::BuiltinOperator_PAD, + float pad = 0.0f) +{ + // Set input data + std::vector inputShape { 2, 2, 3, 2 }; + std::vector outputShape { 4, 5, 7, 4 }; + std::vector paddingShape { 4, 2 }; + + std::vector inputValues = { 0.0f, 1.0f, + 2.0f, 3.0f, + 4.0f, 5.0f, + + 6.0f, 7.0f, + 8.0f, 9.0f, + 10.0f, 11.0f, + + 12.0f, 13.0f, + 14.0f, 15.0f, + 16.0f, 17.0f, + + 18.0f, 19.0f, + 20.0f, 21.0f, + 22.0f, 23.0f }; + + std::vector expectedOutputValues = { pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, 
pad, + + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, 0.0f, 1.0f, pad, + pad, 2.0f, 3.0f, pad, + pad, 4.0f, 5.0f, pad, + pad, pad, pad, pad, + + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, 6.0f, 7.0f, pad, + pad, 8.0f, 9.0f, pad, + pad, 10.0f, 11.0f, pad, + pad, pad, pad, pad, + + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, 12.0f, 13.0f, pad, + pad, 14.0f, 15.0f, pad, + pad, 16.0f, 17.0f, pad, + pad, pad, pad, pad, + + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, 18.0f, 19.0f, pad, + pad, 20.0f, 21.0f, pad, + pad, 22.0f, 23.0f, pad, + pad, pad, pad, pad, + + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad, + pad, pad, pad, pad }; + + std::vector paddingDim = { 1, 1, 2, 1, 3, 1, 1, 1 }; + + PadTest(padOperatorCode, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + paddingShape, + outputShape, + inputValues, + paddingDim, + expectedOutputValues, + pad); +} + +void PadInt8Test(std::vector& backends, + tflite::BuiltinOperator padOperatorCode = tflite::BuiltinOperator_PAD, + int8_t paddingValue = 0, + int8_t p = 3, + float quantizationScale = -2.0f, + int32_t quantizationOffset = 3) +{ + // Set input data + std::vector inputShape { 2, 2, 2 }; + std::vector outputShape { 3, 5, 6 }; + std::vector paddingShape { 3, 2 }; + + std::vector inputValues = { 0, 4, + 2, -5, + 6, 1, + 5, -2 }; + + std::vector expectedOutputValues = { p, p, p, p, p, p, + p, p, p, p, p, p, + p, p, 0, 4, p, p, + p, p, 2, -5, p, p, + p, p, p, p, p, p, + + p, p, p, p, p, p, + p, p, p, p, p, p, + p, p, 6, 1, p, p, + p, p, 5, -2, p, p, + p, p, p, p, p, p, + + p, p, p, p, p, p, + p, p, p, p, p, p, + p, p, p, p, p, p, + p, p, p, p, p, p, + p, p, p, p, p, p 
}; + + std::vector paddingDim = { 0, 1, 2, 1, 2, 2 }; + + PadTest(padOperatorCode, + ::tflite::TensorType_INT8, + backends, + inputShape, + paddingShape, + outputShape, + inputValues, + paddingDim, + expectedOutputValues, + paddingValue, + quantizationScale, + quantizationOffset); +} + +void PadUint8Test(std::vector& backends, + tflite::BuiltinOperator padOperatorCode = tflite::BuiltinOperator_PAD, + uint8_t paddingValue = 0, + uint8_t p = 3, + float quantizationScale = -2.0f, + int32_t quantizationOffset = 3) +{ + // Set input data + std::vector inputShape { 2, 2, 2 }; + std::vector outputShape { 3, 5, 6 }; + std::vector paddingShape { 3, 2 }; + + std::vector inputValues = { 0, 4, + 2, 5, + 6, 1, + 5, 2 }; + + std::vector expectedOutputValues = { p, p, p, p, p, p, + p, p, p, p, p, p, + p, p, 0, 4, p, p, + p, p, 2, 5, p, p, + p, p, p, p, p, p, + + p, p, p, p, p, p, + p, p, p, p, p, p, + p, p, 6, 1, p, p, + p, p, 5, 2, p, p, + p, p, p, p, p, p, + + p, p, p, p, p, p, + p, p, p, p, p, p, + p, p, p, p, p, p, + p, p, p, p, p, p, + p, p, p, p, p, p }; + + std::vector paddingDim = { 0, 1, 2, 1, 2, 2 }; + + PadTest(padOperatorCode, + ::tflite::TensorType_UINT8, + backends, + inputShape, + paddingShape, + outputShape, + inputValues, + paddingDim, + expectedOutputValues, + paddingValue, + quantizationScale, + quantizationOffset); +} + +TEST_SUITE("Pad_CpuRefTests") +{ + +TEST_CASE ("Pad2d_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + Pad2dTest(backends); +} + +TEST_CASE ("Pad3d_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + Pad3dTest(backends); +} + +TEST_CASE ("Pad4d_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + Pad4dTest(backends); +} + +TEST_CASE ("Pad_Int8_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + PadInt8Test(backends); +} + +TEST_CASE ("Pad_Uint8_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + PadUint8Test(backends); +} + +TEST_CASE ("PadV22d_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + Pad2dTest(backends, tflite::BuiltinOperator_PADV2, -2.5); +} + +TEST_CASE ("PadV23d_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + Pad3dTest(backends, tflite::BuiltinOperator_PADV2, 2.0); +} + +TEST_CASE ("PadV24d_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + Pad4dTest(backends, tflite::BuiltinOperator_PADV2, -1.33); +} + +TEST_CASE ("PadV2_Int8_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + PadInt8Test(backends, tflite::BuiltinOperator_PADV2, -1, -1); +} + +TEST_CASE ("PadV2_Uint8_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + PadUint8Test(backends, tflite::BuiltinOperator_PADV2, -1, -1); +} + +} // TEST_SUITE("Pad_CpuRefTests") + +TEST_SUITE("Pad_CpuAccTests") +{ + +TEST_CASE ("Pad2d_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + Pad2dTest(backends); +} + +TEST_CASE ("Pad3d_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + Pad3dTest(backends); +} + +TEST_CASE ("Pad4d_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + Pad4dTest(backends); +} + +TEST_CASE ("Pad_Int8_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + PadInt8Test(backends); +} + +TEST_CASE ("Pad_Uint8_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + PadUint8Test(backends); +} + +TEST_CASE ("PadV22d_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + 
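+    // PADV2 differs from PAD in that the fill value is supplied as an extra input
+    // tensor rather than being implicitly zero; the 2D helper is reused here with a
+    // fill value of -2.5f, so every padded element in the expected output is -2.5f.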
Pad2dTest(backends, tflite::BuiltinOperator_PADV2, -2.5); +} + +TEST_CASE ("PadV23d_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + Pad3dTest(backends, tflite::BuiltinOperator_PADV2, 2.0); +} + +TEST_CASE ("PadV24d_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + Pad4dTest(backends, tflite::BuiltinOperator_PADV2, -1.33); +} + +TEST_CASE ("PadV2_Int8_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + PadInt8Test(backends, tflite::BuiltinOperator_PADV2, -1, -1); +} + +TEST_CASE ("PadV2_Uint8_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + PadUint8Test(backends, tflite::BuiltinOperator_PADV2, -1, -1); +} + +} // TEST_SUITE("Pad_CpuAccTests") + +TEST_SUITE("Pad_GpuAccTests") +{ + +TEST_CASE ("Pad2d_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + Pad2dTest(backends); +} + +TEST_CASE ("Pad3d_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + Pad3dTest(backends); +} + +TEST_CASE ("Pad4d_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + Pad4dTest(backends); +} + +TEST_CASE ("Pad_Int8_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + PadInt8Test(backends); +} + +TEST_CASE ("Pad_Uint8_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + PadUint8Test(backends); +} + +TEST_CASE ("PadV22d_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + Pad2dTest(backends, tflite::BuiltinOperator_PADV2, -2.5); +} + +TEST_CASE ("PadV23d_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + Pad3dTest(backends, tflite::BuiltinOperator_PADV2, 2.0); +} + +TEST_CASE ("PadV24d_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + Pad4dTest(backends, tflite::BuiltinOperator_PADV2, -1.33); +} + +TEST_CASE ("PadV2_Int8_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + PadInt8Test(backends, tflite::BuiltinOperator_PADV2, -1, -1); +} + +TEST_CASE ("PadV2_Uint8_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + PadUint8Test(backends, tflite::BuiltinOperator_PADV2, -1, -1); +} + +} // TEST_SUITE("Pad_GpuAccTests") + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/PadTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/PadTestHelper.hpp new file mode 100644 index 0000000000..5b9a1bcc36 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/PadTestHelper.hpp @@ -0,0 +1,224 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ + +template +std::vector CreatePadTfLiteModel( + tflite::BuiltinOperator padOperatorCode, + tflite::TensorType tensorType, + tflite::MirrorPadMode paddingMode, + const std::vector& inputTensorShape, + const std::vector& paddingTensorShape, + const std::vector& outputTensorShape, + const std::vector& paddingDim, + const std::vector paddingValue, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + + auto inputTensor = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputTensorShape.data(), + inputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input"), + quantizationParameters); + + auto paddingTensor = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(paddingTensorShape.data(), + paddingTensorShape.size()), + tflite::TensorType_INT32, + 1, + flatBufferBuilder.CreateString("padding")); + + auto outputTensor = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + tensorType, + 2, + flatBufferBuilder.CreateString("output"), + quantizationParameters); + + std::vector> tensors = { inputTensor, paddingTensor, outputTensor}; + + std::vector> buffers; + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(paddingDim.data()), + sizeof(int32_t) * paddingDim.size()))); + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + + std::vector operatorInputs; + std::vector subgraphInputs; + + tflite::BuiltinOptions operatorBuiltinOptionsType = BuiltinOptions_PadOptions; + flatbuffers::Offset operatorBuiltinOptions; + + if (padOperatorCode == tflite::BuiltinOperator_PAD) + { + operatorInputs = {{ 0, 1 }}; + subgraphInputs = {{ 0, 1 }}; + operatorBuiltinOptions = CreatePadOptions(flatBufferBuilder).Union(); + } + else if(padOperatorCode == tflite::BuiltinOperator_MIRROR_PAD) + { + operatorInputs = {{ 0, 1 }}; + subgraphInputs = {{ 0, 1 }}; + + operatorBuiltinOptionsType = BuiltinOptions_MirrorPadOptions; + operatorBuiltinOptions = CreateMirrorPadOptions(flatBufferBuilder, paddingMode).Union(); + } + else if (padOperatorCode == tflite::BuiltinOperator_PADV2) + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(paddingValue.data()), + sizeof(T)))); + + const std::vector shape = { 1 }; + auto padValueTensor = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(shape.data(), + shape.size()), + tensorType, + 3, + flatBufferBuilder.CreateString("paddingValue"), + quantizationParameters); + + tensors.push_back(padValueTensor); + + operatorInputs = {{ 0, 1, 3 }}; + subgraphInputs = {{ 0, 1, 3 }}; + + operatorBuiltinOptionsType = BuiltinOptions_PadV2Options; + operatorBuiltinOptions = CreatePadV2Options(flatBufferBuilder).Union(); + } + + // create operator + const std::vector operatorOutputs{ 2 }; + flatbuffers::Offset paddingOperator = + CreateOperator(flatBufferBuilder, + 0, + 
flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + const std::vector subgraphOutputs{ 2 }; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&paddingOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Pad Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, + padOperatorCode); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void PadTest(tflite::BuiltinOperator padOperatorCode, + tflite::TensorType tensorType, + const std::vector& backends, + const std::vector& inputShape, + const std::vector& paddingShape, + std::vector& outputShape, + std::vector& inputValues, + std::vector& paddingDim, + std::vector& expectedOutputValues, + T paddingValue, + float quantScale = 1.0f, + int quantOffset = 0, + tflite::MirrorPadMode paddingMode = tflite::MirrorPadMode_SYMMETRIC) +{ + using namespace tflite; + std::vector modelBuffer = CreatePadTfLiteModel(padOperatorCode, + tensorType, + paddingMode, + inputShape, + paddingShape, + outputShape, + paddingDim, + {paddingValue}, + quantScale, + quantOffset); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + CHECK(tfLiteModel != nullptr); + + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(tfLiteInterpreter, 0, inputValues); + armnnDelegate::FillInput(armnnDelegateInterpreter, 0, inputValues); + + // Run EnqueueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + armnnDelegate::CompareOutputData(tfLiteInterpreter, armnnDelegateInterpreter, outputShape, expectedOutputValues); +} + +} // anonymous namespace diff --git 
a/arch/arm/ARMnn/delegate/src/test/Pooling2dTest.cpp b/arch/arm/ARMnn/delegate/src/test/Pooling2dTest.cpp new file mode 100644 index 0000000000..fd52aee70d --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/Pooling2dTest.cpp @@ -0,0 +1,1275 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "Pooling2dTestHelper.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace armnnDelegate +{ + +void MaxPool2dFP32PaddingValidTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 1, 2, 1 }; + + std::vector inputValues = { -5.0f, 8.0f, -10.0f, 7.0f, + 8.0f, 12.0f, -15.0f, 2.0f, + 3.0f, -4.0f, -1.0f, -11.0f }; + + std::vector expectedOutputValues = { 12.0f, 7.0f }; + + Pooling2dTest(tflite::BuiltinOperator_MAX_POOL_2D, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_VALID, + 2, + 2, + 2, + 2); +} + +void MaxPool2dInt8PaddingValidTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 1, 2, 1 }; + + std::vector inputValues = { -5, 8, -10, 7, + 8, 12, -15, 2, + 3, -4, -1, -11 }; + + std::vector expectedOutputValues = { 12, 7 }; + + Pooling2dTest(tflite::BuiltinOperator_MAX_POOL_2D, + ::tflite::TensorType_INT8, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_VALID, + 2, + 2, + 2, + 2, + tflite::ActivationFunctionType_NONE, + 2.5f, + 1); +} + +void MaxPool2dFP32PaddingSameTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 2, 2, 1 }; + + std::vector inputValues = { -5.0f, 8.0f, -10.0f, 7.0f, + 8.0f, 12.0f, -15.0f, 2.0f, + 3.0f, -4.0f, -1.0f, -11.0f }; + + std::vector expectedOutputValues = { 12.0f, 7.0f, 3.0f, -1.0f }; + + Pooling2dTest(tflite::BuiltinOperator_MAX_POOL_2D, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_SAME, + 2, + 2, + 2, + 2); +} + +void MaxPool2dInt8PaddingSameTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 2, 2, 1 }; + + std::vector inputValues = { -5, 8, -10, 7, + 8, 12, -15, 2, + 3, -4, -1, -11 }; + + std::vector expectedOutputValues = { 12, 7, 3, -1 }; + + Pooling2dTest(tflite::BuiltinOperator_MAX_POOL_2D, + ::tflite::TensorType_INT8, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_SAME, + 2, + 2, + 2, + 2, + tflite::ActivationFunctionType_NONE, + 2.5f, + 1); +} + +void MaxPool2dFP32ReluTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 2, 3, 1 }; + + std::vector inputValues = { -5.0f, -8.0f, -10.0f, 7.0f, + -8.0f, -12.0f, -15.0f, 2.0f, + 3.0f, -4.0f, -1.0f, -11.0f }; + + std::vector expectedOutputValues = { 0.0f, 0.0f, 7.0f, 3.0f, 0.0f, 2.0f }; + + Pooling2dTest(tflite::BuiltinOperator_MAX_POOL_2D, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_VALID, + 1, + 1, + 2, + 2, + ::tflite::ActivationFunctionType_RELU); +} + +void MaxPool2dInt8ReluTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 2, 
3, 1 }; + + std::vector inputValues = { -5, -8, -10, 7, + -8, -12, -15, 2, + 3, -4, -1, -11 }; + + std::vector expectedOutputValues = { 1, 1, 7, 3, 1, 2 }; + + Pooling2dTest(tflite::BuiltinOperator_MAX_POOL_2D, + ::tflite::TensorType_INT8, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_VALID, + 1, + 1, + 2, + 2, + ::tflite::ActivationFunctionType_RELU, + 2.0f, + 1); +} + +void MaxPool2dFP32Relu6Test(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 2, 2, 1 }; + + std::vector inputValues = { -5.0f, -8.0f, -10.0f, 7.0f, + -8.0f, -12.0f, -15.0f, 2.0f, + 3.0f, -4.0f, -1.0f, -11.0f }; + + std::vector expectedOutputValues = { 0.0f, 0.0f, 3.0f, 0.0f }; + + Pooling2dTest(tflite::BuiltinOperator_MAX_POOL_2D, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_SAME, + 2, + 2, + 1, + 1, + ::tflite::ActivationFunctionType_RELU6); +} + +void MaxPool2dInt8Relu6Test(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 2, 2, 1 }; + + std::vector inputValues = { -5, -8, -10, 7, + -8, -12, -15, 2, + 3, -4, -1, -11 }; + + std::vector expectedOutputValues = { 1, 1, 3, 1 }; + + Pooling2dTest(tflite::BuiltinOperator_MAX_POOL_2D, + ::tflite::TensorType_INT8, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_SAME, + 2, + 2, + 1, + 1, + ::tflite::ActivationFunctionType_RELU6, + 2.0f, + 1); +} + +void MaxPool2dUint8PaddingSameTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 2, 2, 1 }; + + std::vector inputValues = { 5, 8, 10, 7, + 8, 12, 15, 2, + 3, 4, 1, 11 }; + + std::vector expectedOutputValues = { 12, 15, 4, 11 }; + + Pooling2dTest(tflite::BuiltinOperator_MAX_POOL_2D, + ::tflite::TensorType_UINT8, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_SAME, + 2, + 2, + 2, + 2, + tflite::ActivationFunctionType_NONE, + 2.5f, + 1); +} + +void MaxPool2dUint8ReluTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 2, 3, 1 }; + + std::vector inputValues = { 12, 8, 10, 15, + 8, 5, 7, 2, + 3, 4, 1, 11 }; + + std::vector expectedOutputValues = { 12, 10, 15, 8, 7, 11 }; + + Pooling2dTest(tflite::BuiltinOperator_MAX_POOL_2D, + ::tflite::TensorType_UINT8, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_VALID, + 1, + 1, + 2, + 2, + ::tflite::ActivationFunctionType_RELU, + 2.0f, + 1); +} + +void MaxPool2dInt16PaddingSameTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 2, 2, 1 }; + + std::vector inputValues = { -5, 8, -10, 7, + 8, 12, -15, 2, + 3, -4, -1, -11 }; + + std::vector expectedOutputValues = { 12, 7, 3, -1 }; + + Pooling2dTest(tflite::BuiltinOperator_MAX_POOL_2D, + ::tflite::TensorType_INT16, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_SAME, + 2, + 2, + 2, + 2, + tflite::ActivationFunctionType_NONE, + 2.5f, + 0); +} + +void MaxPool2dInt16ReluTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 2, 3, 1 }; + + std::vector inputValues = { -5, -8, -10, 7, + -8, -12, -15, 2, + 3, 
-4, -1, -11 }; + + std::vector expectedOutputValues = { 0, 0, 7, 3, 0, 2 }; + + Pooling2dTest(tflite::BuiltinOperator_MAX_POOL_2D, + ::tflite::TensorType_INT16, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_VALID, + 1, + 1, + 2, + 2, + ::tflite::ActivationFunctionType_RELU, + 2.0f, + 0); +} + +void AveragePool2dFP32PaddingValidTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 1, 2, 1 }; + + std::vector inputValues = { -5.0f, 8.0f, -10.0f, 7.0f, + 8.0f, 12.0f, -15.0f, 2.0f, + 3.0f, -4.0f, -1.0f, -11.0f }; + + std::vector expectedOutputValues = { 5.75f, -4.0f }; + + Pooling2dTest(tflite::BuiltinOperator_AVERAGE_POOL_2D, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_VALID, + 2, + 2, + 2, + 2); +} + +void AveragePool2dInt8PaddingValidTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 1, 2, 1 }; + + std::vector inputValues = { -5, 8, -10, 7, + 8, 12, -15, 2, + 3, -4, -1, -11 }; + + std::vector expectedOutputValues = { 6, -4 }; + + Pooling2dTest(tflite::BuiltinOperator_AVERAGE_POOL_2D, + ::tflite::TensorType_INT8, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_VALID, + 2, + 2, + 2, + 2, + tflite::ActivationFunctionType_NONE, + 2.5f, + 1); +} + +void AveragePool2dFP32PaddingSameTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 2, 2, 1 }; + + std::vector inputValues = { -5.0f, 8.0f, -10.0f, 7.0f, + 8.0f, 12.0f, -15.0f, 2.0f, + 3.0f, -4.0f, -1.0f, -11.0f }; + + std::vector expectedOutputValues = { 5.75f, -4.0f, -0.5f, -6.0f }; + + Pooling2dTest(tflite::BuiltinOperator_AVERAGE_POOL_2D, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_SAME, + 2, + 2, + 2, + 2); +} + +void AveragePool2dInt8PaddingSameTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 2, 2, 1 }; + + std::vector inputValues = { -5, 8, -10, 7, + 8, 12, -15, 2, + 3, -4, -1, -11 }; + + std::vector expectedOutputValues = { 6, -4, -1, -6 }; + + Pooling2dTest(tflite::BuiltinOperator_AVERAGE_POOL_2D, + ::tflite::TensorType_INT8, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_SAME, + 2, + 2, + 2, + 2, + tflite::ActivationFunctionType_NONE, + 2.5f, + 1); +} + +void AveragePool2dFP32ReluTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 2, 3, 1 }; + + std::vector inputValues = { -5.0f, 8.0f, -10.0f, 7.0f, + -8.0f, 12.0f, -15.0f, 2.0f, + 3.0f, -4.0f, -1.0f, 11.0f }; + + std::vector expectedOutputValues = { 1.75f, 0.0f, 0.0f, 0.75f, 0.0f, 0.0f }; + + Pooling2dTest(tflite::BuiltinOperator_AVERAGE_POOL_2D, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_VALID, + 1, + 1, + 2, + 2, + ::tflite::ActivationFunctionType_RELU); +} + +void AveragePool2dInt8ReluTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 2, 3, 1 }; + + std::vector inputValues = { -5, 8, -10, 7, + -8, 12, -15, 2, + 3, -4, -1, 11 }; + + std::vector 
expectedOutputValues = { 2, 1, 1, 1, 1, 1 }; + + Pooling2dTest(tflite::BuiltinOperator_AVERAGE_POOL_2D, + ::tflite::TensorType_INT8, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_VALID, + 1, + 1, + 2, + 2, + ::tflite::ActivationFunctionType_RELU, + 2.5f, + 1); +} + +void AveragePool2dFP32Relu6Test(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 2, 2, 1 }; + + std::vector inputValues = { -5.0f, 8.0f, -10.0f, 7.0f, + -8.0f, 12.0f, -15.0f, 2.0f, + 3.0f, -4.0f, -1.0f, 11.0f }; + + std::vector expectedOutputValues = { 0.0f, 0.0f, 3.0f, 0.0f }; + + Pooling2dTest(tflite::BuiltinOperator_AVERAGE_POOL_2D, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_SAME, + 2, + 2, + 1, + 1, + ::tflite::ActivationFunctionType_RELU6); +} + +void AveragePool2dInt8Relu6Test(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 2, 2, 1 }; + + std::vector inputValues = { -5, 8, -10, 7, + -8, 12, -15, 2, + 3, -4, -1, 11 }; + + std::vector expectedOutputValues = { 1, 1, 3, 1 }; + + Pooling2dTest(tflite::BuiltinOperator_AVERAGE_POOL_2D, + ::tflite::TensorType_INT8, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_SAME, + 2, + 2, + 1, + 1, + ::tflite::ActivationFunctionType_RELU6, + 2.5f, + 1); +} + +void AveragePool2dUint8PaddingSameTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 2, 2, 1 }; + + std::vector inputValues = { 5, 8, 10, 7, + 8, 12, 15, 2, + 3, 4, 1, 11 }; + + std::vector expectedOutputValues = { 8, 9, 4, 6 }; + + Pooling2dTest(tflite::BuiltinOperator_AVERAGE_POOL_2D, + ::tflite::TensorType_UINT8, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_SAME, + 2, + 2, + 2, + 2, + tflite::ActivationFunctionType_NONE, + 2.5f, + 1); +} + +void AveragePool2dUint8ReluTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 2, 3, 1 }; + + std::vector inputValues = { 12, 8, 10, 15, + 8, 5, 7, 2, + 3, 4, 1, 11 }; + + std::vector expectedOutputValues = { 8, 8, 9, 5, 4, 5 }; + + Pooling2dTest(tflite::BuiltinOperator_AVERAGE_POOL_2D, + ::tflite::TensorType_UINT8, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_VALID, + 1, + 1, + 2, + 2, + ::tflite::ActivationFunctionType_RELU, + 2.0f, + 1); +} + +void AveragePool2dInt16PaddingSameTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 2, 2, 1 }; + + std::vector inputValues = { -5, 8, -10, 7, + 8, 12, -15, 2, + 3, -4, -1, -11 }; + + std::vector expectedOutputValues = { 6, -4, -1, -6 }; + + Pooling2dTest(tflite::BuiltinOperator_AVERAGE_POOL_2D, + ::tflite::TensorType_INT16, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_SAME, + 2, + 2, + 2, + 2, + tflite::ActivationFunctionType_NONE, + 2.5f, + 0); +} + +void AveragePool2dInt16ReluTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 2, 3, 1 }; + + std::vector inputValues = { -5, 8, -10, 7, + -8, 12, -15, 2, + 3, -4, -1, 11 }; + + std::vector expectedOutputValues = { 2, 0, 0, 1, 0, 0 }; + 
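+    // The INT16 path uses the symmetric quantization parameters passed below
+    // (scale 2.5f, zero point 0): each output is the average of a pooling window,
+    // clamped to be non-negative by the ReLU activation and then re-quantized,
+    // which is why the expected values are small non-negative integers.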
+ Pooling2dTest(tflite::BuiltinOperator_AVERAGE_POOL_2D, + ::tflite::TensorType_INT16, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_VALID, + 1, + 1, + 2, + 2, + ::tflite::ActivationFunctionType_RELU, + 2.5f, + 0); +} + +void L2Pool2dFP32PaddingValidTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 1, 2, 1 }; + + std::vector inputValues = { -5.0f, 8.0f, -10.0f, 7.0f, + 8.0f, 12.0f, -15.0f, 2.0f, + 3.0f, -4.0f, -1.0f, -11.0f }; + + std::vector expectedOutputValues = { 8.616844f, 9.721111f }; + + Pooling2dTest(tflite::BuiltinOperator_L2_POOL_2D, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_VALID, + 2, + 2, + 2, + 2); +} + +void L2Pool2dFP32PaddingSameTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 2, 2, 1 }; + + std::vector inputValues = { -5.0f, 8.0f, -10.0f, 7.0f, + 8.0f, 12.0f, -15.0f, 2.0f, + 3.0f, -4.0f, -1.0f, -11.0f }; + + std::vector expectedOutputValues = { 8.616844f, 9.721111f, 3.535534f, 7.81025f }; + + Pooling2dTest(tflite::BuiltinOperator_L2_POOL_2D, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_SAME, + 2, + 2, + 2, + 2); +} + +void L2Pool2dFP32ReluTest(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 2, 3, 1 }; + + std::vector inputValues = { -5.0f, 8.0f, -10.0f, 7.0f, + -8.0f, 12.0f, -15.0f, 2.0f, + 3.0f, -4.0f, -1.0f, 11.0f }; + + std::vector expectedOutputValues = { 8.616844f, 11.543396f, 9.721111f, 7.632169f, 9.8234415f, 9.367497f }; + + Pooling2dTest(tflite::BuiltinOperator_L2_POOL_2D, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_VALID, + 1, + 1, + 2, + 2, + ::tflite::ActivationFunctionType_RELU); +} + +void L2Pool2dFP32Relu6Test(std::vector& backends) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 2, 2, 1 }; + + std::vector inputValues = { -5.0f, 8.0f, -10.0f, 7.0f, + -8.0f, 12.0f, -15.0f, 2.0f, + 3.0f, -4.0f, -1.0f, 11.0f }; + + std::vector expectedOutputValues = { 5.0f, 6.0f, 3.0f, 1.0f }; + + Pooling2dTest(tflite::BuiltinOperator_L2_POOL_2D, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + ::tflite::Padding_SAME, + 2, + 2, + 1, + 1, + ::tflite::ActivationFunctionType_RELU6); +} + +TEST_SUITE("Pooling2d_GpuAccTests") +{ + +TEST_CASE ("MaxPooling2d_FP32_PaddingValid_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + MaxPool2dFP32PaddingValidTest(backends); +} + +TEST_CASE ("MaxPooling2d_Int8_PaddingValid_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + MaxPool2dInt8PaddingValidTest(backends); +} + +TEST_CASE ("MaxPooling2d_FP32_PaddingSame_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + MaxPool2dFP32PaddingSameTest(backends); +} + +TEST_CASE ("MaxPooling2d_Int8_PaddingSame_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + MaxPool2dInt8PaddingSameTest(backends); +} + +TEST_CASE ("MaxPooling2d_FP32_Relu_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + MaxPool2dFP32ReluTest(backends); +} + +TEST_CASE 
("MaxPooling2d_Int8_Relu_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + MaxPool2dInt8ReluTest(backends); +} + +TEST_CASE ("MaxPooling2d_FP32_Relu6_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + MaxPool2dFP32Relu6Test(backends); +} + +TEST_CASE ("MaxPooling2d_Int8_Relu6_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + MaxPool2dInt8Relu6Test(backends); +} + +TEST_CASE ("MaxPooling2d_Uint8_PaddingSame_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + MaxPool2dUint8PaddingSameTest(backends); +} + +TEST_CASE ("MaxPooling2d_Uint8_Relu_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + MaxPool2dUint8ReluTest(backends); +} + +TEST_CASE ("AveragePooling2d_FP32_PaddingValid_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + AveragePool2dFP32PaddingValidTest(backends); +} + +TEST_CASE ("AveragePooling2d_Int8_PaddingValid_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + AveragePool2dInt8PaddingValidTest(backends); +} + +TEST_CASE ("AveragePooling2d_FP32_PaddingSame_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + AveragePool2dFP32PaddingSameTest(backends); +} + +TEST_CASE ("AveragePooling2d_Int8_PaddingSame_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + AveragePool2dInt8PaddingSameTest(backends); +} + +TEST_CASE ("AveragePooling2d_FP32_Relu_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + AveragePool2dFP32ReluTest(backends); +} + +TEST_CASE ("AveragePooling2d_FP32_Relu6_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + AveragePool2dFP32Relu6Test(backends); +} + +TEST_CASE ("AveragePooling2d_Int8_Relu_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + AveragePool2dInt8ReluTest(backends); +} + +TEST_CASE ("AveragePooling2d_Int8_Relu6_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + AveragePool2dInt8Relu6Test(backends); +} + +TEST_CASE ("AveragePooling2d_Uint8_PaddingSame_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + AveragePool2dUint8PaddingSameTest(backends); +} + +TEST_CASE ("AveragePooling2d_Uint8_Relu_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + AveragePool2dUint8ReluTest(backends); +} + +TEST_CASE ("L2Pooling2d_FP32_PaddingValid_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + L2Pool2dFP32PaddingValidTest(backends); +} + +TEST_CASE ("L2Pooling2d_FP32_PaddingSame_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + L2Pool2dFP32PaddingSameTest(backends); +} + +TEST_CASE ("L2Pooling2d_FP32_Relu_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + L2Pool2dFP32ReluTest(backends); +} + +TEST_CASE ("L2Pooling2d_FP32_Relu6_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + L2Pool2dFP32Relu6Test(backends); +} + +} // TEST_SUITE("Pooling2d_GpuAccTests") + +TEST_SUITE("Pooling2d_CpuAccTests") +{ + +TEST_CASE ("MaxPooling2d_FP32_PaddingValid_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + MaxPool2dFP32PaddingValidTest(backends); +} + +TEST_CASE ("MaxPooling2d_Int8_PaddingValid_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + MaxPool2dInt8PaddingValidTest(backends); +} + +TEST_CASE ("MaxPooling2d_FP32_PaddingSame_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + MaxPool2dFP32PaddingSameTest(backends); +} + +TEST_CASE 
("MaxPooling2d_Int8_PaddingSame_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + MaxPool2dInt8PaddingSameTest(backends); +} + +TEST_CASE ("MaxPooling2d_FP32_Relu_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + MaxPool2dFP32ReluTest(backends); +} + +TEST_CASE ("MaxPooling2d_Int8_Relu_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + MaxPool2dInt8ReluTest(backends); +} + +TEST_CASE ("MaxPooling2d_FP32_Relu6_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + MaxPool2dFP32Relu6Test(backends); +} + +TEST_CASE ("MaxPooling2d_Int8_Relu6_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + MaxPool2dInt8Relu6Test(backends); +} + +TEST_CASE ("MaxPooling2d_Uint8_PaddingSame_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + MaxPool2dUint8PaddingSameTest(backends); +} + +TEST_CASE ("MaxPooling2d_Uint8_Relu_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + MaxPool2dUint8ReluTest(backends); +} + +TEST_CASE ("AveragePooling2d_FP32_PaddingValid_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + AveragePool2dFP32PaddingValidTest(backends); +} + +TEST_CASE ("AveragePooling2d_Int8_PaddingValid_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + AveragePool2dInt8PaddingValidTest(backends); +} + +TEST_CASE ("AveragePooling2d_FP32_PaddingSame_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + AveragePool2dFP32PaddingSameTest(backends); +} + +TEST_CASE ("AveragePooling2d_Int8_PaddingSame_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + AveragePool2dInt8PaddingSameTest(backends); +} + +TEST_CASE ("AveragePooling2d_FP32_Relu_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + AveragePool2dFP32ReluTest(backends); +} + +TEST_CASE ("AveragePooling2d_FP32_Relu6_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + AveragePool2dFP32Relu6Test(backends); +} + +TEST_CASE ("AveragePooling2d_Int8_Relu_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + AveragePool2dInt8ReluTest(backends); +} + +TEST_CASE ("AveragePooling2d_Int8_Relu6_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + AveragePool2dInt8Relu6Test(backends); +} + +TEST_CASE ("AveragePooling2d_Uint8_PaddingSame_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + AveragePool2dUint8PaddingSameTest(backends); +} + +TEST_CASE ("AveragePooling2d_Uint8_Relu_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + AveragePool2dUint8ReluTest(backends); +} + +TEST_CASE ("L2Pooling2d_FP32_PaddingValid_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + L2Pool2dFP32PaddingValidTest(backends); +} + +TEST_CASE ("L2Pooling2d_FP32_PaddingSame_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + L2Pool2dFP32PaddingSameTest(backends); +} + +TEST_CASE ("L2Pooling2d_FP32_Relu_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + L2Pool2dFP32ReluTest(backends); +} + +TEST_CASE ("L2Pooling2d_FP32_Relu6_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + L2Pool2dFP32Relu6Test(backends); +} + +} // TEST_SUITE("Pooling2d_CpuAccTests") + +TEST_SUITE("Pooling2d_CpuRefTests") +{ + +TEST_CASE ("MaxPooling2d_FP32_PaddingValid_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MaxPool2dFP32PaddingValidTest(backends); +} + +TEST_CASE 
("MaxPooling2d_Int8_PaddingValid_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MaxPool2dInt8PaddingValidTest(backends); +} + +TEST_CASE ("MaxPooling2d_FP32_PaddingSame_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MaxPool2dFP32PaddingSameTest(backends); +} + +TEST_CASE ("MaxPooling2d_Int8_PaddingSame_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MaxPool2dInt8PaddingSameTest(backends); +} + +TEST_CASE ("MaxPooling2d_FP32_Relu_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MaxPool2dFP32ReluTest(backends); +} + +TEST_CASE ("MaxPooling2d_Int8_Relu_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MaxPool2dInt8ReluTest(backends); +} + +TEST_CASE ("MaxPooling2d_FP32_Relu6_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MaxPool2dFP32Relu6Test(backends); +} + +TEST_CASE ("MaxPooling2d_Int8_Relu6_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MaxPool2dInt8Relu6Test(backends); +} + +TEST_CASE ("MaxPooling2d_Uint8_PaddingSame_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MaxPool2dUint8PaddingSameTest(backends); +} + +TEST_CASE ("MaxPooling2d_Uint8_Relu_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MaxPool2dUint8ReluTest(backends); +} + +TEST_CASE ("MaxPooling2d_Int16_PaddingSame_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MaxPool2dInt16PaddingSameTest(backends); +} + +TEST_CASE ("MaxPooling2d_Int16_Relu_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + MaxPool2dInt16ReluTest(backends); +} + +TEST_CASE ("AveragePooling2d_FP32_PaddingValid_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + AveragePool2dFP32PaddingValidTest(backends); +} + +TEST_CASE ("AveragePooling2d_Int8_PaddingValid_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + AveragePool2dInt8PaddingValidTest(backends); +} + +TEST_CASE ("AveragePooling2d_FP32_PaddingSame_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + AveragePool2dFP32PaddingSameTest(backends); +} + +TEST_CASE ("AveragePooling2d_Int8_PaddingSame_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + AveragePool2dInt8PaddingSameTest(backends); +} + +TEST_CASE ("AveragePooling2d_FP32_Relu_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + AveragePool2dFP32ReluTest(backends); +} + +TEST_CASE ("AveragePooling2d_FP32_Relu6_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + AveragePool2dFP32Relu6Test(backends); +} + +TEST_CASE ("AveragePooling2d_Int8_Relu_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + AveragePool2dInt8ReluTest(backends); +} + +TEST_CASE ("AveragePooling2d_Int8_Relu6_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + AveragePool2dInt8Relu6Test(backends); +} + +TEST_CASE ("AveragePooling2d_Uint8_PaddingSame_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + AveragePool2dUint8PaddingSameTest(backends); +} + +TEST_CASE ("AveragePooling2d_Uint8_Relu_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + AveragePool2dUint8ReluTest(backends); +} + +TEST_CASE ("AveragePooling2d_Int16_PaddingSame_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + AveragePool2dInt16PaddingSameTest(backends); +} + +TEST_CASE ("AveragePooling2d_Int16_Relu_CpuRef_Test") +{ + std::vector backends = 
{ armnn::Compute::CpuRef }; + AveragePool2dInt16ReluTest(backends); +} + +TEST_CASE ("L2Pooling2d_FP32_PaddingValid_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + L2Pool2dFP32PaddingValidTest(backends); +} + +TEST_CASE ("L2Pooling2d_FP32_PaddingSame_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + L2Pool2dFP32PaddingSameTest(backends); +} + +TEST_CASE ("L2Pooling2d_FP32_Relu_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + L2Pool2dFP32ReluTest(backends); +} + +TEST_CASE ("L2Pooling2d_FP32_Relu6_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + L2Pool2dFP32Relu6Test(backends); +} + +} // TEST_SUITE("Pooling2d_CpuRefTests") + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/Pooling2dTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/Pooling2dTestHelper.hpp new file mode 100644 index 0000000000..b5d36b0231 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/Pooling2dTestHelper.hpp @@ -0,0 +1,196 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ + +std::vector CreatePooling2dTfLiteModel( + tflite::BuiltinOperator poolingOperatorCode, + tflite::TensorType tensorType, + const std::vector & inputTensorShape, + const std::vector & outputTensorShape, + tflite::Padding padding = tflite::Padding_SAME, + int32_t strideWidth = 0, + int32_t strideHeight = 0, + int32_t filterWidth = 0, + int32_t filterHeight = 0, + tflite::ActivationFunctionType fusedActivation = tflite::ActivationFunctionType_NONE, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::vector> buffers; + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + + std::array, 2> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputTensorShape.data(), + inputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input"), + quantizationParameters); + + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output"), + quantizationParameters); + + // create operator + tflite::BuiltinOptions operatorBuiltinOptionsType = BuiltinOptions_Pool2DOptions; + flatbuffers::Offset operatorBuiltinOptions = CreatePool2DOptions(flatBufferBuilder, + padding, + strideWidth, + strideHeight, + filterWidth, + filterHeight, + fusedActivation).Union(); + + const std::vector operatorInputs{0}; + const std::vector operatorOutputs{1}; + flatbuffers::Offset poolingOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + const std::vector subgraphInputs{0}; + const std::vector subgraphOutputs{1}; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + 
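+ // Note (editorial): the single subgraph wraps the one pooling operator; tensor 0 (input) and
+ // tensor 1 (output) declared above double as the subgraph's inputs and outputs.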
flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&poolingOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Pooling2d Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, poolingOperatorCode); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void Pooling2dTest(tflite::BuiltinOperator poolingOperatorCode, + tflite::TensorType tensorType, + std::vector& backends, + std::vector& inputShape, + std::vector& outputShape, + std::vector& inputValues, + std::vector& expectedOutputValues, + tflite::Padding padding = tflite::Padding_SAME, + int32_t strideWidth = 0, + int32_t strideHeight = 0, + int32_t filterWidth = 0, + int32_t filterHeight = 0, + tflite::ActivationFunctionType fusedActivation = tflite::ActivationFunctionType_NONE, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + std::vector modelBuffer = CreatePooling2dTfLiteModel(poolingOperatorCode, + tensorType, + inputShape, + outputShape, + padding, + strideWidth, + strideHeight, + filterWidth, + filterHeight, + fusedActivation, + quantScale, + quantOffset); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + CHECK(tfLiteModel != nullptr); + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + auto tfLiteDelegateInputId = tfLiteInterpreter->inputs()[0]; + auto tfLiteDelegateInputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateInputId); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + tfLiteDelegateInputData[i] = inputValues[i]; + } + + auto armnnDelegateInputId = armnnDelegateInterpreter->inputs()[0]; + auto armnnDelegateInputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateInputId); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + armnnDelegateInputData[i] = inputValues[i]; + } + + // Run EnqueueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + 
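+ // Note (editorial): the plain TfLite interpreter above serves as the reference run; the
+ // delegate-backed interpreter below executes the identical model through ArmNN so that
+ // CompareOutputData can check the two result tensors element by element.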
CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + armnnDelegate::CompareOutputData(tfLiteInterpreter, armnnDelegateInterpreter, outputShape, expectedOutputValues); +} + +} // anonymous namespace + + + + diff --git a/arch/arm/ARMnn/delegate/src/test/PreluTest.cpp b/arch/arm/ARMnn/delegate/src/test/PreluTest.cpp new file mode 100644 index 0000000000..d9e08d20ca --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/PreluTest.cpp @@ -0,0 +1,134 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "PreluTestHelper.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace armnnDelegate { + +void PreluFloatSimpleTest(std::vector & backends, bool isAlphaConst, bool isDynamicOutput = false) { + std::vector inputShape { 1, 2, 3 }; + std::vector alphaShape { 1 }; + std::vector outputShape { 1, 2, 3 }; + + if (isDynamicOutput) + { + outputShape.clear(); + } + + std::vector inputData = { -14.f, 2.f, 0.f, 1.f, -5.f, 14.f }; + std::vector alphaData = { 0.5f }; + std::vector expectedOutput = { -7.f, 2.f, 0.f, 1.f, -2.5f, 14.f }; + + PreluTest(tflite::BuiltinOperator_PRELU, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + alphaShape, + outputShape, + inputData, + alphaData, + expectedOutput, + isAlphaConst); +} + +TEST_SUITE("Prelu_CpuRefTests") +{ + +TEST_CASE ("PreluFp32SimpleConstTest_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + PreluFloatSimpleTest(backends, true); +} + +TEST_CASE ("PreluFp32SimpleTest_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + PreluFloatSimpleTest(backends, false); +} + +TEST_CASE ("PreluFp32SimpleConstDynamicTest_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + PreluFloatSimpleTest(backends, true, true); +} + +TEST_CASE ("PreluFp32SimpleDynamicTest_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + PreluFloatSimpleTest(backends, false, true); +} + +} // TEST_SUITE("Prelu_CpuRefTests") + +TEST_SUITE("Prelu_CpuAccTests") +{ + +TEST_CASE ("PreluFp32SimpleConstTest_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + PreluFloatSimpleTest(backends, true); +} + +TEST_CASE ("PreluFp32SimpleTest_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + PreluFloatSimpleTest(backends, false); +} + +TEST_CASE ("PreluFp32SimpleConstDynamicTest_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + PreluFloatSimpleTest(backends, true, true); +} + +TEST_CASE ("PreluFp32SimpleDynamicTest_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + PreluFloatSimpleTest(backends, false, true); +} + +} // TEST_SUITE("Prelu_CpuAccTests") + +TEST_SUITE("Prelu_GpuAccTests") +{ + +TEST_CASE ("PreluFp32SimpleConstTest_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + PreluFloatSimpleTest(backends, true); +} + +TEST_CASE ("PreluFp32SimpleTest_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + PreluFloatSimpleTest(backends, false); +} + +TEST_CASE ("PreluFp32SimpleConstDynamicTest_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + PreluFloatSimpleTest(backends, true, true); +} + +TEST_CASE ("PreluFp32SimpleDynamicTest_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + PreluFloatSimpleTest(backends, false, true); +} + +} // TEST_SUITE("Prelu_GpuAccTests") + +} \ No newline at end of file diff --git 
a/arch/arm/ARMnn/delegate/src/test/PreluTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/PreluTestHelper.hpp new file mode 100644 index 0000000000..b6c18ccdfb --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/PreluTestHelper.hpp @@ -0,0 +1,193 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ + +std::vector CreatePreluTfLiteModel(tflite::BuiltinOperator preluOperatorCode, + tflite::TensorType tensorType, + const std::vector& inputShape, + const std::vector& alphaShape, + const std::vector& outputShape, + std::vector& alphaData, + bool alphaIsConstant) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::vector> buffers; + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector( + reinterpret_cast(alphaData.data()), sizeof(float) * alphaData.size()))); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ 1.0f }), + flatBufferBuilder.CreateVector({ 0 })); + + auto inputTensor = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputShape.data(), + inputShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input"), + quantizationParameters); + + auto alphaTensor = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(alphaShape.data(), + alphaShape.size()), + tensorType, + 1, + flatBufferBuilder.CreateString("alpha"), + quantizationParameters); + + auto outputTensor = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputShape.data(), + outputShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output"), + quantizationParameters); + + std::vector> tensors = { inputTensor, alphaTensor, outputTensor }; + + const std::vector operatorInputs{0, 1}; + const std::vector operatorOutputs{2}; + flatbuffers::Offset preluOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size())); + + std::vector subgraphInputs{0}; + if (!alphaIsConstant) + { + subgraphInputs.push_back(1); + } + + const std::vector subgraphOutputs{2}; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&preluOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Prelu Operator Model"); + flatbuffers::Offset opCode = CreateOperatorCode(flatBufferBuilder, preluOperatorCode); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&opCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +void 
PreluTest(tflite::BuiltinOperator preluOperatorCode, + tflite::TensorType tensorType, + const std::vector& backends, + const std::vector& inputShape, + const std::vector& alphaShape, + std::vector& outputShape, + std::vector& inputData, + std::vector& alphaData, + std::vector& expectedOutput, + bool alphaIsConstant) +{ + using namespace tflite; + + std::vector modelBuffer = CreatePreluTfLiteModel(preluOperatorCode, + tensorType, + inputShape, + alphaShape, + outputShape, + alphaData, + alphaIsConstant); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + + CHECK(tfLiteModel != nullptr); + + std::unique_ptr armnnDelegateInterpreter; + + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(tfLiteInterpreter, 0, inputData); + armnnDelegate::FillInput(armnnDelegateInterpreter, 0, inputData); + + // Set alpha data if not constant + if (!alphaIsConstant) { + armnnDelegate::FillInput(tfLiteInterpreter, 1, alphaData); + armnnDelegate::FillInput(armnnDelegateInterpreter, 1, alphaData); + } + + // Run EnqueueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0]; + + auto tfLiteDelegateOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + + auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0]; + auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateOutputId); + + for (size_t i = 0; i < expectedOutput.size(); i++) + { + CHECK(expectedOutput[i] == armnnDelegateOutputData[i]); + CHECK(tfLiteDelegateOutputData[i] == expectedOutput[i]); + CHECK(tfLiteDelegateOutputData[i] == armnnDelegateOutputData[i]); + } +} +} // anonymous namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/QuantizationTest.cpp b/arch/arm/ARMnn/delegate/src/test/QuantizationTest.cpp new file mode 100644 index 0000000000..fbc2903d38 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/QuantizationTest.cpp @@ -0,0 +1,455 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "QuantizationTestHelper.hpp" + +#include + +#include +#include + +#include + +namespace armnnDelegate +{ + +// Dequantize operator test functions. 
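+ // Editorial sketch (hedged): these tests rely on the helper's default quantisation parameters
+ // (scale 1.0, offset 0), so DEQUANTIZE is expected to map each raw integer x to the float
+ // (x - 0) * 1.0 = x; that is why expectedOutputValues simply mirror inputValues as floats.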
+void DequantizeUint8Test(std::vector& backends) +{ + std::vector inputShape { 2, 4 }; + std::vector outputShape { 2, 4 }; + + // Set input and output data + std::vector inputValues + { + 0, 1, 2, 3, // Lower bounds + 252, 253, 254, 255 // Upper bounds + }; + std::vector expectedOutputValues + { + 0.f, 1.f, 2.f, 3.f, + 252.f, 253.f, 254.f, 255.f + }; + + QuantizationTest(tflite::BuiltinOperator_DEQUANTIZE, + ::tflite::TensorType_UINT8, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues); +} + +void DequantizeInt8Test(std::vector& backends) +{ + std::vector inputShape { 2, 4 }; + std::vector outputShape { 2, 4 }; + + std::vector inputValues + { + -1, 0, 1, 2, + -128, -127, 126, 127 + }; + std::vector expectedOutputValues + { + -1.f, 0.f, 1.f, 2.f, + -128.f, -127.f, 126.f, 127.f + }; + + QuantizationTest(tflite::BuiltinOperator_DEQUANTIZE, + ::tflite::TensorType_INT8, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues); +} + +void DequantizeInt16Test(std::vector& backends) +{ + std::vector inputShape { 2, 5 }; + std::vector outputShape { 2, 5 }; + + std::vector inputValues + { + -1, 0, 1, 2, + -32768, -16384, 16384, 32767 + }; + std::vector expectedOutputValues + { + -1.f, 0.f, 1.f, 2.f, + -32768.f, -16384.f, 16384.f, 32767.f + }; + + QuantizationTest(tflite::BuiltinOperator_DEQUANTIZE, + ::tflite::TensorType_INT16, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues); +} + +// Quantize operator test functions. +void QuantizeFloat32Uint8Test(std::vector& backends) +{ + std::vector inputShape { 2, 4 }; + std::vector outputShape { 2, 4 }; + + // Set input and output data + std::vector inputValues + { + -1.f, 0.f, 1.f, 2.f, // Lower bounds + 252.f, 253.f, 255.f, 256.f // Upper bounds + }; + std::vector expectedOutputValues + { + 0, 0, 1, 2, + 252, 253, 255, 255 + }; + + QuantizationTest(tflite::BuiltinOperator_QUANTIZE, + ::tflite::TensorType_FLOAT32, + ::tflite::TensorType_UINT8, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues); +} + +void QuantizeFloat32Int8Test(std::vector& backends) +{ + std::vector inputShape { 2, 4 }; + std::vector outputShape { 2, 4 }; + + std::vector inputValues + { + -1.f, 0.f, 1.f, 2.f, + -128.5f, -127.f, 126.f, 127.5f + }; + std::vector expectedOutputValues + { + -1, 0, 1, 2, + -128, -127, 126, 127 + }; + + QuantizationTest(tflite::BuiltinOperator_QUANTIZE, + ::tflite::TensorType_FLOAT32, + ::tflite::TensorType_INT8, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues); +} + +void QuantizeFloat32Int16Test(std::vector& backends) +{ + std::vector inputShape { 2, 4 }; + std::vector outputShape { 2, 4 }; + + std::vector inputValues + { + -1.f, 0.f, 1.f, 2.f, + -32768.5f, -16384.f, 16384.f, 32767.5f + }; + std::vector expectedOutputValues + { + -1, 0, 1, 2, + -32768, -16384, 16384, 32767 + }; + + QuantizationTest(tflite::BuiltinOperator_QUANTIZE, + ::tflite::TensorType_FLOAT32, + ::tflite::TensorType_INT16, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues); +} + +void QuantizeInt16Int16Test(std::vector& backends) +{ + std::vector inputShape { 2, 4 }; + std::vector outputShape { 2, 4 }; + + std::vector inputValues + { + -1, 0, 1, 2, + -32768, -16384, 16384, 32767 + }; + std::vector expectedOutputValues + { + -1, 0, 1, 2, + -32768, -16384, 16384, 32767 + }; + + QuantizationTest(tflite::BuiltinOperator_QUANTIZE, 
+ ::tflite::TensorType_INT16, + ::tflite::TensorType_INT16, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues); +} + +void QuantizeInt16Int8Test(std::vector& backends) +{ + std::vector inputShape { 2, 4 }; + std::vector outputShape { 2, 4 }; + + std::vector inputValues + { + -1, 0, 1, 2, + -32768, -16384, 16384, 32767 + }; + std::vector expectedOutputValues + { + -1, 0, 1, 2, + -128, -128, 127, 127 + }; + + QuantizationTest(tflite::BuiltinOperator_QUANTIZE, + ::tflite::TensorType_INT16, + ::tflite::TensorType_INT8, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues); +} + +void QuantizeInt8Uint8Test(std::vector& backends) +{ + std::vector inputShape { 2, 4 }; + std::vector outputShape { 2, 4 }; + + std::vector inputValues + { + -1, 0, 1, 2, + -128, -127, 126, 127 + }; + std::vector expectedOutputValues + { + 0, 0, 1, 2, + 0, 0, 126, 127 + }; + + QuantizationTest(tflite::BuiltinOperator_QUANTIZE, + ::tflite::TensorType_INT8, + ::tflite::TensorType_UINT8, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues); +} + +void QuantizeUint8Int8Test(std::vector& backends) +{ + std::vector inputShape { 2, 4 }; + std::vector outputShape { 2, 4 }; + + std::vector inputValues + { + 0, 1, 2, 3, + 126, 127, 254, 255 + }; + std::vector expectedOutputValues + { + 0, 1, 2, 3, + 126, 127, 127, 127 + }; + + QuantizationTest(tflite::BuiltinOperator_QUANTIZE, + ::tflite::TensorType_UINT8, + ::tflite::TensorType_INT8, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues); +} + +TEST_SUITE("CpuRef_QuantizationTests") +{ + +TEST_CASE ("DEQUANTIZE_UINT8_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + DequantizeUint8Test(backends); +} + + +TEST_CASE ("DEQUANTIZE_INT8_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + DequantizeInt8Test(backends); +} + + +TEST_CASE ("DEQUANTIZE_INT16_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + DequantizeInt16Test(backends); +} + + +TEST_CASE ("QUANTIZE_FLOAT32_UINT8_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + QuantizeFloat32Uint8Test(backends); +} + + +TEST_CASE ("QUANTIZE_FLOAT32_INT8_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + QuantizeFloat32Int8Test(backends); +} + + +TEST_CASE ("QUANTIZE_FLOAT32_INT16_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + QuantizeFloat32Int16Test(backends); +} + + +TEST_CASE ("QUANTIZE_INT16_INT16_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + QuantizeInt16Int16Test(backends); +} + + +TEST_CASE ("QUANTIZE_INT16_INT8_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + QuantizeInt16Int8Test(backends); +} + + + +TEST_CASE ("QUANTIZE_INT8_UINT8_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + QuantizeInt8Uint8Test(backends); +} + + +TEST_CASE ("QUANTIZE_UINT8_INT8_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + QuantizeUint8Int8Test(backends); +} + +} + +TEST_SUITE("CpuAcc_QuantizationTests") +{ + +// Dequantize Operator Tests +TEST_CASE ("DEQUANTIZE_UINT8_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + DequantizeUint8Test(backends); +} + +TEST_CASE ("DEQUANTIZE_INT8_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + DequantizeInt8Test(backends); +} + +TEST_CASE ("DEQUANTIZE_INT16_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + 
DequantizeInt16Test(backends); +} + +// Quantize Operator Tests +TEST_CASE ("QUANTIZE_FLOAT32_UINT8_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + QuantizeFloat32Uint8Test(backends); +} + +TEST_CASE ("QUANTIZE_FLOAT32_INT8_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + QuantizeFloat32Int8Test(backends); +} + +TEST_CASE ("QUANTIZE_INT8_UINT8_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + QuantizeInt8Uint8Test(backends); +} + +TEST_CASE ("QUANTIZE_UINT8_INT8_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + QuantizeUint8Int8Test(backends); +} + +} + +TEST_SUITE("GpuAcc_QuantizationTests") +{ + +// Dequantize Operator Tests +TEST_CASE ("DEQUANTIZE_UINT8_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + DequantizeUint8Test(backends); +} + +TEST_CASE ("DEQUANTIZE_INT8_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + DequantizeInt8Test(backends); +} + +TEST_CASE ("DEQUANTIZE_INT16_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + DequantizeInt16Test(backends); +} + +// Quantize Operator Tests +TEST_CASE ("QUANTIZE_FLOAT32_UINT8_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + QuantizeFloat32Uint8Test(backends); +} + +TEST_CASE ("QUANTIZE_FLOAT32_INT8_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + QuantizeFloat32Int8Test(backends); +} + +TEST_CASE ("QUANTIZE_INT8_UINT8_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + QuantizeInt8Uint8Test(backends); +} + +TEST_CASE ("QUANTIZE_UINT8_INT8_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + QuantizeUint8Int8Test(backends); +} + +} + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/QuantizationTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/QuantizationTestHelper.hpp new file mode 100644 index 0000000000..e4155040cd --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/QuantizationTestHelper.hpp @@ -0,0 +1,197 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ + +std::vector CreateQuantizationTfLiteModel(tflite::BuiltinOperator quantizationOperatorCode, + tflite::TensorType inputTensorType, + tflite::TensorType outputTensorType, + const std::vector & inputTensorShape, + const std::vector & outputTensorShape, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::vector> buffers; + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset }), + QuantizationDetails_CustomQuantization); + + std::array, 2> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputTensorShape.data(), + inputTensorShape.size()), + inputTensorType, + 0, + flatBufferBuilder.CreateString("input"), + quantizationParameters); + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + outputTensorType, + 0, + flatBufferBuilder.CreateString("output"), + quantizationParameters); + + // create operator + tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_NONE; + flatbuffers::Offset operatorBuiltinOptions = 0; + switch (quantizationOperatorCode) + { + case BuiltinOperator_QUANTIZE: + { + operatorBuiltinOptionsType = BuiltinOptions_QuantizeOptions; + operatorBuiltinOptions = CreateQuantizeOptions(flatBufferBuilder).Union(); + break; + } + case BuiltinOperator_DEQUANTIZE: + { + operatorBuiltinOptionsType = BuiltinOptions_DequantizeOptions; + operatorBuiltinOptions = CreateDequantizeOptions(flatBufferBuilder).Union(); + break; + } + default: + break; + } + + const std::vector operatorInputs{0}; + const std::vector operatorOutputs{1}; + flatbuffers::Offset quantizationOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + const std::vector subgraphInputs{0}; + const std::vector subgraphOutputs{1}; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&quantizationOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Quantization Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, quantizationOperatorCode); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void 
QuantizationTest(tflite::BuiltinOperator quantizeOperatorCode, + tflite::TensorType inputTensorType, + tflite::TensorType outputTensorType, + std::vector& backends, + std::vector& inputShape, + std::vector& outputShape, + std::vector& inputValues, + std::vector& expectedOutputValues, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + std::vector modelBuffer = CreateQuantizationTfLiteModel(quantizeOperatorCode, + inputTensorType, + outputTensorType, + inputShape, + outputShape, + quantScale, + quantOffset); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + auto tfLiteDelegateInputId = tfLiteInterpreter->inputs()[0]; + auto tfLiteDelageInputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateInputId); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + tfLiteDelageInputData[i] = inputValues[i]; + } + + auto armnnDelegateInputId = armnnDelegateInterpreter->inputs()[0]; + auto armnnDelegateInputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateInputId); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + armnnDelegateInputData[i] = inputValues[i]; + } + + // Run EnqueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0]; + auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0]; + auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateOutputId); + + for (size_t i = 0; i < expectedOutputValues.size(); i++) + { + CHECK(expectedOutputValues[i] == armnnDelegateOutputData[i]); + CHECK(tfLiteDelageOutputData[i] == expectedOutputValues[i]); + CHECK(tfLiteDelageOutputData[i] == armnnDelegateOutputData[i]); + } +} + +} // anonymous namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/RedefineTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/RedefineTestHelper.hpp new file mode 100644 index 0000000000..6f061572b4 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/RedefineTestHelper.hpp @@ -0,0 +1,192 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ + +std::vector CreateRedefineTfLiteModel( + tflite::BuiltinOperator redefineOperatorCode, + tflite::TensorType tensorType, + const std::vector& inputTensorShape, + const std::vector& outputTensorShape, + const std::vector& targetShape, + bool useOption = true, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + std::vector> buffers; + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + + auto inputTensor = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputTensorShape.data(), + inputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input"), + quantizationParameters); + + auto outputTensor = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + tensorType, + 1, + flatBufferBuilder.CreateString("output"), + quantizationParameters); + + std::vector> tensors; + std::vector operatorInputs; + std::vector subgraphInputs; + flatbuffers::Offset operatorBuiltinOptions; + + if (useOption) + { + tensors = { inputTensor, outputTensor}; + operatorInputs = {0}; + subgraphInputs = {0}; + operatorBuiltinOptions = CreateReshapeOptions( + flatBufferBuilder, + flatBufferBuilder.CreateVector(targetShape.data(), targetShape.size())).Union(); + } + else + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(targetShape.data()), + sizeof(int32_t) * targetShape.size()))); + int32_t size = static_cast(targetShape.size()); + auto shapeTensor = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector( { size } ), + tflite::TensorType_INT32, + 2, + flatBufferBuilder.CreateString("shape")); + tensors = { inputTensor, outputTensor, shapeTensor }; + operatorInputs = {0, 2}; + subgraphInputs = {0, 2}; + operatorBuiltinOptions = CreateReshapeOptions(flatBufferBuilder).Union(); + } + + // create operator + tflite::BuiltinOptions operatorBuiltinOptionsType = BuiltinOptions_ReshapeOptions; + + const std::vector operatorOutputs{1}; + flatbuffers::Offset redefineOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + const std::vector subgraphOutputs{1}; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&redefineOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Reshape Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, + redefineOperatorCode); + + flatbuffers::Offset flatbufferModel = + 
CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void RedefineTest(tflite::BuiltinOperator redefineOperatorCode, + tflite::TensorType tensorType, + const std::vector& backends, + const std::vector& inputShape, + std::vector& outputShape, + std::vector& inputValues, + std::vector& expectedOutputValues, + std::vector& targetShape, + bool useOption = true, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + std::vector modelBuffer = CreateRedefineTfLiteModel(redefineOperatorCode, + tensorType, + inputShape, + outputShape, + targetShape, + useOption, + quantScale, + quantOffset); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + CHECK(tfLiteModel != nullptr); + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(tfLiteInterpreter, 0, inputValues); + armnnDelegate::FillInput(armnnDelegateInterpreter, 0, inputValues); + + // Run EnqueueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + armnnDelegate::CompareOutputData(tfLiteInterpreter, armnnDelegateInterpreter, outputShape, expectedOutputValues); +} + +} // anonymous namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/ReduceTest.cpp b/arch/arm/ARMnn/delegate/src/test/ReduceTest.cpp new file mode 100644 index 0000000000..5dd33562e6 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/ReduceTest.cpp @@ -0,0 +1,423 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ReduceTestHelper.hpp" + +#include + +#include +#include + +#include + +namespace armnnDelegate +{ + +void ReduceUint8KeepDimsTest(tflite::BuiltinOperator reduceOperatorCode, + std::vector& backends, + std::vector& expectedOutputValues) +{ + std::vector input0Shape { 1, 1, 2, 3 }; + std::vector input1Shape { 1 }; + std::vector expectedOutputShape { 1, 1, 1, 3 }; + + std::vector input0Values { 1, 2, 3, + 4, 3, 1 }; // Inputs + std::vector input1Values { 2 }; // Axis + + ReduceTest(reduceOperatorCode, + ::tflite::TensorType_UINT8, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues, + true); +} + +void ReduceUint8Test(tflite::BuiltinOperator reduceOperatorCode, + std::vector& backends, + std::vector& expectedOutputValues) +{ + std::vector input0Shape { 1, 1, 2, 3 }; + std::vector input1Shape { 1 }; + std::vector expectedOutputShape { 1, 1, 3 }; + + std::vector input0Values { 1, 2, 3, + 4, 3, 1 }; // Inputs + std::vector input1Values { 2 }; // Axis + + ReduceTest(reduceOperatorCode, + ::tflite::TensorType_UINT8, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues, + false); +} + +void ReduceFp32KeepDimsTest(tflite::BuiltinOperator reduceOperatorCode, + std::vector& backends, + std::vector& expectedOutputValues) +{ + std::vector input0Shape { 1, 1, 2, 3 }; + std::vector input1Shape { 1 }; + std::vector expectedOutputShape { 1, 1, 1, 3 }; + + std::vector input0Values { 1001.0f, 11.0f, 1003.0f, + 10.0f, 1002.0f, 12.0f }; // Inputs + std::vector input1Values { 2 }; // Axis + + ReduceTest(reduceOperatorCode, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues, + true); +} + +void ReduceFp32Test(tflite::BuiltinOperator reduceOperatorCode, + std::vector& backends, + std::vector& expectedOutputValues) +{ + std::vector input0Shape { 1, 1, 2, 3 }; + std::vector input1Shape { 1 }; + std::vector expectedOutputShape { 1, 1, 3 }; + + std::vector input0Values { 1001.0f, 11.0f, 1003.0f, + 10.0f, 1002.0f, 12.0f }; // Inputs + std::vector input1Values { 2 }; // Axis + + ReduceTest(reduceOperatorCode, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues, + false); +} + +// REDUCE_MAX Tests +TEST_SUITE("ReduceMax_CpuRefTests") +{ + +TEST_CASE ("ReduceMax_Uint8_KeepDims_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + std::vector expectedOutputValues { 4, 3, 3 }; + ReduceUint8KeepDimsTest(tflite::BuiltinOperator_REDUCE_MAX, + backends, + expectedOutputValues); +} + +TEST_CASE ("ReduceMax_Uint8_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + std::vector expectedOutputValues { 4, 3, 3 }; + ReduceUint8Test(tflite::BuiltinOperator_REDUCE_MAX, + backends, + expectedOutputValues); +} + +TEST_CASE ("ReduceMax_Fp32_KeepDims_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + std::vector expectedOutputValues { 1001.0f, 1002.0f, 1003.0f }; + ReduceFp32KeepDimsTest(tflite::BuiltinOperator_REDUCE_MAX, + backends, + expectedOutputValues); +} + +TEST_CASE ("ReduceMax_Fp32_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + std::vector expectedOutputValues { 1001.0f, 1002.0f, 1003.0f }; + ReduceFp32Test(tflite::BuiltinOperator_REDUCE_MAX, + backends, + 
expectedOutputValues); +} + +} // End of ReduceMax_CpuRefTests + +TEST_SUITE("ReduceMax_CpuAccTests") +{ + +TEST_CASE ("ReduceMax_Uint8_KeepDims_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + std::vector expectedOutputValues { 4, 3, 3 }; + ReduceUint8KeepDimsTest(tflite::BuiltinOperator_REDUCE_MAX, + backends, + expectedOutputValues); +} + +TEST_CASE ("ReduceMax_Uint8_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + std::vector expectedOutputValues { 4, 3, 3 }; + ReduceUint8Test(tflite::BuiltinOperator_REDUCE_MAX, + backends, + expectedOutputValues); +} + + +TEST_CASE ("ReduceMax_Fp32_KeepDims_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + std::vector expectedOutputValues { 1001.0f, 1002.0f, 1003.0f }; + ReduceFp32KeepDimsTest(tflite::BuiltinOperator_REDUCE_MAX, + backends, + expectedOutputValues); +} + +TEST_CASE ("ReduceMax_Fp32_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + std::vector expectedOutputValues { 1001.0f, 1002.0f, 1003.0f }; + ReduceFp32Test(tflite::BuiltinOperator_REDUCE_MAX, + backends, + expectedOutputValues); +} + +} // End of ReduceMax_CpuAccTests + +TEST_SUITE("ReduceMax_GpuAccTests") +{ + +TEST_CASE ("ReduceMax_Uint8_KeepDims_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + std::vector expectedOutputValues { 4, 3, 3 }; + ReduceUint8KeepDimsTest(tflite::BuiltinOperator_REDUCE_MAX, + backends, + expectedOutputValues); +} + +TEST_CASE ("ReduceMax_Uint8_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + std::vector expectedOutputValues { 4, 3, 3 }; + ReduceUint8Test(tflite::BuiltinOperator_REDUCE_MAX, + backends, + expectedOutputValues); +} + + +TEST_CASE ("ReduceMax_Fp32_KeepDims_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + std::vector expectedOutputValues { 1001.0f, 1002.0f, 1003.0f }; + ReduceFp32KeepDimsTest(tflite::BuiltinOperator_REDUCE_MAX, + backends, + expectedOutputValues); +} + +TEST_CASE ("ReduceMax_Fp32_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + std::vector expectedOutputValues { 1001.0f, 1002.0f, 1003.0f }; + ReduceFp32Test(tflite::BuiltinOperator_REDUCE_MAX, + backends, + expectedOutputValues); +} + +} // End of ReduceMax_GpuAccTests + +// REDUCE_MIN Tests +TEST_SUITE("ReduceMin_CpuRefTests") +{ + +TEST_CASE ("ReduceMin_Fp32_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + std::vector expectedOutputValues { 10.0f, 11.0f, 12.0f }; + ReduceFp32Test(tflite::BuiltinOperator_REDUCE_MIN, + backends, + expectedOutputValues); +} + +} // End of ReduceMin_CpuRefTests + +TEST_SUITE("ReduceMin_CpuAccTests") +{ + +TEST_CASE ("ReduceMin_Fp32_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + std::vector expectedOutputValues { 10.0f, 11.0f, 12.0f }; + ReduceFp32Test(tflite::BuiltinOperator_REDUCE_MIN, + backends, + expectedOutputValues); +} + +} // End of ReduceMin_CpuAccTests + +TEST_SUITE("ReduceMin_GpuAccTests") +{ + +TEST_CASE ("ReduceMin_Fp32_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + std::vector expectedOutputValues { 10.0f, 11.0f, 12.0f }; + ReduceFp32Test(tflite::BuiltinOperator_REDUCE_MIN, + backends, + expectedOutputValues); +} + +} // End of ReduceMin_GpuAccTests + +// SUM Tests +TEST_SUITE("Sum_CpuRefTests") +{ + +TEST_CASE ("Sum_Uint8_KeepDims_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + std::vector expectedOutputValues { 5, 5, 4 }; + ReduceUint8KeepDimsTest(tflite::BuiltinOperator_SUM, + 
backends, + expectedOutputValues); +} + +TEST_CASE ("Sum_Fp32_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + std::vector expectedOutputValues { 1011.0f, 1013.0f, 1015.0f }; + ReduceFp32Test(tflite::BuiltinOperator_SUM, + backends, + expectedOutputValues); +} + +} // End of Sum_CpuRefTests + +TEST_SUITE("Sum_CpuAccTests") +{ + +TEST_CASE ("Sum_Uint8_KeepDims_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + std::vector expectedOutputValues { 5, 5, 4 }; + ReduceUint8KeepDimsTest(tflite::BuiltinOperator_SUM, + backends, + expectedOutputValues); +} + +TEST_CASE ("Sum_Fp32_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + std::vector expectedOutputValues { 1011.0f, 1013.0f, 1015.0f }; + ReduceFp32Test(tflite::BuiltinOperator_SUM, + backends, + expectedOutputValues); +} + +} // End of Sum_CpuAccTests + +TEST_SUITE("Sum_GpuAccTests") +{ + +TEST_CASE ("Sum_Uint8_KeepDims_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + std::vector expectedOutputValues { 5, 5, 4 }; + ReduceUint8KeepDimsTest(tflite::BuiltinOperator_SUM, + backends, + expectedOutputValues); +} + +TEST_CASE ("Sum_Fp32_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + std::vector expectedOutputValues { 1011.0f, 1013.0f, 1015.0f }; + ReduceFp32Test(tflite::BuiltinOperator_SUM, + backends, + expectedOutputValues); +} + +} // End of Sum_GpuAccTests + +// PROD Tests +TEST_SUITE("Prod_CpuRefTests") +{ + +TEST_CASE ("Prod_Uint8_KeepDims_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + std::vector expectedOutputValues { 4, 6, 3 }; + ReduceUint8KeepDimsTest(tflite::BuiltinOperator_REDUCE_PROD, + backends, + expectedOutputValues); +} + +TEST_CASE ("Prod_Fp32_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + std::vector expectedOutputValues { 10010.0f, 11022.0f, 12036.0f }; + ReduceFp32Test(tflite::BuiltinOperator_REDUCE_PROD, + backends, + expectedOutputValues); +} + +} // End of Prod_CpuRefTests + +TEST_SUITE("Prod_CpuAccTests") +{ + +TEST_CASE ("Prod_Uint8_KeepDims_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + std::vector expectedOutputValues { 4, 6, 3 }; + ReduceUint8KeepDimsTest(tflite::BuiltinOperator_REDUCE_PROD, + backends, + expectedOutputValues); +} + +TEST_CASE ("Prod_Fp32_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + std::vector expectedOutputValues { 10010.0f, 11022.0f, 12036.0f }; + ReduceFp32Test(tflite::BuiltinOperator_REDUCE_PROD, + backends, + expectedOutputValues); +} + +} // End of Prod_CpuAccTests + +TEST_SUITE("Prod_GpuAccTests") +{ + +TEST_CASE ("Prod_Uint8_KeepDims_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + std::vector expectedOutputValues { 4, 6, 3 }; + ReduceUint8KeepDimsTest(tflite::BuiltinOperator_REDUCE_PROD, + backends, + expectedOutputValues); +} + +TEST_CASE ("Prod_Fp32_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + std::vector expectedOutputValues { 10010.0f, 11022.0f, 12036.0f }; + ReduceFp32Test(tflite::BuiltinOperator_REDUCE_PROD, + backends, + expectedOutputValues); +} + +} // End of Prod_GpuAccTests + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/ReduceTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/ReduceTestHelper.hpp new file mode 100644 index 0000000000..b41fcfa39b --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/ReduceTestHelper.hpp @@ -0,0 +1,186 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. 
All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +namespace +{ + +std::vector CreateReduceTfLiteModel(tflite::BuiltinOperator reduceOperatorCode, + tflite::TensorType tensorType, + std::vector& input0TensorShape, + std::vector& input1TensorShape, + const std::vector & outputTensorShape, + std::vector& axisData, + const bool keepDims, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::array, 2> buffers; + buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})); + buffers[1] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(axisData.data()), + sizeof(int32_t) * axisData.size())); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + + std::array, 3> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(input0TensorShape.data(), + input0TensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input"), + quantizationParameters); + + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(input1TensorShape.data(), + input1TensorShape.size()), + ::tflite::TensorType_INT32, + 1, + flatBufferBuilder.CreateString("axis"), + quantizationParameters); + + // Create output tensor + tensors[2] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output"), + quantizationParameters); + + // Create operator. Reduce operations MIN, MAX, SUM, MEAN uses ReducerOptions. 
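+ // REDUCE_PROD, REDUCE_MAX and REDUCE_MIN share the same ReducerOptions table as SUM and MEAN
+ // in the TfLite schema, so keepDims is the only builtin option carried by these operators.
+ // With the shapes used by the tests in this patch (input { 1, 1, 2, 3 }, axis { 2 }) the
+ // output shape is { 1, 1, 1, 3 } when keepDims is true and { 1, 1, 3 } when it is false.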
+ tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_ReducerOptions; + flatbuffers::Offset operatorBuiltinOptions = CreateReducerOptions(flatBufferBuilder, keepDims).Union(); + + const std::vector operatorInputs{ {0, 1} }; + const std::vector operatorOutputs{ 2 }; + flatbuffers::Offset reduceOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + const std::vector subgraphInputs{ {0, 1} }; + const std::vector subgraphOutputs{ 2 }; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&reduceOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Reduce Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, reduceOperatorCode); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void ReduceTest(tflite::BuiltinOperator reduceOperatorCode, + tflite::TensorType tensorType, + std::vector& backends, + std::vector& input0Shape, + std::vector& input1Shape, + std::vector& expectedOutputShape, + std::vector& input0Values, + std::vector& input1Values, + std::vector& expectedOutputValues, + const bool keepDims, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + std::vector modelBuffer = CreateReduceTfLiteModel(reduceOperatorCode, + tensorType, + input0Shape, + input1Shape, + expectedOutputShape, + input1Values, + keepDims, + quantScale, + quantOffset); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(tfLiteInterpreter, 0, input0Values); + armnnDelegate::FillInput(armnnDelegateInterpreter, 0, 
input0Values); + + // Run EnqueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + armnnDelegate::CompareOutputData(tfLiteInterpreter, + armnnDelegateInterpreter, + expectedOutputShape, + expectedOutputValues); + + armnnDelegateInterpreter.reset(nullptr); +} + +} // anonymous namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/ReshapeTest.cpp b/arch/arm/ARMnn/delegate/src/test/ReshapeTest.cpp new file mode 100644 index 0000000000..11449e29b8 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/ReshapeTest.cpp @@ -0,0 +1,517 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RedefineTestHelper.hpp" + +#include + +#include +#include + +#include + +#include + +using Half = half_float::half; + +namespace armnnDelegate +{ + +void ReshapeSimpleTest(std::vector& backends, bool useOption = true) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 3, 2, 2 }; + std::vector targetShape { 1, 3, 2, 2 }; + + std::vector inputValues = { -5.0f, 8.0f, -10.0f, 7.0f, + 8.0f, 12.0f, -15.0f, 2.0f, + 3.0f, -4.0f, -1.0f, -11.0f }; + + std::vector expectedOutputValues = { -5.0f, 8.0f, -10.0f, 7.0f, + 8.0f, 12.0f, -15.0f, 2.0f, + 3.0f, -4.0f, -1.0f, -11.0f }; + + RedefineTest(tflite::BuiltinOperator_RESHAPE, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + targetShape, + useOption); +} + +using namespace half_float::literal; + +void ReshapeSimpleFloat16Test(std::vector& backends, bool useOption = true) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 3, 2, 2 }; + std::vector targetShape { 1, 3, 2, 2 }; + + std::vector inputValues = { 5._h, -8._h, -10._h, 7._h, + 8._h, 12._h, -15._h, 2._h, + 3._h, -4._h, -1._h, -11._h }; + + std::vector expectedOutputValues = { 5._h, -8._h, -10._h, 7._h, + 8._h, 12._h, -15._h, 2._h, + 3._h, -4._h, -1._h, -11._h }; + + RedefineTest(tflite::BuiltinOperator_RESHAPE, + ::tflite::TensorType_FLOAT16, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + targetShape, + useOption); +} + +void ReshapeReduceDimTest(std::vector& backends, bool useOption = true) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 1, 4, 3 }; + std::vector targetShape { 1, 4, 3 }; + + std::vector inputValues = { -5.0f, 8.0f, -10.0f, 7.0f, + 8.0f, 12.0f, -15.0f, 2.0f, + 3.0f, -4.0f, -1.0f, -11.0f }; + + std::vector expectedOutputValues = { -5.0f, 8.0f, -10.0f, 7.0f, + 8.0f, 12.0f, -15.0f, 2.0f, + 3.0f, -4.0f, -1.0f, -11.0f }; + + RedefineTest(tflite::BuiltinOperator_RESHAPE, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + targetShape, + useOption); +} + +void ReshapeFlattenTest(std::vector& backends, bool useOption = true) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 6, 2 }; + std::vector targetShape { -1, 2 }; + + std::vector inputValues = { -5.0f, 8.0f, -10.0f, 7.0f, + 8.0f, 12.0f, -15.0f, 2.0f, + 3.0f, -4.0f, -1.0f, -11.0f }; + + std::vector expectedOutputValues = { -5.0f, 8.0f, -10.0f, 7.0f, + 8.0f, 12.0f, -15.0f, 2.0f, + 3.0f, -4.0f, -1.0f, -11.0f }; + + RedefineTest(tflite::BuiltinOperator_RESHAPE, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + outputShape, + 
inputValues, + expectedOutputValues, + targetShape, + useOption); +} + +void ReshapeFlattenAllTest(std::vector& backends, bool useOption = true) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 12 }; + std::vector targetShape { -1 }; + + std::vector inputValues = { -5.0f, 8.0f, -10.0f, 7.0f, + 8.0f, 12.0f, -15.0f, 2.0f, + 3.0f, -4.0f, -1.0f, -11.0f }; + + std::vector expectedOutputValues = { -5.0f, 8.0f, -10.0f, 7.0f, + 8.0f, 12.0f, -15.0f, 2.0f, + 3.0f, -4.0f, -1.0f, -11.0f }; + + RedefineTest(tflite::BuiltinOperator_RESHAPE, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + targetShape, + useOption); +} + +void ReshapeInt8Test(std::vector& backends, bool useOption = true) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 6, 2 }; + std::vector targetShape { -1, 2 }; + + std::vector inputValues = { -5, 8, -10, 7, + 8, 12, -15, 2, + 3, -4, -1, -11 }; + + std::vector expectedOutputValues = { -5, 8, -10, 7, + 8, 12, -15, 2, + 3, -4, -1, -11 }; + + RedefineTest(tflite::BuiltinOperator_RESHAPE, + ::tflite::TensorType_INT8, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + targetShape, + useOption, + 2.5f, + 1); +} + +void ReshapeUint8Test(std::vector& backends, bool useOption = true) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 6, 2 }; + std::vector targetShape { -1, 2 }; + + std::vector inputValues = { 5, 8, 10, 7, + 8, 12, 15, 2, + 3, 4, 1, 11 }; + + std::vector expectedOutputValues = { 5, 8, 10, 7, + 8, 12, 15, 2, + 3, 4, 1, 11 }; + + RedefineTest(tflite::BuiltinOperator_RESHAPE, + ::tflite::TensorType_UINT8, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + targetShape, + useOption, + 2.5f, + 1); +} + +void ReshapeInt16Test(std::vector& backends, bool useOption = true) +{ + // Set input data + std::vector inputShape { 1, 3, 4, 1 }; + std::vector outputShape { 6, 2 }; + std::vector targetShape { -1, 2 }; + + std::vector inputValues = { -5, 8, -10, 7, + 8, 12, -15, 2, + 3, -4, -1, -11 }; + + std::vector expectedOutputValues = { -5, 8, -10, 7, + 8, 12, -15, 2, + 3, -4, -1, -11 }; + + RedefineTest(tflite::BuiltinOperator_RESHAPE, + ::tflite::TensorType_INT16, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + targetShape, + useOption, + 2.5f, + 0); +} + +TEST_SUITE("Reshape_GpuAccTests") +{ + +TEST_CASE ("Reshape_Simple_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ReshapeSimpleTest(backends); +} + +TEST_CASE ("Reshape_ReduceDimension_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ReshapeReduceDimTest(backends); +} + +TEST_CASE ("Reshape_Flatten_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ReshapeFlattenTest(backends); +} + +TEST_CASE ("Reshape_FlattenAll_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ReshapeFlattenAllTest(backends); +} + +TEST_CASE ("Reshape_Int8_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ReshapeInt8Test(backends); +} + +TEST_CASE ("Reshape_Uint8_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ReshapeUint8Test(backends); +} + +TEST_CASE ("Reshape_Float16_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ReshapeSimpleFloat16Test(backends); +} + +TEST_CASE ("Reshape_Simple_ShapeTensor_GpuAcc_Test") +{ + std::vector 
backends = { armnn::Compute::GpuAcc }; + ReshapeSimpleTest(backends, false); +} + +TEST_CASE ("Reshape_ReduceDimension_ShapeTensor_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ReshapeReduceDimTest(backends, false); +} + +TEST_CASE ("Reshape_Flatten_ShapeTensor_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ReshapeFlattenTest(backends, false); +} + +TEST_CASE ("Reshape_FlattenAll_ShapeTensor_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ReshapeFlattenAllTest(backends, false); +} + +TEST_CASE ("Reshape_Int8_ShapeTensor_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ReshapeInt8Test(backends, false); +} + +TEST_CASE ("Reshape_Uint8_ShapeTensor_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ReshapeUint8Test(backends, false); +} + +TEST_CASE ("Reshape_Float16_ShapeTensor_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ReshapeSimpleFloat16Test(backends, false); +} + +} // TEST_SUITE("Reshape_GpuAccTests") + +TEST_SUITE("Reshape_CpuAccTests") +{ + +TEST_CASE ("Reshape_Simple_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ReshapeSimpleTest(backends); +} + +TEST_CASE ("Reshape_ReduceDimension_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ReshapeReduceDimTest(backends); +} + +TEST_CASE ("Reshape_Flatten_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ReshapeFlattenTest(backends); +} + +TEST_CASE ("Reshape_FlattenAll_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ReshapeFlattenAllTest(backends); +} + +TEST_CASE ("Reshape_Int8_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ReshapeInt8Test(backends); +} + +TEST_CASE ("Reshape_Uint8_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ReshapeUint8Test(backends); +} + +TEST_CASE ("Reshape_Float16_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ReshapeSimpleFloat16Test(backends); +} + +TEST_CASE ("Reshape_Simple_ShapeTensor_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ReshapeSimpleTest(backends, false); +} + +TEST_CASE ("Reshape_ReduceDimension_ShapeTensor_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ReshapeReduceDimTest(backends, false); +} + +TEST_CASE ("Reshape_Flatten_ShapeTensor_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ReshapeFlattenTest(backends, false); +} + +TEST_CASE ("Reshape_FlattenAll_ShapeTensor_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ReshapeFlattenAllTest(backends, false); +} + +TEST_CASE ("Reshape_Int8_ShapeTensor_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ReshapeInt8Test(backends, false); +} + +TEST_CASE ("Reshape_Uint8_ShapeTensor_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ReshapeUint8Test(backends, false); +} + +TEST_CASE ("Reshape_Float16_ShapeTensor_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ReshapeSimpleFloat16Test(backends, false); +} + +} // TEST_SUITE("Reshape_CpuAccTests") + +TEST_SUITE("Reshape_CpuRefTests") +{ + +TEST_CASE ("Reshape_Simple_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ReshapeSimpleTest(backends); +} + +TEST_CASE ("Reshape_ReduceDimension_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ReshapeReduceDimTest(backends); +} + +TEST_CASE 
("Reshape_Flatten_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ReshapeFlattenTest(backends); +} + +TEST_CASE ("Reshape_FlattenAll_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ReshapeFlattenAllTest(backends); +} + +TEST_CASE ("Reshape_Int8_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ReshapeInt8Test(backends); +} + +TEST_CASE ("Reshape_Uint8_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ReshapeUint8Test(backends); +} + +TEST_CASE ("Reshape_Int16_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ReshapeInt16Test(backends); +} + +TEST_CASE ("Reshape_Float16_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ReshapeSimpleFloat16Test(backends); +} + +TEST_CASE ("Reshape_Simple_ShapeTensor_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ReshapeSimpleTest(backends, false); +} + +TEST_CASE ("Reshape_ReduceDimension_ShapeTensor_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ReshapeReduceDimTest(backends, false); +} + +TEST_CASE ("Reshape_Flatten_ShapeTensor_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ReshapeFlattenTest(backends, false); +} + +TEST_CASE ("Reshape_FlattenAll_ShapeTensor_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ReshapeFlattenAllTest(backends, false); +} + +TEST_CASE ("Reshape_Int8_ShapeTensor_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ReshapeInt8Test(backends, false); +} + +TEST_CASE ("Reshape_Uint8_ShapeTensor_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ReshapeUint8Test(backends, false); +} + +TEST_CASE ("Reshape_Int16_ShapeTensor_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ReshapeInt16Test(backends, false); +} + +TEST_CASE ("Reshape_Float16_ShapeTensor_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ReshapeSimpleFloat16Test(backends, false); +} + +} // TEST_SUITE("Reshape_CpuRefTests") + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/ResizeTest.cpp b/arch/arm/ARMnn/delegate/src/test/ResizeTest.cpp new file mode 100644 index 0000000000..394ad6c7ae --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/ResizeTest.cpp @@ -0,0 +1,134 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ResizeTestHelper.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace armnnDelegate +{ + +void ResizeBiliniarFloat32Test(std::vector& backends) +{ + // Set input data + std::vector input1Values + { + 0.0f, 1.0f, 2.0f, + 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f + }; + const std::vector input2NewShape { 5, 5 }; + + // Calculate output data + std::vector expectedOutputValues + { + 0.0f, 0.6f, 1.2f, 1.8f, 2.0f, + 1.8f, 2.4f, 3.0f, 3.6f, 3.8f, + 3.6f, 4.2f, 4.8f, 5.4f, 5.6f, + 5.4f, 6.0f, 6.6f, 7.2f, 7.4f, + 6.0f, 6.6f, 7.2f, 7.8f, 8.0f + }; + + const std::vector input1Shape { 1, 3, 3, 1 }; + const std::vector input2Shape { 2 }; + const std::vector expectedOutputShape = input2NewShape; + + ResizeFP32TestImpl(tflite::BuiltinOperator_RESIZE_BILINEAR, + backends, + input1Values, + input1Shape, + input2NewShape, + input2Shape, + expectedOutputValues, + expectedOutputShape); +} + +void ResizeNearestNeighbourFloat32Test(std::vector& backends) +{ + // Set input data + std::vector input1Values { 1.0f, 2.0f, 3.0f, 4.0f } + ; + const std::vector input2NewShape { 1, 1 }; + + // Calculate output data + std::vector expectedOutputValues { 1.0f }; + + const std::vector input1Shape { 1, 2, 2, 1 }; + const std::vector input2Shape { 2 }; + const std::vector expectedOutputShape = input2NewShape; + + ResizeFP32TestImpl(tflite::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR, + backends, + input1Values, + input1Shape, + input2NewShape, + input2Shape, + expectedOutputValues, + expectedOutputShape); +} + +TEST_SUITE("ResizeTests_GpuAccTests") +{ + +TEST_CASE ("Resize_Biliniar_Float32_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ResizeBiliniarFloat32Test(backends); +} + +TEST_CASE ("Resize_NearestNeighbour_Float32_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + ResizeNearestNeighbourFloat32Test(backends); +} + +} // TEST_SUITE("ResizeTests_GpuAccTests") + + +TEST_SUITE("ResizeTests_CpuAccTests") +{ + +TEST_CASE ("Resize_Biliniar_Float32_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ResizeBiliniarFloat32Test(backends); +} + +TEST_CASE ("Resize_NearestNeighbour_Float32_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + ResizeNearestNeighbourFloat32Test(backends); +} + +} // TEST_SUITE("ResizeTests_CpuAccTests") + + +TEST_SUITE("ResizeTests_CpuRefTests") +{ + +TEST_CASE ("Resize_Biliniar_Float32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ResizeBiliniarFloat32Test(backends); +} + +TEST_CASE ("Resize_NearestNeighbour_Float32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ResizeNearestNeighbourFloat32Test(backends); +} + +} // TEST_SUITE("ResizeTests_CpuRefTests") + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/test/ResizeTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/ResizeTestHelper.hpp new file mode 100644 index 0000000000..030b2a7a4a --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/ResizeTestHelper.hpp @@ -0,0 +1,192 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
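+// This helper builds a single-operator model with two inputs: the FLOAT32 image tensor and an
+// INT32 size tensor backed by constant buffer 1. The operator-specific options
+// (ResizeBilinearOptions or ResizeNearestNeighborOptions) are selected from the builtin
+// operator code passed in.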
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ + +std::vector CreateResizeTfLiteModel(tflite::BuiltinOperator operatorCode, + tflite::TensorType inputTensorType, + const std::vector & inputTensorShape, + const std::vector & sizeTensorData, + const std::vector & sizeTensorShape, + const std::vector & outputTensorShape) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::vector> buffers; + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + buffers.push_back(CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector( + reinterpret_cast(sizeTensorData.data()), + sizeof(int32_t) * sizeTensorData.size()))); + + std::array, 3> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputTensorShape.data(), inputTensorShape.size()), + inputTensorType, + 0, + flatBufferBuilder.CreateString("input_tensor")); + + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(sizeTensorShape.data(), + sizeTensorShape.size()), + TensorType_INT32, + 1, + flatBufferBuilder.CreateString("size_input_tensor")); + + tensors[2] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + inputTensorType, + 0, + flatBufferBuilder.CreateString("output_tensor")); + + // Create Operator + tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_NONE; + flatbuffers::Offset operatorBuiltinOption = 0; + switch (operatorCode) + { + case BuiltinOperator_RESIZE_BILINEAR: + { + operatorBuiltinOption = CreateResizeBilinearOptions(flatBufferBuilder, false, false).Union(); + operatorBuiltinOptionsType = tflite::BuiltinOptions_ResizeBilinearOptions; + break; + } + case BuiltinOperator_RESIZE_NEAREST_NEIGHBOR: + { + operatorBuiltinOption = CreateResizeNearestNeighborOptions(flatBufferBuilder, false, false).Union(); + operatorBuiltinOptionsType = tflite::BuiltinOptions_ResizeNearestNeighborOptions; + break; + } + default: + break; + } + + const std::vector operatorInputs{0, 1}; + const std::vector operatorOutputs{2}; + flatbuffers::Offset resizeOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOption); + + const std::vector subgraphInputs{0, 1}; + const std::vector subgraphOutputs{2}; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&resizeOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Resize Biliniar Operator Model"); + flatbuffers::Offset opCode = CreateOperatorCode(flatBufferBuilder, operatorCode); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&opCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return 
std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +void ResizeFP32TestImpl(tflite::BuiltinOperator operatorCode, + std::vector& backends, + std::vector& input1Values, + std::vector input1Shape, + std::vector input2NewShape, + std::vector input2Shape, + std::vector& expectedOutputValues, + std::vector expectedOutputShape) +{ + using namespace tflite; + + std::vector modelBuffer = CreateResizeTfLiteModel(operatorCode, + ::tflite::TensorType_FLOAT32, + input1Shape, + input2NewShape, + input2Shape, + expectedOutputShape); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + + // The model will be executed using tflite and using the armnn delegate so that the outputs + // can be compared. + + // Create TfLite Interpreter with armnn delegate + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + // Create TfLite Interpreter without armnn delegate + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data for the armnn interpreter + armnnDelegate::FillInput(armnnDelegateInterpreter, 0, input1Values); + armnnDelegate::FillInput(armnnDelegateInterpreter, 1, input2NewShape); + + // Set input data for the tflite interpreter + armnnDelegate::FillInput(tfLiteInterpreter, 0, input1Values); + armnnDelegate::FillInput(tfLiteInterpreter, 1, input2NewShape); + + // Run EnqueWorkload + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0]; + auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0]; + auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateOutputId); + for (size_t i = 0; i < expectedOutputValues.size(); i++) + { + CHECK(expectedOutputValues[i] == doctest::Approx(armnnDelegateOutputData[i])); + CHECK(armnnDelegateOutputData[i] == doctest::Approx(tfLiteDelageOutputData[i])); + } + + armnnDelegateInterpreter.reset(nullptr); +} + +} // anonymous namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/RoundTest.cpp b/arch/arm/ARMnn/delegate/src/test/RoundTest.cpp new file mode 100644 index 0000000000..9d323f3700 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/RoundTest.cpp @@ -0,0 +1,72 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
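+// The cases below exercise the FLOOR builtin operator on FLOAT32 data via RoundTestHelper.hpp,
+// comparing the ArmNN delegate against the reference interpreter on CpuRef, CpuAcc and GpuAcc.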
+// SPDX-License-Identifier: MIT +// + +#include "RoundTestHelper.hpp" + +#include + +#include +#include + +#include + +namespace armnnDelegate +{ + +void FloorFp32Test(std::vector& backends) +{ + std::vector inputShape {1, 3, 2, 3}; + std::vector outputShape {1, 3, 2, 3}; + + std::vector inputValues { -37.5f, -15.2f, -8.76f, -2.0f, -1.5f, -1.3f, -0.5f, -0.4f, 0.0f, + 1.0f, 0.4f, 0.5f, 1.3f, 1.5f, 2.0f, 8.76f, 15.2f, 37.5f }; + + std::vector expectedOutputValues { -38.0f, -16.0f, -9.0f, -2.0f, -2.0f, -2.0f, -1.0f, -1.0f, 0.0f, + 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 2.0f, 8.0f, 15.0f, 37.0f }; + + RoundTest(tflite::BuiltinOperator_FLOOR, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + inputValues, + expectedOutputValues); +} + +// FLOOR Test Suite +TEST_SUITE("FLOOR_CpuRefTests") +{ + +TEST_CASE ("FLOOR_Fp32_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + FloorFp32Test(backends); +} + +} + +TEST_SUITE("FLOOR_CpuAccTests") +{ + +TEST_CASE ("FLOOR_Fp32_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + FloorFp32Test(backends); +} + +} + +TEST_SUITE("FLOOR_GpuAccTests") +{ + +TEST_CASE ("FLOOR_Fp32_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + FloorFp32Test(backends); +} + +} +// End of FLOOR Test Suite + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/RoundTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/RoundTestHelper.hpp new file mode 100644 index 0000000000..3a35ee0764 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/RoundTestHelper.hpp @@ -0,0 +1,161 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ +std::vector CreateRoundTfLiteModel(tflite::BuiltinOperator roundOperatorCode, + tflite::TensorType tensorType, + const std::vector & tensorShape, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::vector> buffers; + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({quantScale}), + flatBufferBuilder.CreateVector({quantOffset})); + + std::array, 2> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorShape.data(), + tensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input"), + quantizationParameters); + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorShape.data(), + tensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output"), + quantizationParameters); + + const std::vector operatorInputs({0}); + const std::vector operatorOutputs({1}); + + flatbuffers::Offset roundOperator; + flatbuffers::Offset modelDescription; + flatbuffers::Offset operatorCode; + + switch (roundOperatorCode) + { + case tflite::BuiltinOperator_FLOOR: + default: + roundOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size())); + modelDescription = flatBufferBuilder.CreateString("ArmnnDelegate: Floor Operator Model"); + operatorCode = 
CreateOperatorCode(flatBufferBuilder, tflite::BuiltinOperator_FLOOR); + break; + } + const std::vector subgraphInputs({0}); + const std::vector subgraphOutputs({1}); + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&roundOperator, 1)); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void RoundTest(tflite::BuiltinOperator roundOperatorCode, + tflite::TensorType tensorType, + std::vector& backends, + std::vector& shape, + std::vector& inputValues, + std::vector& expectedOutputValues, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + std::vector modelBuffer = CreateRoundTfLiteModel(roundOperatorCode, + tensorType, + shape, + quantScale, + quantOffset); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + + // Create TfLite Interpreters + std::unique_ptr armnnDelegate; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegate) == kTfLiteOk); + CHECK(armnnDelegate != nullptr); + CHECK(armnnDelegate->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteDelegate; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteDelegate) == kTfLiteOk); + CHECK(tfLiteDelegate != nullptr); + CHECK(tfLiteDelegate->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegate->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(tfLiteDelegate, 0, inputValues); + armnnDelegate::FillInput(armnnDelegate, 0, inputValues); + + // Run EnqueWorkload + CHECK(tfLiteDelegate->Invoke() == kTfLiteOk); + CHECK(armnnDelegate->Invoke() == kTfLiteOk); + + // Compare output data + armnnDelegate::CompareOutputData(tfLiteDelegate, + armnnDelegate, + shape, + expectedOutputValues, + 0); + + tfLiteDelegate.reset(nullptr); + armnnDelegate.reset(nullptr); +} + +} // anonymous namespace diff --git a/arch/arm/ARMnn/delegate/src/test/ShapeTest.cpp b/arch/arm/ARMnn/delegate/src/test/ShapeTest.cpp new file mode 100644 index 0000000000..b49910adf6 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/ShapeTest.cpp @@ -0,0 +1,45 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
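+// The SHAPE operator test below builds a model whose single output is the dimension list of the
+// input tensor as INT32 values; with input shape { 1, 3, 2, 3 } the expected output is the
+// vector { 1, 3, 2, 3 }.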
+// SPDX-License-Identifier: MIT +// + +#include "ShapeTestHelper.hpp" + +#include + +namespace armnnDelegate +{ + +void ShapeSimpleTest(std::vector& backends) +{ + std::vector inputShape{ 1, 3, 2, 3 }; + + std::vector inputValues{ 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, }; + + std::vector expectedOutputShape{ 4 }; + std::vector expectedOutputValues{ 1, 3, 2, 3 }; + + ShapeTest(::tflite::TensorType_INT32, + ::tflite::TensorType_INT32, + backends, + inputShape, + inputValues, + expectedOutputValues, + expectedOutputShape); +} + +// SHAPE Test Suite +TEST_SUITE("SHAPE_CpuRefTests") +{ + +TEST_CASE("SHAPE_Simple_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + ShapeSimpleTest(backends); +} + +} +// End of SHAPE Test Suite + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/ShapeTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/ShapeTestHelper.hpp new file mode 100644 index 0000000000..854c5084aa --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/ShapeTestHelper.hpp @@ -0,0 +1,171 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ +std::vector CreateShapeTfLiteModel(tflite::TensorType inputTensorType, + tflite::TensorType outputTensorType, + const std::vector& inputTensorShape, + const std::vector& outputTensorShape, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::vector> buffers; + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + + std::array, 2> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputTensorShape.data(), + inputTensorShape.size()), + inputTensorType, + 0, + flatBufferBuilder.CreateString("input"), + quantizationParameters); + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + outputTensorType, + 0, + flatBufferBuilder.CreateString("output"), + quantizationParameters); + + const std::vector operatorInputs({ 0 }); + const std::vector operatorOutputs({ 1 }); + + flatbuffers::Offset shapeOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), + operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), + operatorOutputs.size()), + BuiltinOptions_ShapeOptions, + CreateShapeOptions(flatBufferBuilder, outputTensorType).Union()); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: SHAPE Operator Model"); + + flatbuffers::Offset operatorCode = + CreateOperatorCode(flatBufferBuilder, tflite::BuiltinOperator_SHAPE); + + const std::vector subgraphInputs({ 0 }); + const std::vector subgraphOutputs({ 1 }); + + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), + subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), + subgraphOutputs.size()), + 
flatBufferBuilder.CreateVector(&shapeOperator, 1)); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void ShapeTest(tflite::TensorType inputTensorType, + tflite::TensorType outputTensorType, + std::vector& backends, + std::vector& inputShape, + std::vector& inputValues, + std::vector& expectedOutputValues, + std::vector& expectedOutputShape, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + std::vector modelBuffer = CreateShapeTfLiteModel(inputTensorType, + outputTensorType, + inputShape, + expectedOutputShape, + quantScale, + quantOffset); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + + // Create TfLite Interpreters + std::unique_ptr armnnDelegate; + + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegate) == kTfLiteOk); + CHECK(armnnDelegate != nullptr); + CHECK(armnnDelegate->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteDelegate; + + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteDelegate) == kTfLiteOk); + CHECK(tfLiteDelegate != nullptr); + CHECK(tfLiteDelegate->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + + std::unique_ptr < TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete) > + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + + CHECK(theArmnnDelegate != nullptr); + + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegate->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(tfLiteDelegate, 0, inputValues); + armnnDelegate::FillInput(armnnDelegate, 0, inputValues); + + // Run EnqueWorkload + CHECK(tfLiteDelegate->Invoke() == kTfLiteOk); + CHECK(armnnDelegate->Invoke() == kTfLiteOk); + + // Compare output data + armnnDelegate::CompareOutputData(tfLiteDelegate, + armnnDelegate, + expectedOutputShape, + expectedOutputValues, + 0); + + tfLiteDelegate.reset(nullptr); + armnnDelegate.reset(nullptr); +} + +} // anonymous namespace diff --git a/arch/arm/ARMnn/delegate/src/test/SliceTest.cpp b/arch/arm/ARMnn/delegate/src/test/SliceTest.cpp new file mode 100644 index 0000000000..bd0584936e --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/SliceTest.cpp @@ -0,0 +1,243 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "SliceTestHelper.hpp" + +#include + +#include +#include + +#include + +namespace armnnDelegate +{ + +void StridedSlice4DTest(std::vector& backends) +{ + std::vector inputShape { 3, 2, 3, 1 }; + std::vector outputShape { 1, 2, 3, 1 }; + std::vector beginShape { 4 }; + std::vector endShape { 4 }; + std::vector strideShape { 4 }; + + std::vector beginData { 1, 0, 0, 0 }; + std::vector endData { 2, 2, 3, 1 }; + std::vector strideData { 1, 1, 1, 1 }; + std::vector inputData { 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, + 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f, + 5.0f, 5.0f, 5.0f, 6.0f, 6.0f, 6.0f }; + std::vector outputData { 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f }; + + StridedSliceTestImpl( + backends, + inputData, + outputData, + beginData, + endData, + strideData, + inputShape, + beginShape, + endShape, + strideShape, + outputShape + ); +} + +void StridedSlice4DReverseTest(std::vector& backends) +{ + std::vector inputShape { 3, 2, 3, 1 }; + std::vector outputShape { 1, 2, 3, 1 }; + std::vector beginShape { 4 }; + std::vector endShape { 4 }; + std::vector strideShape { 4 }; + + std::vector beginData { 1, -1, 0, 0 }; + std::vector endData { 2, -3, 3, 1 }; + std::vector strideData { 1, -1, 1, 1 }; + std::vector inputData { 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, + 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f, + 5.0f, 5.0f, 5.0f, 6.0f, 6.0f, 6.0f }; + std::vector outputData { 4.0f, 4.0f, 4.0f, 3.0f, 3.0f, 3.0f }; + + StridedSliceTestImpl( + backends, + inputData, + outputData, + beginData, + endData, + strideData, + inputShape, + beginShape, + endShape, + strideShape, + outputShape + ); +} + +void StridedSliceSimpleStrideTest(std::vector& backends) +{ + std::vector inputShape { 3, 2, 3, 1 }; + std::vector outputShape { 2, 1, 2, 1 }; + std::vector beginShape { 4 }; + std::vector endShape { 4 }; + std::vector strideShape { 4 }; + + std::vector beginData { 0, 0, 0, 0 }; + std::vector endData { 3, 2, 3, 1 }; + std::vector strideData { 2, 2, 2, 1 }; + std::vector inputData { 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, + 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f, + 5.0f, 5.0f, 5.0f, 6.0f, 6.0f, 6.0f }; + std::vector outputData { 1.0f, 1.0f, + 5.0f, 5.0f }; + + StridedSliceTestImpl( + backends, + inputData, + outputData, + beginData, + endData, + strideData, + inputShape, + beginShape, + endShape, + strideShape, + outputShape + ); +} + +void StridedSliceSimpleRangeMaskTest(std::vector& backends) +{ + std::vector inputShape { 3, 2, 3, 1 }; + std::vector outputShape { 3, 2, 3, 1 }; + std::vector beginShape { 4 }; + std::vector endShape { 4 }; + std::vector strideShape { 4 }; + + std::vector beginData { 1, 1, 1, 1 }; + std::vector endData { 1, 1, 1, 1 }; + std::vector strideData { 1, 1, 1, 1 }; + + int beginMask = -1; + int endMask = -1; + + std::vector inputData { 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, + 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f, + 5.0f, 5.0f, 5.0f, 6.0f, 6.0f, 6.0f }; + std::vector outputData { 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, + 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f, + 5.0f, 5.0f, 5.0f, 6.0f, 6.0f, 6.0f }; + + StridedSliceTestImpl( + backends, + inputData, + outputData, + beginData, + endData, + strideData, + inputShape, + beginShape, + endShape, + strideShape, + outputShape, + beginMask, + endMask + ); +} + + +TEST_SUITE("StridedSlice_CpuRefTests") +{ + +TEST_CASE ("StridedSlice_4D_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + StridedSlice4DTest(backends); +} + +TEST_CASE ("StridedSlice_4D_Reverse_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + 
StridedSlice4DReverseTest(backends); +} + +TEST_CASE ("StridedSlice_SimpleStride_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + StridedSliceSimpleStrideTest(backends); +} + +TEST_CASE ("StridedSlice_SimpleRange_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + StridedSliceSimpleRangeMaskTest(backends); +} + +} // StridedSlice_CpuRefTests TestSuite + + + +TEST_SUITE("StridedSlice_CpuAccTests") +{ + +TEST_CASE ("StridedSlice_4D_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + StridedSlice4DTest(backends); +} + +TEST_CASE ("StridedSlice_4D_Reverse_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + StridedSlice4DReverseTest(backends); +} + +TEST_CASE ("StridedSlice_SimpleStride_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + StridedSliceSimpleStrideTest(backends); +} + +TEST_CASE ("StridedSlice_SimpleRange_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + StridedSliceSimpleRangeMaskTest(backends); +} + +} // StridedSlice_CpuAccTests TestSuite + + + +TEST_SUITE("StridedSlice_GpuAccTests") +{ + +TEST_CASE ("StridedSlice_4D_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + StridedSlice4DTest(backends); +} + +TEST_CASE ("StridedSlice_4D_Reverse_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + StridedSlice4DReverseTest(backends); +} + +TEST_CASE ("StridedSlice_SimpleStride_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + StridedSliceSimpleStrideTest(backends); +} + +TEST_CASE ("StridedSlice_SimpleRange_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + StridedSliceSimpleRangeMaskTest(backends); +} + +} // StridedSlice_GpuAccTests TestSuite + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/SliceTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/SliceTestHelper.hpp new file mode 100644 index 0000000000..abaa807aed --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/SliceTestHelper.hpp @@ -0,0 +1,241 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
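+// This helper builds a STRIDED_SLICE model with four inputs; the begin, end and stride tensors
+// are INT32 tensors backed by constant buffers 1-3, and the begin/end/ellipsis/newAxis/shrinkAxis
+// masks are carried in StridedSliceOptions.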
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +namespace +{ + +struct StridedSliceParams +{ + StridedSliceParams(std::vector& inputTensorShape, + std::vector& beginTensorData, + std::vector& endTensorData, + std::vector& strideTensorData, + std::vector& outputTensorShape, + armnn::StridedSliceDescriptor& descriptor) + : m_InputTensorShape(inputTensorShape), + m_BeginTensorData(beginTensorData), + m_EndTensorData(endTensorData), + m_StrideTensorData(strideTensorData), + m_OutputTensorShape(outputTensorShape), + m_Descriptor (descriptor) {} + + std::vector m_InputTensorShape; + std::vector m_BeginTensorData; + std::vector m_EndTensorData; + std::vector m_StrideTensorData; + std::vector m_OutputTensorShape; + armnn::StridedSliceDescriptor m_Descriptor; +}; + +std::vector CreateSliceTfLiteModel(tflite::TensorType tensorType, + const std::vector& inputTensorShape, + const std::vector& beginTensorData, + const std::vector& endTensorData, + const std::vector& strideTensorData, + const std::vector& beginTensorShape, + const std::vector& endTensorShape, + const std::vector& strideTensorShape, + const std::vector& outputTensorShape, + const int32_t beginMask, + const int32_t endMask, + const int32_t ellipsisMask, + const int32_t newAxisMask, + const int32_t ShrinkAxisMask, + const armnn::DataLayout& dataLayout) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::array, 4> buffers; + buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})); + buffers[1] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(beginTensorData.data()), + sizeof(int32_t) * beginTensorData.size())); + buffers[2] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(endTensorData.data()), + sizeof(int32_t) * endTensorData.size())); + buffers[3] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(strideTensorData.data()), + sizeof(int32_t) * strideTensorData.size())); + + std::array, 5> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputTensorShape.data(), + inputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input")); + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(beginTensorShape.data(), + beginTensorShape.size()), + ::tflite::TensorType_INT32, + 1, + flatBufferBuilder.CreateString("begin_tensor")); + tensors[2] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(endTensorShape.data(), + endTensorShape.size()), + ::tflite::TensorType_INT32, + 2, + flatBufferBuilder.CreateString("end_tensor")); + tensors[3] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(strideTensorShape.data(), + strideTensorShape.size()), + ::tflite::TensorType_INT32, + 3, + flatBufferBuilder.CreateString("stride_tensor")); + tensors[4] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output")); + + + // create operator + tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_StridedSliceOptions; + flatbuffers::Offset operatorBuiltinOptions = CreateStridedSliceOptions(flatBufferBuilder, + beginMask, + endMask, + ellipsisMask, + newAxisMask, + ShrinkAxisMask).Union(); + + const 
std::vector operatorInputs{ 0, 1, 2, 3 }; + const std::vector operatorOutputs{ 4 }; + flatbuffers::Offset sliceOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + const std::vector subgraphInputs{ 0, 1, 2, 3 }; + const std::vector subgraphOutputs{ 4 }; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&sliceOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: StridedSlice Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, + BuiltinOperator_STRIDED_SLICE); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void StridedSliceTestImpl(std::vector& backends, + std::vector& inputValues, + std::vector& expectedOutputValues, + std::vector& beginTensorData, + std::vector& endTensorData, + std::vector& strideTensorData, + std::vector& inputTensorShape, + std::vector& beginTensorShape, + std::vector& endTensorShape, + std::vector& strideTensorShape, + std::vector& outputTensorShape, + const int32_t beginMask = 0, + const int32_t endMask = 0, + const int32_t ellipsisMask = 0, + const int32_t newAxisMask = 0, + const int32_t ShrinkAxisMask = 0, + const armnn::DataLayout& dataLayout = armnn::DataLayout::NHWC) +{ + using namespace tflite; + std::vector modelBuffer = CreateSliceTfLiteModel( + ::tflite::TensorType_FLOAT32, + inputTensorShape, + beginTensorData, + endTensorData, + strideTensorData, + beginTensorShape, + endTensorShape, + strideTensorShape, + outputTensorShape, + beginMask, + endMask, + ellipsisMask, + newAxisMask, + ShrinkAxisMask, + dataLayout); + + auto tfLiteModel = GetModel(modelBuffer.data()); + + // Create TfLite Interpreters + std::unique_ptr armnnDelegate; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegate) == kTfLiteOk); + CHECK(armnnDelegate != nullptr); + CHECK(armnnDelegate->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteDelegate; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteDelegate) == kTfLiteOk); + CHECK(tfLiteDelegate != nullptr); + CHECK(tfLiteDelegate->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegate->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + 
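+    // Both interpreters receive identical input values: tfLiteDelegate runs the reference
+    // TfLite kernels while armnnDelegate has been modified to run through the ArmNN delegate,
+    // so their outputs can be compared element for element afterwards. Only input 0 needs
+    // filling; the begin/end/stride tensors are stored in the model as constant buffers.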
armnnDelegate::FillInput(tfLiteDelegate, 0, inputValues); + armnnDelegate::FillInput(armnnDelegate, 0, inputValues); + + // Run EnqueWorkload + CHECK(tfLiteDelegate->Invoke() == kTfLiteOk); + CHECK(armnnDelegate->Invoke() == kTfLiteOk); + + // Compare output data + armnnDelegate::CompareOutputData(tfLiteDelegate, + armnnDelegate, + outputTensorShape, + expectedOutputValues); + + tfLiteDelegate.reset(nullptr); + armnnDelegate.reset(nullptr); +} // End of StridedSlice Test + +} // anonymous namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/SoftmaxTest.cpp b/arch/arm/ARMnn/delegate/src/test/SoftmaxTest.cpp new file mode 100644 index 0000000000..3339c09918 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/SoftmaxTest.cpp @@ -0,0 +1,77 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "SoftmaxTestHelper.hpp" + +#include + +#include +#include + +#include + +namespace armnnDelegate +{ +TEST_SUITE ("Softmax_GpuAccTests") +{ + +TEST_CASE ("Softmax_Standard_Beta_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + std::vector expectedOutput = {0.00994190481, 0.0445565246, 0.0734612942, 0.329230666, 0.542809606, + 0.710742831, 0.158588171, 0.0961885825, 0.0214625746, 0.0130177103}; + SoftmaxTestCase(tflite::BuiltinOperator_SOFTMAX, backends, 1, expectedOutput); +} + +TEST_CASE ("Softmax_Different_Beta_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + std::vector expectedOutput = {0.0946234912, 0.148399189, 0.172415257, 0.270400971, 0.314161092, 0.352414012, + 0.224709094, 0.193408906, 0.123322964, 0.106145054}; + SoftmaxTestCase(tflite::BuiltinOperator_SOFTMAX, backends, 0.3, expectedOutput); + +} + +TEST_CASE ("Log_Softmax_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + std::vector expectedOutput = + {-4.61099672, -3.11099672, -2.61099672, -1.11099672, -0.610996664, + -0.341444582, -1.84144461, -2.34144449, -3.84144449, -4.34144449}; + SoftmaxTestCase(tflite::BuiltinOperator_LOG_SOFTMAX, backends, 0, expectedOutput); +} +} // TEST_SUITE ("Softmax_GpuAccTests") + +TEST_SUITE ("Softmax_CpuRefTests") +{ + +TEST_CASE ("Softmax_Standard_Beta_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + std::vector expectedOutput = { + 0.00994190481, 0.0445565246, 0.0734612942, 0.329230666, 0.542809606, + 0.710742831, 0.158588171, 0.0961885825, 0.0214625746, 0.0130177103}; + SoftmaxTestCase(tflite::BuiltinOperator_SOFTMAX, backends, 1, expectedOutput); +} + +TEST_CASE ("Softmax_Different_Beta_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + std::vector expectedOutput = { + 0.0946234912, 0.148399189, 0.172415257, 0.270400971, 0.314161092, + 0.352414012, 0.224709094, 0.193408906, 0.123322964, 0.106145054}; + SoftmaxTestCase(tflite::BuiltinOperator_SOFTMAX, backends, 0.3, expectedOutput); +} + +TEST_CASE ("Log_Softmax_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + std::vector expectedOutput = + {-4.61099672, -3.11099672, -2.61099672, -1.11099672, -0.610996664, + -0.341444582, -1.84144461, -2.34144449, -3.84144449, -4.34144449}; + SoftmaxTestCase(tflite::BuiltinOperator_LOG_SOFTMAX, backends, 0, expectedOutput); +} +} // TEST_SUITE ("Softmax_CpuRefTests") +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/test/SoftmaxTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/SoftmaxTestHelper.hpp new file mode 100644 index 0000000000..bd32c212e9 --- /dev/null +++ 
b/arch/arm/ARMnn/delegate/src/test/SoftmaxTestHelper.hpp @@ -0,0 +1,192 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ +std::vector CreateSoftmaxTfLiteModel(tflite::BuiltinOperator softmaxOperatorCode, + tflite::TensorType tensorType, + const std::vector & tensorShape, + float beta) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::vector> buffers; + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + + std::array, 2> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorShape.data(), + tensorShape.size()), + tensorType, + 0); + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorShape.data(), + tensorShape.size()), + tensorType, + 0); + + const std::vector operatorInputs({0}); + const std::vector operatorOutputs({1}); + + flatbuffers::Offset softmaxOperator; + flatbuffers::Offset modelDescription; + flatbuffers::Offset operatorCode; + + switch (softmaxOperatorCode) + { + case tflite::BuiltinOperator_SOFTMAX: + softmaxOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + BuiltinOptions_SoftmaxOptions, + CreateSoftmaxOptions(flatBufferBuilder, beta).Union()); + modelDescription = flatBufferBuilder.CreateString("ArmnnDelegate: Softmax Operator Model"); + operatorCode = CreateOperatorCode(flatBufferBuilder, + tflite::BuiltinOperator_SOFTMAX); + break; + case tflite::BuiltinOperator_LOG_SOFTMAX: + softmaxOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + BuiltinOptions_LogSoftmaxOptions, + CreateLogSoftmaxOptions(flatBufferBuilder).Union()); + flatBufferBuilder.CreateString("ArmnnDelegate: Log-Softmax Operator Model"); + operatorCode = CreateOperatorCode(flatBufferBuilder, + tflite::BuiltinOperator_LOG_SOFTMAX); + break; + default: + break; + } + const std::vector subgraphInputs({0}); + const std::vector subgraphOutputs({1}); + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&softmaxOperator, 1)); + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + flatBufferBuilder.Finish(flatbufferModel); + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +void SoftmaxTest(tflite::BuiltinOperator softmaxOperatorCode, + tflite::TensorType tensorType, + std::vector& backends, + std::vector& shape, + std::vector& inputValues, + std::vector& expectedOutputValues, + float beta = 0) +{ + using namespace tflite; + std::vector modelBuffer = 
CreateSoftmaxTfLiteModel(softmaxOperatorCode, + tensorType, + shape, + beta); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + auto tfLiteDelegateInputId = tfLiteInterpreter->inputs()[0]; + auto tfLiteInterpreterInputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateInputId); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + tfLiteInterpreterInputData[i] = inputValues[i]; + } + + auto armnnDelegateInputId = armnnDelegateInterpreter->inputs()[0]; + auto armnnDelegateInputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateInputId); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + armnnDelegateInputData[i] = inputValues[i]; + } + // Run EnqueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + auto tfLiteInterpreterOutputId = tfLiteInterpreter->outputs()[0]; + auto tfLiteInterpreterOutputData = tfLiteInterpreter->typed_tensor(tfLiteInterpreterOutputId); + auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0]; + auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateOutputId); + + for (size_t i = 0; i < inputValues.size(); ++i) + { + CHECK(armnnUtils::within_percentage_tolerance(expectedOutputValues[i], armnnDelegateOutputData[i], 0.1)); + CHECK(armnnUtils::within_percentage_tolerance(tfLiteInterpreterOutputData[i], + armnnDelegateOutputData[i], 0.1)); + } +} + + +/// Convenience function to run softmax and log-softmax test cases +/// \param operatorCode tflite::BuiltinOperator_SOFTMAX or tflite::BuiltinOperator_LOG_SOFTMAX +/// \param backends armnn backends to target +/// \param beta multiplicative parameter to the softmax function +/// \param expectedOutput to be checked against transformed input +void SoftmaxTestCase(tflite::BuiltinOperator operatorCode, + std::vector backends, float beta, std::vector expectedOutput) { + std::vector input = { + 1.0, 2.5, 3.0, 4.5, 5.0, + -1.0, -2.5, -3.0, -4.5, -5.0}; + std::vector shape = {2, 5}; + + SoftmaxTest(operatorCode, + tflite::TensorType_FLOAT32, + backends, + shape, + input, + expectedOutput, + beta); +} + +} // anonymous namespace diff --git a/arch/arm/ARMnn/delegate/src/test/SpaceDepthTest.cpp b/arch/arm/ARMnn/delegate/src/test/SpaceDepthTest.cpp new file mode 100644 index 0000000000..f80e749b87 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/SpaceDepthTest.cpp @@ -0,0 +1,207 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. 
All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "SpaceDepthTestHelper.hpp" + +#include + +#include +#include + +#include + +namespace armnnDelegate +{ + +void DepthToSpaceFp32Test(std::vector& backends, int blockSize) +{ + // Set input data + std::vector inputShape { 1, 2, 2, 4 }; + std::vector outputShape { 1, 4, 4, 1 }; + + std::vector inputValues = { 1.f, 2.f, 3.f, 4.f, + 5.f, 6.f, 7.f, 8.f, + 9.f, 10.f, 11.f, 12.f, + 13.f, 14.f, 15.f, 16.f }; + + std::vector expectedOutputValues = { 1.f, 2.f, 5.f, 6.f, + 3.f, 4.f, 7.f, 8.f, + 9.f, 10.f, 13.f, 14.f, + 11.f, 12.f, 15.f, 16.f }; + + SpaceDepthTest(tflite::BuiltinOperator_DEPTH_TO_SPACE, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + blockSize); +} + +void DepthToSpaceUint8Test(std::vector& backends, int blockSize) +{ + // Set input data + std::vector inputShape { 2, 1, 1, 4 }; + std::vector outputShape { 2, 2, 2, 1 }; + + std::vector inputValues = { 1, 2, 3, 4, + 5, 6, 7, 8 }; + + std::vector expectedOutputValues = { 1, 2, 3, 4, + 5, 6, 7, 8 }; + + SpaceDepthTest(tflite::BuiltinOperator_DEPTH_TO_SPACE, + ::tflite::TensorType_UINT8, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + blockSize); +} + +void SpaceToDepthFp32Test(std::vector& backends, int blockSize) +{ + // Set input data + std::vector inputShape { 1, 2, 2, 2 }; + std::vector outputShape { 1, 1, 1, 8 }; + + std::vector inputValues = { 1.4f, 2.3f, 3.2f, 4.1f, 5.4f, 6.3f, 7.2f, 8.1f }; + std::vector expectedOutputValues = { 1.4f, 2.3f, 3.2f, 4.1f, 5.4f, 6.3f, 7.2f, 8.1f }; + + SpaceDepthTest(tflite::BuiltinOperator_SPACE_TO_DEPTH, + ::tflite::TensorType_FLOAT32, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + blockSize); +} + +void SpaceToDepthUint8Test(std::vector& backends, int blockSize) +{ + // Set input data + std::vector inputShape { 1, 2, 2, 1 }; + std::vector outputShape { 1, 1, 1, 4 }; + + std::vector inputValues = { 1, 2, 3, 2 }; + std::vector expectedOutputValues = { 1, 2, 3, 2 }; + + SpaceDepthTest(tflite::BuiltinOperator_SPACE_TO_DEPTH, + ::tflite::TensorType_UINT8, + backends, + inputShape, + outputShape, + inputValues, + expectedOutputValues, + blockSize); +} + +TEST_SUITE("DepthToSpace_CpuRefTests") +{ + +TEST_CASE ("DepthToSpaceFp32Test_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + DepthToSpaceFp32Test(backends, 2); +} + +TEST_CASE ("DepthToSpaceUint8Test_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + DepthToSpaceUint8Test(backends, 2); +} + +} // TEST_SUITE("DepthToSpace_CpuRefTests") + + +TEST_SUITE("DepthToSpace_CpuAccTests") +{ + +TEST_CASE ("DepthToSpaceFp32Test_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + DepthToSpaceFp32Test(backends, 2); +} + +TEST_CASE ("DepthToSpaceUint8Test_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + DepthToSpaceUint8Test(backends, 2); +} + +} // TEST_SUITE("DepthToSpace_CpuAccTests") + +TEST_SUITE("DepthToSpace_GpuAccTests") +{ + +TEST_CASE ("DepthToSpaceFp32Test_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + DepthToSpaceFp32Test(backends, 2); +} + +TEST_CASE ("DepthToSpaceUint8Test_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + DepthToSpaceUint8Test(backends, 2); +} + +} // TEST_SUITE("DepthToSpace_GpuAccTests") + +TEST_SUITE("SpaceToDepth_CpuRefTests") +{ + +TEST_CASE ("SpaceToDepthFp32Test_CpuRef_Test") +{ + std::vector 
backends = { armnn::Compute::CpuRef }; + SpaceToDepthFp32Test(backends, 2); +} + +TEST_CASE ("SpaceToDepthUint8Test_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + SpaceToDepthUint8Test(backends, 2); +} + +} // TEST_SUITE("SpaceToDepth_CpuRefTests") + +TEST_SUITE("SpaceToDepth_CpuAccTests") +{ + +TEST_CASE ("SpaceToDepthFp32Test_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + SpaceToDepthFp32Test(backends, 2); +} + +TEST_CASE ("SpaceToDepthUint8Test_CpuAcc_Test") +{ + std::vector backends = { armnn::Compute::CpuAcc }; + SpaceToDepthUint8Test(backends, 2); +} + +} // TEST_SUITE("SpaceToDepth_CpuAccTests") + +TEST_SUITE("SpaceToDepth_GpuAccTests") +{ + +TEST_CASE ("SpaceToDepthFp32Test_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + SpaceToDepthFp32Test(backends, 2); +} + +TEST_CASE ("SpaceToDepthUint8Test_GpuAcc_Test") +{ + std::vector backends = { armnn::Compute::GpuAcc }; + SpaceToDepthUint8Test(backends, 2); +} + +} // TEST_SUITE("SpaceToDepth_GpuAccTests") + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/test/SpaceDepthTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/SpaceDepthTestHelper.hpp new file mode 100644 index 0000000000..d9a783c6a7 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/SpaceDepthTestHelper.hpp @@ -0,0 +1,166 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ +std::vector CreateSpaceDepthTfLiteModel(tflite::BuiltinOperator spaceDepthOperatorCode, + tflite::TensorType tensorType, + const std::vector & inputTensorShape, + const std::vector & outputTensorShape, + int32_t blockSize) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ 1.0f }), + flatBufferBuilder.CreateVector({ 0 })); + + std::vector> buffers; + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + + std::array, 2> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputTensorShape.data(), + inputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input"), + quantizationParameters); + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output"), + quantizationParameters); + + const std::vector operatorInputs({0}); + const std::vector operatorOutputs({1}); + + flatbuffers::Offset spaceDepthOperator; + flatbuffers::Offset modelDescription; + flatbuffers::Offset operatorCode; + + switch (spaceDepthOperatorCode) + { + case tflite::BuiltinOperator_SPACE_TO_DEPTH: + spaceDepthOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + BuiltinOptions_SpaceToDepthOptions, + CreateSpaceToDepthOptions(flatBufferBuilder, blockSize).Union()); + modelDescription = flatBufferBuilder.CreateString("ArmnnDelegate: SPACE_TO_DEPTH Operator Model"); + operatorCode = CreateOperatorCode(flatBufferBuilder, + tflite::BuiltinOperator_SPACE_TO_DEPTH); + break; + case 
tflite::BuiltinOperator_DEPTH_TO_SPACE: + spaceDepthOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + BuiltinOptions_DepthToSpaceOptions, + CreateDepthToSpaceOptions(flatBufferBuilder, blockSize).Union()); + flatBufferBuilder.CreateString("ArmnnDelegate: DEPTH_TO_SPACE Operator Model"); + operatorCode = CreateOperatorCode(flatBufferBuilder, + tflite::BuiltinOperator_DEPTH_TO_SPACE); + break; + default: + break; + } + const std::vector subgraphInputs({0}); + const std::vector subgraphOutputs({1}); + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&spaceDepthOperator, 1)); + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + flatBufferBuilder.Finish(flatbufferModel); + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void SpaceDepthTest(tflite::BuiltinOperator spaceDepthOperatorCode, + tflite::TensorType tensorType, + std::vector& backends, + std::vector& inputShape, + std::vector& outputShape, + std::vector& inputValues, + std::vector& expectedOutputValues, + int32_t blockSize = 2) +{ + using namespace tflite; + std::vector modelBuffer = CreateSpaceDepthTfLiteModel(spaceDepthOperatorCode, + tensorType, + inputShape, + outputShape, + blockSize); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(tfLiteInterpreter, 0, inputValues); + armnnDelegate::FillInput(armnnDelegateInterpreter, 0, inputValues); + + // Run EnqueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + armnnDelegate::CompareOutputData(tfLiteInterpreter, armnnDelegateInterpreter, outputShape, expectedOutputValues); +} + +} // anonymous namespace diff --git a/arch/arm/ARMnn/delegate/src/test/SplitTest.cpp 
b/arch/arm/ARMnn/delegate/src/test/SplitTest.cpp new file mode 100644 index 0000000000..5940516583 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/SplitTest.cpp @@ -0,0 +1,262 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "SplitTestHelper.hpp" + +#include + +#include +#include + +#include + +namespace armnnDelegate +{ + +// SPLIT Operator +void SplitUint8Test(std::vector& backends) +{ + std::vector axisShape { 1 }; + std::vector inputShape { 2, 2, 2, 2} ; + std::vector outputShape0 { 2, 2, 2, 1 }; + std::vector outputShape1 { 2, 2, 2, 1 }; + std::vector> outputShapes{ outputShape0, outputShape1 }; + + std::vector axisData { 3 }; // Axis + std::vector inputValues { 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16 }; // Input + + + std::vector expectedOutputValues0 { 1, 3, 5, 7, 9, 11, 13, 15 }; + std::vector expectedOutputValues1 { 2, 4, 6, 8, 10, 12, 14, 16 }; + std::vector> expectedOutputValues{ expectedOutputValues0, expectedOutputValues1 }; + + int32_t numSplits = 2; + + SplitTest(::tflite::TensorType_UINT8, + backends, + axisShape, + inputShape, + outputShapes, + axisData, + inputValues, + expectedOutputValues, + numSplits); +} + +void SplitFp32Test(std::vector& backends) +{ + std::vector axisShape { 1 }; + std::vector inputShape { 2, 2, 2, 2 }; + std::vector outputShape0 { 2, 1, 2, 2 }; + std::vector outputShape1 { 2, 1, 2, 2 }; + std::vector> outputShapes{ outputShape0, outputShape1 }; + + std::vector axisData { 1 }; // Axis + std::vector inputValues { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, + 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f }; // Input + + + std::vector expectedOutputValues0 { 1.0f, 2.0f, 3.0f, 4.0f, 9.0f, 10.0f, 11.0f, 12.0f }; + std::vector expectedOutputValues1 { 5.0f, 6.0f, 7.0f, 8.0f, 13.0f, 14.0f, 15.0f, 16.0f }; + std::vector> expectedOutputValues{ expectedOutputValues0, expectedOutputValues1 }; + + int32_t numSplits = 2; + + SplitTest(::tflite::TensorType_FLOAT32, + backends, + axisShape, + inputShape, + outputShapes, + axisData, + inputValues, + expectedOutputValues, + numSplits); +} + +// SPLIT Test Suite +TEST_SUITE("SPLIT_CpuRefTests") +{ + +TEST_CASE ("SPLIT_Uint8_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + SplitUint8Test(backends); +} + +TEST_CASE ("SPLIT_Fp32_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + SplitFp32Test(backends); +} + +} + +TEST_SUITE("SPLIT_CpuAccTests") +{ + +TEST_CASE ("SPLIT_Uint8_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + SplitUint8Test(backends); +} + +TEST_CASE ("SPLIT_Fp32_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + SplitFp32Test(backends); +} + +} + +TEST_SUITE("SPLIT_GpuAccTests") +{ + +TEST_CASE ("SPLIT_Uint8_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + SplitUint8Test(backends); +} + +TEST_CASE ("SPLIT_Fp32_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + SplitFp32Test(backends); +} + +} +// End of SPLIT Test Suite + +// SPLIT_V Operator +void SplitVUint8Test(std::vector& backends) +{ + std::vector axisShape { 1 }; + std::vector inputShape { 2, 4, 2, 2 }; + std::vector splitsShape { 2 }; + std::vector outputShape0 { 2, 3, 2, 2 }; + std::vector outputShape1 { 2, 1, 2, 2 }; + std::vector> outputShapes{ outputShape0, outputShape1 }; + + std::vector axisData { 1 }; // Axis + std::vector splitsData { 3, 1 }; // Splits + std::vector inputValues { 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 
12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32 }; // Input + + + std::vector expectedOutputValues0 { 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28 }; + std::vector expectedOutputValues1 { 13, 14, 15, 16, 29, 30, 31, 32 }; + std::vector> expectedOutputValues{ expectedOutputValues0, expectedOutputValues1 }; + + int32_t numSplits = 2; + + SplitVTest(::tflite::TensorType_UINT8, + backends, + inputShape, + splitsShape, + axisShape, + outputShapes, + inputValues, + splitsData, + axisData, + expectedOutputValues, + numSplits); +} + +void SplitVFp32Test(std::vector& backends) +{ + std::vector axisShape { 1 }; + std::vector inputShape { 2, 4, 2, 2 }; + std::vector splitsShape { 2 }; + std::vector outputShape0 { 2, 3, 2, 2 }; + std::vector outputShape1 { 2, 1, 2, 2 }; + std::vector> outputShapes{ outputShape0, outputShape1 }; + + std::vector axisData { 1 }; // Axis + std::vector splitsData { 3, 1 }; // Splits + std::vector inputValues { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, + 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, + 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, + 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f }; // Input + + + std::vector expectedOutputValues0 { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, + 9.0f, 10.0f, 11.0f, 12.0f, 17.0f, 18.0f, 19.0f, 20.0f, + 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f }; + std::vector expectedOutputValues1 { 13.0f, 14.0f, 15.0f, 16.0f, 29.0f, 30.0f, 31.0f, 32.0f }; + std::vector> expectedOutputValues{ expectedOutputValues0, expectedOutputValues1 }; + + int32_t numSplits = 2; + + SplitVTest(::tflite::TensorType_FLOAT32, + backends, + inputShape, + splitsShape, + axisShape, + outputShapes, + inputValues, + splitsData, + axisData, + expectedOutputValues, + numSplits); +} + +// SPLIT_V Test Suite +TEST_SUITE("SPLIT_V_CpuRefTests") +{ + +TEST_CASE ("SPLIT_V_Uint8_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + SplitVUint8Test(backends); +} + +TEST_CASE ("SPLIT_V_Fp32_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + SplitVFp32Test(backends); +} + +} + +TEST_SUITE("SPLIT_V_CpuAccTests") +{ + +TEST_CASE ("SPLIT_V_Uint8_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + SplitVUint8Test(backends); +} + +TEST_CASE ("SPLIT_V_Fp32_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + SplitVFp32Test(backends); +} + +} + +TEST_SUITE("SPLIT_V_GpuAccTests") +{ + +TEST_CASE ("SPLIT_V_Uint8_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + SplitVUint8Test(backends); +} + +TEST_CASE ("SPLIT_V_Fp32_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + SplitVFp32Test(backends); +} + +} +// End of SPLIT_V Test Suite + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/SplitTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/SplitTestHelper.hpp new file mode 100644 index 0000000000..31fc7d5e46 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/SplitTestHelper.hpp @@ -0,0 +1,368 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +namespace +{ + +std::vector CreateSplitTfLiteModel(tflite::TensorType tensorType, + std::vector& axisTensorShape, + std::vector& inputTensorShape, + const std::vector>& outputTensorShapes, + std::vector& axisData, + const int32_t numSplits, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::array, 2> buffers; + buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})); + buffers[1] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(axisData.data()), + sizeof(int32_t) * axisData.size())); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + + std::array, 4> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(axisTensorShape.data(), + axisTensorShape.size()), + ::tflite::TensorType_INT32, + 1, + flatBufferBuilder.CreateString("axis"), + quantizationParameters); + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputTensorShape.data(), + inputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input"), + quantizationParameters); + + // Create output tensor + for (unsigned int i = 0; i < outputTensorShapes.size(); ++i) + { + tensors[i + 2] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShapes[i].data(), + outputTensorShapes[i].size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output"), + quantizationParameters); + } + + // create operator. Mean uses ReducerOptions. 
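+    // SPLIT itself uses SplitOptions (carrying numSplits); the axis tensor is operator
+    // input 0 and the data tensor is operator input 1, producing numSplits equally sized outputs.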
+ tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_SplitOptions; + flatbuffers::Offset operatorBuiltinOptions = CreateSplitOptions(flatBufferBuilder, numSplits).Union(); + + const std::vector operatorInputs{ {0, 1} }; + const std::vector operatorOutputs{ {2, 3} }; + flatbuffers::Offset controlOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + const std::vector subgraphInputs{ {0, 1} }; + const std::vector subgraphOutputs{ {2, 3} }; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&controlOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: SPLIT Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, BuiltinOperator_SPLIT); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void SplitTest(tflite::TensorType tensorType, + std::vector& backends, + std::vector& axisTensorShape, + std::vector& inputTensorShape, + std::vector>& outputTensorShapes, + std::vector& axisData, + std::vector& inputValues, + std::vector>& expectedOutputValues, + const int32_t numSplits, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + std::vector modelBuffer = CreateSplitTfLiteModel(tensorType, + axisTensorShape, + inputTensorShape, + outputTensorShapes, + axisData, + numSplits, + quantScale, + quantOffset); + const Model* tfLiteModel = GetModel(modelBuffer.data()); + + // Create TfLite Interpreters + std::unique_ptr armnnDelegate; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegate) == kTfLiteOk); + CHECK(armnnDelegate != nullptr); + CHECK(armnnDelegate->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteDelegate; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteDelegate) == kTfLiteOk); + CHECK(tfLiteDelegate != nullptr); + CHECK(tfLiteDelegate->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegate->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(tfLiteDelegate, 1, inputValues); + armnnDelegate::FillInput(armnnDelegate, 1, inputValues); + + // Run EnqueWorkload + CHECK(tfLiteDelegate->Invoke() == kTfLiteOk); + CHECK(armnnDelegate->Invoke() == kTfLiteOk); + + // 
Compare output data + for (unsigned int i = 0; i < expectedOutputValues.size(); ++i) + { + armnnDelegate::CompareOutputData(tfLiteDelegate, + armnnDelegate, + outputTensorShapes[i], + expectedOutputValues[i], + i); + } + + tfLiteDelegate.reset(nullptr); + armnnDelegate.reset(nullptr); +} // End of SPLIT Test + +std::vector CreateSplitVTfLiteModel(tflite::TensorType tensorType, + std::vector& inputTensorShape, + std::vector& splitsTensorShape, + std::vector& axisTensorShape, + const std::vector>& outputTensorShapes, + std::vector& splitsData, + std::vector& axisData, + const int32_t numSplits, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::array, 3> buffers; + buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})); + buffers[1] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(splitsData.data()), + sizeof(int32_t) * splitsData.size())); + buffers[2] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(axisData.data()), + sizeof(int32_t) * axisData.size())); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + + std::array, 5> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputTensorShape.data(), + inputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input"), + quantizationParameters); + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(splitsTensorShape.data(), + splitsTensorShape.size()), + ::tflite::TensorType_INT32, + 1, + flatBufferBuilder.CreateString("splits"), + quantizationParameters); + tensors[2] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(axisTensorShape.data(), + axisTensorShape.size()), + ::tflite::TensorType_INT32, + 2, + flatBufferBuilder.CreateString("axis"), + quantizationParameters); + + // Create output tensor + for (unsigned int i = 0; i < outputTensorShapes.size(); ++i) + { + tensors[i + 3] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShapes[i].data(), + outputTensorShapes[i].size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output"), + quantizationParameters); + } + + // create operator. Mean uses ReducerOptions. 
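+    // SPLIT_V uses SplitVOptions; its inputs are the data tensor (0), the size_splits tensor (1)
+    // and the axis tensor (2), so the sizes of the outputs along the split axis follow splitsData
+    // instead of being forced to be equal.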
+ tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_SplitVOptions; + flatbuffers::Offset operatorBuiltinOptions = CreateSplitVOptions(flatBufferBuilder, numSplits).Union(); + + const std::vector operatorInputs{ {0, 1, 2} }; + const std::vector operatorOutputs{ {3, 4} }; + flatbuffers::Offset controlOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + const std::vector subgraphInputs{ {0, 1, 2} }; + const std::vector subgraphOutputs{ {3, 4} }; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&controlOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: SPLIT_V Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, BuiltinOperator_SPLIT_V); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void SplitVTest(tflite::TensorType tensorType, + std::vector& backends, + std::vector& inputTensorShape, + std::vector& splitsTensorShape, + std::vector& axisTensorShape, + std::vector>& outputTensorShapes, + std::vector& inputValues, + std::vector& splitsData, + std::vector& axisData, + std::vector>& expectedOutputValues, + const int32_t numSplits, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + std::vector modelBuffer = CreateSplitVTfLiteModel(tensorType, + inputTensorShape, + splitsTensorShape, + axisTensorShape, + outputTensorShapes, + splitsData, + axisData, + numSplits, + quantScale, + quantOffset); + const Model* tfLiteModel = GetModel(modelBuffer.data()); + + // Create TfLite Interpreters + std::unique_ptr armnnDelegate; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegate) == kTfLiteOk); + CHECK(armnnDelegate != nullptr); + CHECK(armnnDelegate->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteDelegate; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteDelegate) == kTfLiteOk); + CHECK(tfLiteDelegate != nullptr); + CHECK(tfLiteDelegate->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegate->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(tfLiteDelegate, 0, inputValues); + armnnDelegate::FillInput(armnnDelegate, 0, inputValues); + + // Run 
EnqueWorkload + CHECK(tfLiteDelegate->Invoke() == kTfLiteOk); + CHECK(armnnDelegate->Invoke() == kTfLiteOk); + + // Compare output data + for (unsigned int i = 0; i < expectedOutputValues.size(); ++i) + { + armnnDelegate::CompareOutputData(tfLiteDelegate, + armnnDelegate, + outputTensorShapes[i], + expectedOutputValues[i], + i); + } + + tfLiteDelegate.reset(nullptr); + armnnDelegate.reset(nullptr); +} // End of SPLIT_V Test + +} // anonymous namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/TestUtils.cpp b/arch/arm/ARMnn/delegate/src/test/TestUtils.cpp new file mode 100644 index 0000000000..9dce4461da --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/TestUtils.cpp @@ -0,0 +1,152 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "TestUtils.hpp" + +namespace armnnDelegate +{ + +void CompareData(bool tensor1[], bool tensor2[], size_t tensorSize) +{ + auto compareBool = [](auto a, auto b) {return (((a == 0) && (b == 0)) || ((a != 0) && (b != 0)));}; + for (size_t i = 0; i < tensorSize; i++) + { + CHECK(compareBool(tensor1[i], tensor2[i])); + } +} + +void CompareData(std::vector& tensor1, bool tensor2[], size_t tensorSize) +{ + auto compareBool = [](auto a, auto b) {return (((a == 0) && (b == 0)) || ((a != 0) && (b != 0)));}; + for (size_t i = 0; i < tensorSize; i++) + { + CHECK(compareBool(tensor1[i], tensor2[i])); + } +} + +void CompareData(float tensor1[], float tensor2[], size_t tensorSize) +{ + for (size_t i = 0; i < tensorSize; i++) + { + CHECK(tensor1[i] == doctest::Approx( tensor2[i] )); + } +} + +void CompareData(float tensor1[], float tensor2[], size_t tensorSize, float percentTolerance) +{ + for (size_t i = 0; i < tensorSize; i++) + { + CHECK(std::max(tensor1[i], tensor2[i]) - std::min(tensor1[i], tensor2[i]) <= + std::abs(tensor1[i]*percentTolerance/100)); + } +} + +void CompareData(uint8_t tensor1[], uint8_t tensor2[], size_t tensorSize) +{ + uint8_t tolerance = 1; + for (size_t i = 0; i < tensorSize; i++) + { + CHECK(std::max(tensor1[i], tensor2[i]) - std::min(tensor1[i], tensor2[i]) <= tolerance); + } +} + +void CompareData(int16_t tensor1[], int16_t tensor2[], size_t tensorSize) +{ + int16_t tolerance = 1; + for (size_t i = 0; i < tensorSize; i++) + { + CHECK(std::max(tensor1[i], tensor2[i]) - std::min(tensor1[i], tensor2[i]) <= tolerance); + } +} + +void CompareData(int32_t tensor1[], int32_t tensor2[], size_t tensorSize) +{ + int32_t tolerance = 1; + for (size_t i = 0; i < tensorSize; i++) + { + CHECK(std::max(tensor1[i], tensor2[i]) - std::min(tensor1[i], tensor2[i]) <= tolerance); + } +} + +void CompareData(int8_t tensor1[], int8_t tensor2[], size_t tensorSize) +{ + int8_t tolerance = 1; + for (size_t i = 0; i < tensorSize; i++) + { + CHECK(std::max(tensor1[i], tensor2[i]) - std::min(tensor1[i], tensor2[i]) <= tolerance); + } +} + +void CompareData(Half tensor1[], Half tensor2[], size_t tensorSize) +{ + for (size_t i = 0; i < tensorSize; i++) + { + CHECK(tensor1[i] == doctest::Approx( tensor2[i] )); + } +} + +void CompareData(TfLiteFloat16 tensor1[], TfLiteFloat16 tensor2[], size_t tensorSize) +{ + uint16_t tolerance = 1; + for (size_t i = 0; i < tensorSize; i++) + { + uint16_t tensor1Data = tensor1[i].data; + uint16_t tensor2Data = tensor2[i].data; + CHECK(std::max(tensor1Data, tensor2Data) - std::min(tensor1Data, tensor2Data) <= tolerance); + } +} + +void CompareData(TfLiteFloat16 tensor1[], Half tensor2[], size_t tensorSize) { + uint16_t tolerance = 1; + for 
(size_t i = 0; i < tensorSize; i++) + { + uint16_t tensor1Data = tensor1[i].data; + uint16_t tensor2Data = half_float::detail::float2half(tensor2[i]); + CHECK(std::max(tensor1Data, tensor2Data) - std::min(tensor1Data, tensor2Data) <= tolerance); + } +} + +template <> +void CompareOutputData(std::unique_ptr& tfLiteInterpreter, + std::unique_ptr& armnnDelegateInterpreter, + std::vector& expectedOutputShape, + std::vector& expectedOutputValues, + unsigned int outputIndex) +{ + auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[outputIndex]; + auto tfLiteDelegateOutputTensor = tfLiteInterpreter->tensor(tfLiteDelegateOutputId); + auto tfLiteDelegateOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[outputIndex]; + auto armnnDelegateOutputTensor = armnnDelegateInterpreter->tensor(armnnDelegateOutputId); + auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateOutputId); + + CHECK(expectedOutputShape.size() == tfLiteDelegateOutputTensor->dims->size); + CHECK(expectedOutputShape.size() == armnnDelegateOutputTensor->dims->size); + + for (size_t i = 0; i < expectedOutputShape.size(); i++) + { + CHECK(armnnDelegateOutputTensor->dims->data[i] == expectedOutputShape[i]); + CHECK(tfLiteDelegateOutputTensor->dims->data[i] == expectedOutputShape[i]); + CHECK(tfLiteDelegateOutputTensor->dims->data[i] == armnnDelegateOutputTensor->dims->data[i]); + } + + armnnDelegate::CompareData(armnnDelegateOutputData, expectedOutputValues.data(), expectedOutputValues.size()); + armnnDelegate::CompareData(tfLiteDelegateOutputData, expectedOutputValues.data(), expectedOutputValues.size()); + armnnDelegate::CompareData(tfLiteDelegateOutputData, armnnDelegateOutputData, expectedOutputValues.size()); +} + +template <> +void FillInput(std::unique_ptr& interpreter, int inputIndex, std::vector& inputValues) +{ + auto tfLiteDelegateInputId = interpreter->inputs()[inputIndex]; + auto tfLiteDelageInputData = interpreter->typed_tensor(tfLiteDelegateInputId); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + tfLiteDelageInputData[i].data = half_float::detail::float2half(inputValues[i]); + + } +} + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/TestUtils.hpp b/arch/arm/ARMnn/delegate/src/test/TestUtils.hpp new file mode 100644 index 0000000000..5d4a0ed7d4 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/TestUtils.hpp @@ -0,0 +1,101 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include + +#include + +#include + +using Half = half_float::half; + +namespace armnnDelegate +{ + +/// Can be used to assign input data from a vector to a model input. +/// Example usage can be found in ResizeTesthelper.hpp +template +void FillInput(std::unique_ptr& interpreter, int inputIndex, std::vector& inputValues) +{ + auto tfLiteDelegateInputId = interpreter->inputs()[inputIndex]; + auto tfLiteDelageInputData = interpreter->typed_tensor(tfLiteDelegateInputId); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + tfLiteDelageInputData[i] = inputValues[i]; + } +} + +template <> +void FillInput(std::unique_ptr& interpreter, int inputIndex, std::vector& inputValues); + +/// Can be used to compare bool data coming from a tflite interpreter +/// Boolean types get converted to a bit representation in a vector. 
vector.data() returns a void pointer +/// instead of a pointer to bool. Therefore a special function to compare to vector of bool is required +void CompareData(std::vector& tensor1, bool tensor2[], size_t tensorSize); +void CompareData(bool tensor1[], bool tensor2[], size_t tensorSize); + +/// Can be used to compare float data coming from a tflite interpreter with a tolerance of limit_of_float*100 +void CompareData(float tensor1[], float tensor2[], size_t tensorSize); + +/// Can be used to compare float data coming from a tflite interpreter with a given percentage tolerance +void CompareData(float tensor1[], float tensor2[], size_t tensorSize, float percentTolerance); + +/// Can be used to compare int8_t data coming from a tflite interpreter with a tolerance of 1 +void CompareData(int8_t tensor1[], int8_t tensor2[], size_t tensorSize); + +/// Can be used to compare uint8_t data coming from a tflite interpreter with a tolerance of 1 +void CompareData(uint8_t tensor1[], uint8_t tensor2[], size_t tensorSize); + +/// Can be used to compare int16_t data coming from a tflite interpreter with a tolerance of 1 +void CompareData(int16_t tensor1[], int16_t tensor2[], size_t tensorSize); + +/// Can be used to compare int32_t data coming from a tflite interpreter with a tolerance of 1 +void CompareData(int32_t tensor1[], int32_t tensor2[], size_t tensorSize); + +/// Can be used to compare Half (Float16) data with a tolerance of limit_of_float*100 +void CompareData(Half tensor1[], Half tensor2[], size_t tensorSize); + +/// Can be used to compare TfLiteFloat16 data coming from a tflite interpreter +void CompareData(TfLiteFloat16 tensor1[], TfLiteFloat16 tensor2[], size_t tensorSize); + +/// Can be used to compare Half (Float16) data and TfLiteFloat16 data coming from a tflite interpreter +void CompareData(TfLiteFloat16 tensor1[], Half tensor2[], size_t tensorSize); + +/// Can be used to compare the output tensor shape and values +/// from armnnDelegateInterpreter and tfLiteInterpreter. 
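+/// A minimal sketch of a typical call (shape and values are hypothetical, assuming a float
+/// output tensor and int32_t shape elements):
+///     std::vector<int32_t> expectedShape  { 1, 2, 2, 1 };
+///     std::vector<float>   expectedValues { 1.f, 2.f, 3.f, 4.f };
+///     armnnDelegate::CompareOutputData<float>(tfLiteInterpreter, armnnDelegateInterpreter,
+///                                             expectedShape, expectedValues);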
+/// Example usage can be found in ControlTestHelper.hpp +template +void CompareOutputData(std::unique_ptr& tfLiteInterpreter, + std::unique_ptr& armnnDelegateInterpreter, + std::vector& expectedOutputShape, + std::vector& expectedOutputValues, + unsigned int outputIndex = 0) +{ + auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[outputIndex]; + auto tfLiteDelegateOutputTensor = tfLiteInterpreter->tensor(tfLiteDelegateOutputId); + auto tfLiteDelegateOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[outputIndex]; + auto armnnDelegateOutputTensor = armnnDelegateInterpreter->tensor(armnnDelegateOutputId); + auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateOutputId); + + CHECK(expectedOutputShape.size() == tfLiteDelegateOutputTensor->dims->size); + CHECK(expectedOutputShape.size() == armnnDelegateOutputTensor->dims->size); + + for (size_t i = 0; i < expectedOutputShape.size(); i++) + { + CHECK(expectedOutputShape[i] == armnnDelegateOutputTensor->dims->data[i]); + CHECK(tfLiteDelegateOutputTensor->dims->data[i] == expectedOutputShape[i]); + CHECK(tfLiteDelegateOutputTensor->dims->data[i] == armnnDelegateOutputTensor->dims->data[i]); + } + + armnnDelegate::CompareData(expectedOutputValues.data(), armnnDelegateOutputData , expectedOutputValues.size()); + armnnDelegate::CompareData(tfLiteDelegateOutputData , expectedOutputValues.data(), expectedOutputValues.size()); + armnnDelegate::CompareData(tfLiteDelegateOutputData , armnnDelegateOutputData , expectedOutputValues.size()); +} + +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/test/TransposeTest.cpp b/arch/arm/ARMnn/delegate/src/test/TransposeTest.cpp new file mode 100644 index 0000000000..67751e325a --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/TransposeTest.cpp @@ -0,0 +1,46 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "TransposeTestHelper.hpp" + +#include + +#include +#include + +namespace armnnDelegate +{ + +TEST_SUITE ("Transpose_GpuAccTests") +{ + +TEST_CASE ("Transpose_Float32_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + TransposeFP32Test(backends); +} + +} + +TEST_SUITE ("Transpose_CpuAccTests") +{ + +TEST_CASE ("Transpose_Float32_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + TransposeFP32Test(backends); +} + +} + +TEST_SUITE ("Transpose_CpuRefTests") +{ +TEST_CASE ("Transpose_Float32_CpuRef_Test") +{ + std::vector backends = { armnn::Compute::CpuRef }; + TransposeFP32Test(backends); +} +} +} // namespace armnnDelegate diff --git a/arch/arm/ARMnn/delegate/src/test/TransposeTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/TransposeTestHelper.hpp new file mode 100644 index 0000000000..1d55273b9f --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/TransposeTestHelper.hpp @@ -0,0 +1,174 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace +{ +std::vector CreateTransposeTfLiteModel(tflite::TensorType tensorType, + const std::vector & input0TensorShape, + const std::vector & inputPermVecShape, + const std::vector & outputTensorShape, + const std::vector& inputPermVec) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + std::array, 2> buffers; + buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})); + buffers[1] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(inputPermVec.data()), + sizeof(int32_t) * inputPermVec.size())); + std::array, 3> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(input0TensorShape.data(), + input0TensorShape.size()), + tensorType, 0); + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputPermVecShape.data(), + inputPermVecShape.size()), + tflite::TensorType_INT32, 1, + flatBufferBuilder.CreateString("permutation_vector")); + tensors[2] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + tensorType); + const std::vector operatorInputs{0, 1}; + const std::vector operatorOutputs{2}; + flatbuffers::Offset transposeOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + BuiltinOptions_TransposeOptions, + CreateTransposeOptions(flatBufferBuilder).Union()); + const std::vector subgraphInputs{0, 1}; + const std::vector subgraphOutputs{2}; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&transposeOperator, 1)); + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Transpose Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, + tflite::BuiltinOperator_TRANSPOSE); + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + flatBufferBuilder.Finish(flatbufferModel); + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +void TransposeFP32Test(std::vector& backends) +{ + using namespace tflite; + + // set test input data + std::vector input0Shape {4, 2, 3}; + std::vector inputPermVecShape {3}; + std::vector outputShape {2, 3, 4}; + + std::vector input0Values = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}; + std::vector inputPermVec = {2, 0, 1}; + std::vector expectedOutputValues = {0, 3, 6, 9, 12, 15, 18, 21, 1, 4, 7, 10, + 13, 16, 19, 22, 2, 5, 8, 11, 14, 17, 20, 23}; + + // create model + std::vector modelBuffer = CreateTransposeTfLiteModel(::tflite::TensorType_FLOAT32, + input0Shape, + inputPermVecShape, + outputShape, + inputPermVec); + + const Model* tfLiteModel = 
GetModel(modelBuffer.data()); + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data for tflite + auto tfLiteInterpreterInput0Id = tfLiteInterpreter->inputs()[0]; + auto tfLiteInterpreterInput0Data = tfLiteInterpreter->typed_tensor(tfLiteInterpreterInput0Id); + for (unsigned int i = 0; i < input0Values.size(); ++i) + { + tfLiteInterpreterInput0Data[i] = input0Values[i]; + } + + auto tfLiteInterpreterInput1Id = tfLiteInterpreter->inputs()[1]; + auto tfLiteInterpreterInput1Data = tfLiteInterpreter->typed_tensor(tfLiteInterpreterInput1Id); + for (unsigned int i = 0; i < inputPermVec.size(); ++i) + { + tfLiteInterpreterInput1Data[i] = inputPermVec[i]; + } + + //Set input data for armnn delegate + auto armnnDelegateInput0Id = armnnDelegateInterpreter->inputs()[0]; + auto armnnDelegateInput0Data = armnnDelegateInterpreter->typed_tensor(armnnDelegateInput0Id); + for (unsigned int i = 0; i < input0Values.size(); ++i) + { + armnnDelegateInput0Data[i] = input0Values[i]; + } + + auto armnnDelegateInput1Id = armnnDelegateInterpreter->inputs()[1]; + auto armnnDelegateInput1Data = armnnDelegateInterpreter->typed_tensor(armnnDelegateInput1Id); + for (unsigned int i = 0; i < inputPermVec.size(); ++i) + { + armnnDelegateInput1Data[i] = inputPermVec[i]; + } + + // Run EnqueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + auto tfLiteInterpreterOutputId = tfLiteInterpreter->outputs()[0]; + auto tfLiteInterpreterOutputData = tfLiteInterpreter->typed_tensor(tfLiteInterpreterOutputId); + auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0]; + auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateOutputId); + for (size_t i = 0; i < expectedOutputValues.size(); ++i) + { + CHECK(expectedOutputValues[i] == armnnDelegateOutputData[i]); + CHECK(tfLiteInterpreterOutputData[i] == expectedOutputValues[i]); + CHECK(tfLiteInterpreterOutputData[i] == armnnDelegateOutputData[i]); + } + + armnnDelegateInterpreter.reset(nullptr); +} +} diff --git a/arch/arm/ARMnn/delegate/src/test/UnidirectionalSequenceLstmTest.cpp b/arch/arm/ARMnn/delegate/src/test/UnidirectionalSequenceLstmTest.cpp new file mode 100644 index 0000000000..4bee715788 --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/UnidirectionalSequenceLstmTest.cpp @@ -0,0 +1,1464 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "UnidirectionalSequenceLstmTestHelper.hpp" + +#include + +#include +#include +#include + +namespace armnnDelegate +{ + +void UnidirectionalSequenceLstmTest(std::vector& backends) +{ + int32_t batchSize = 3; + int32_t timeSize = 2; + int32_t inputSize = 3; + int32_t outputSize = 4; + // cellSize and outputSize have the same size when there is no projection. + int32_t numUnits = outputSize; + + //tensorInfo12, + bool hasInputToInputWeights = true; + std::vector inputToInputWeights = { -0.49536117f, -0.0556083915f, -0.102400711f, + -0.117484632f, 0.3298470976f, -0.1179017122f, + 0.214305695f, 0.42135173085f, 0.003878414626f, + -0.348303917f, -0.1881275477f, 0.0343011027f }; + + std::vector inputToForgetWeights = { 0.2415594226f, 0.15400093799f, 0.4566498398f, + -0.3810434485f, 0.268383264f, -0.009807467424f, + -0.3522925403f, -0.24275735512f, -0.28344226125f, + 0.13512269116f, -0.4932442977f, -0.10039821991f }; + + std::vector inputToCellWeights = { -0.2504855627f, 0.184490025045f, -0.2480507493f, + 0.386399507f, -0.259465157985f, -0.16545993089f, + -0.4230232555f, 0.341664791103f, -0.18127849691f, + -0.2277662414f, -0.55275535589f, 0.34184026718f }; + + std::vector inputToOutputWeights = { 0.2303854227f, 0.5218806862f, -0.4865379333f, + 0.53969591851f, 0.23393625035f, -0.27140527306f, + 0.50009280443f, 0.07511717046f, 0.3998299249f, + -0.51717478049f, 0.1889653282f, -0.367323637f }; + + //tensorInfo16, + bool hasRecurrentToInputWeights = true; + std::vector recurrentToInputWeights = { -0.128009796112f, 0.1995525098f, -0.07745539397f, 0.1558421701f, + -0.265254765766f, -0.38837709614f, -0.05636804124f, 0.4259087456f, + 0.17628988623f, 0.3877420127f, 0.53300309181f, -0.0959980934f, + 0.00302857416f, 0.3266998827f, -0.142509296562f, -0.04433270756f }; + + std::vector recurrentToForgetWeights = { -0.09499983487f, -0.08814888417f, -0.04834804721f, 0.1516668247f, + -0.3967529535f, -0.06463699788f, 0.4952811002f, 0.003274492938f, + -0.0968840941f, 0.17928104102f, 0.0031281141592f, -0.3387276584f, + -0.3587934076f, 0.06705895066f, 0.22463923692f, 0.1961955726f }; + + std::vector recurrentToCellWeights = { -0.21938985582f, -0.3023648226f, -0.1170005202f, -0.3509177422f, + -0.4286288613f, 0.2726137042f, 0.09216640889f, -0.06551410215f, + 0.20453298098f, 0.2393476665f, 0.11846517771f, 0.2630801796f, + 0.3954237699f, -0.19407111404f, 0.30412107706f, -0.27342408554f }; + + std::vector recurrentToOutputWeights = { -0.32921677827f, 0.32624614238f, -0.1388191282f, -0.17879831790f, + -0.15185534954f, -0.16918526583f, -0.10087361183f, -0.5436913968f, + 0.016758225858f, 0.30454617738f, -0.41493862867f, -0.005565764375f, + -0.12584099173f, -0.12319286912f, 0.2407919466f, -0.08879069983f }; + // tensorInfo4 + bool hasCellToInputWeights = false; + std::vector cellToInputWeights; + bool hasCellToForgetWeights = false; + std::vector cellToForgetWeights; + bool hasCellToOutputWeights = false; + std::vector cellToOutputWeights; + + bool hasInputGateBias = true; + std::vector inputGateBias = {0., 0., 0., 0.}; + std::vector forgetGateBias = {1., 1., 1., 1.}; + std::vector cellBias = {0., 0., 0., 0.}; + std::vector outputGateBias = {0., 0., 0., 0.}; + + bool hasProjectionWeights = false; + std::vector projectionWeights; + bool hasProjectionBias = false; + std::vector projectionBias; + + bool hasInputLayerNormWeights = false; + std::vector inputLayerNormWeights; + bool hasForgetLayerNormWeights = false; + std::vector forgetLayerNormWeights; + bool hasCellLayerNormWeights = 
false; + std::vector cellLayerNormWeights; + bool hasOutputLayerNormWeights = false; + std::vector outputLayerNormWeights; + + std::vector inputValues = { 1., 2., 3., 4., 5., 4., + 3., 2., 1., 2., 3., 4., + 5., 4., 3., 2., 1., 2. }; + std::vector expectedOutputValues = { -0.0714901f, -0.162117f, -0.175168f, -0.0232934f, + -0.168107f, -0.414129f, -0.549875f, -0.00803579f, + -0.0668735f, 0.204078f, -0.42765f, -0.0312321f, + -0.120003f, -0.0941918f, -0.456391f, -0.0287019f, + -0.0342921f, 0.20824f, -0.656989f, -0.00415265f, + -0.10493f, 0.14211f, -0.583478f, -0.0329754f }; + + tflite::ActivationFunctionType activationFunction = tflite::ActivationFunctionType_TANH; + float clippingThresCell = 10.f; + float clippingThresProj = 0.f; + bool isTimeMajor = false; + + UnidirectionalSequenceLstmTestImpl(backends, + ::tflite::TensorType_FLOAT32, + batchSize, + timeSize, + inputSize, + outputSize, + numUnits, + hasInputToInputWeights, + inputToInputWeights, + inputToForgetWeights, + inputToCellWeights, + inputToOutputWeights, + hasRecurrentToInputWeights, + recurrentToInputWeights, + recurrentToForgetWeights, + recurrentToCellWeights, + recurrentToOutputWeights, + hasCellToInputWeights, + cellToInputWeights, + hasCellToForgetWeights, + cellToForgetWeights, + hasCellToOutputWeights, + cellToOutputWeights, + hasInputGateBias, + inputGateBias, + forgetGateBias, + cellBias, + outputGateBias, + hasProjectionWeights, + projectionWeights, + hasProjectionBias, + projectionBias, + hasInputLayerNormWeights, + inputLayerNormWeights, + hasForgetLayerNormWeights, + forgetLayerNormWeights, + hasCellLayerNormWeights, + cellLayerNormWeights, + hasOutputLayerNormWeights, + outputLayerNormWeights, + inputValues, + expectedOutputValues, + activationFunction, + clippingThresCell, + clippingThresProj, + isTimeMajor); +} + +void UnidirectionalSequenceLstmTimeMajorTest(std::vector& backends) +{ + int32_t batchSize = 3; + int32_t timeSize = 2; + int32_t inputSize = 3; + int32_t outputSize = 4; + // cellSize and outputSize have the same size when there is no projection. 
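That comment is the structural fact every no-projection case in this file leans on: without a projection layer, the hidden state fed back into the gates has the same width as the cell state, so numUnits doubles as outputSize. The plain-C++ sketch below is only an illustration of the usual LSTM gate equations (the names and the row-major weight layout are assumptions made here; it is not the ArmNN or TfLite implementation) and makes that arithmetic explicit.

#include <cmath>
#include <vector>

namespace lstm_sketch
{
// One LSTM step for a single batch element: no CIFG, no peephole, no projection.
// W* are [numUnits x inputSize] input weights, R* are [numUnits x numUnits]
// recurrent weights (outputSize == numUnits here), b* are per-gate biases.
struct Weights
{
    std::vector<float> Wi, Wf, Wc, Wo;   // input-to-gate weights, row-major
    std::vector<float> Ri, Rf, Rc, Ro;   // recurrent-to-gate weights, row-major
    std::vector<float> bi, bf, bc, bo;   // gate biases
};

inline float Sigmoid(float x) { return 1.0f / (1.0f + std::exp(-x)); }

// Dot product of row 'row' of a row-major matrix 'w' with vector 'x'.
inline float Dot(const std::vector<float>& w, size_t row, const std::vector<float>& x)
{
    float sum = 0.0f;
    for (size_t i = 0; i < x.size(); ++i) { sum += w[row * x.size() + i] * x[i]; }
    return sum;
}

// Updates cellState and output in place; both hold numUnits elements.
inline void LstmStep(const Weights& w,
                     const std::vector<float>& input,
                     std::vector<float>& cellState,
                     std::vector<float>& output)
{
    const std::vector<float> prevOutput = output;
    for (size_t u = 0; u < cellState.size(); ++u)
    {
        const float i = Sigmoid(Dot(w.Wi, u, input) + Dot(w.Ri, u, prevOutput) + w.bi[u]); // input gate
        const float f = Sigmoid(Dot(w.Wf, u, input) + Dot(w.Rf, u, prevOutput) + w.bf[u]); // forget gate
        const float g = std::tanh(Dot(w.Wc, u, input) + Dot(w.Rc, u, prevOutput) + w.bc[u]); // cell candidate
        const float o = Sigmoid(Dot(w.Wo, u, input) + Dot(w.Ro, u, prevOutput) + w.bo[u]); // output gate
        cellState[u] = f * cellState[u] + i * g;
        output[u]    = o * std::tanh(cellState[u]); // no projection: output width == numUnits
    }
}
} // namespace lstm_sketch

In the projection variants further down, the last assignment would additionally multiply an [outputSize x numUnits] projection matrix (plus the optional projection bias) into o * tanh(c), which is why those tests pick a numUnits that differs from outputSize.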
+ int32_t numUnits = outputSize; + + std::vector inputShape = {timeSize, batchSize, inputSize}; + std::vector cellStateInTensorInfo = {batchSize, numUnits}; + std::vector outputStateInTensorInfo = {batchSize, outputSize}; + + std::vector outputTensorInfo = {timeSize, batchSize, outputSize}; + + //tensorInfo12 + bool hasInputToInputWeights = true; + std::vector inputToInputWeights = { 0.27277296781539917f, 0.3813590407371521f, -0.394489049911499f, + 0.2782636880874634f, -0.3793870210647583f, -0.018918335437774658f, + 0.2724653482437134f, -0.19314253330230713f, -0.2947450876235962f, + -0.30253493785858154f, 0.4241350293159485f, -0.22560018301010132f }; + + std::vector inputToForgetWeights = { -0.2667974531650543f, -0.05505800247192383f, -0.20932340621948242f, + -0.14345619082450867f, 0.09666192531585693f, -0.2604355812072754f, + -0.2681812047958374f, -0.3314584493637085f, 0.4485899806022644f, + -0.23467743396759033f, 0.5072842240333557f, -0.4192768931388855f }; + + std::vector inputToCellWeights = { -0.15782442688941956f, -0.027530014514923096f, 0.4789854884147644f, + 0.23227906227111816f, 0.28259342908859253f, -0.030095696449279785f, + 0.10071521997451782f, -0.08535495400428772f, 0.18563997745513916f, + -0.3049069046974182f, -0.478048175573349f, 0.025234103202819824f }; + + std::vector inputToOutputWeights = { -0.04584759473800659f, -0.2716066539287567f, 0.012970447540283203f, + -0.4729190170764923f, -0.37422770261764526f, 0.49352723360061646f, + 0.3163864016532898f, -0.436781644821167f, -0.33074596524238586f, + -0.32885751128196716f, -0.40959352254867554f, -0.2124689817428589f }; + + //tensorInfo16 + bool hasRecurrentToInputWeights = true; + std::vector recurrentToInputWeights = { 0.23788475990f, -0.24948765337f, 0.50044941902f, 0.14431896805f, + -0.115940228137f, -0.717082679f, -0.17208620906f, 0.17850610617f, + -0.16702319684f, -0.11384502053f, -0.309785276245f, -0.3316611672f, + 0.52380162477f, -0.06839632987f, -0.391478359627f, -0.10756178963f }; + + std::vector recurrentToForgetWeights = { 0.11383482068f, 0.1676601767f, -0.08550968004f, 0.03399394089f, + 0.08042152225f, -0.2133381964f, 0.05182432704f, 0.38161808255f, + -0.5018365979f, -0.08043262364f, 0.07894329014f, -0.07547105155f, + 0.12047368288f, 0.2986997961f, 0.0485043078f, -0.13372567296f }; + + std::vector recurrentToCellWeights = { 0.0433832928545f, 0.07587072294f, -0.120520234107f, 0.604576051f, + -0.434353142986f, 0.009314475068f, 0.005085289478f, 0.08488202038f, + -0.00025437487886f, 0.15245915082f, -0.1936587542f, 0.004754020f, + -0.1582719236f, 0.3307867646f, 0.0236605107784f, 0.307716339826f }; + + std::vector recurrentToOutputWeights = { -0.079031050201f, 0.041414566286f, -0.583727357285f, 0.1025384515f, + -0.172372072937f, 0.09214124082f, 0.178184121827f, -0.2439443916f, + 0.104485116899f, 0.2600405514f, 0.064414866268f, 0.24141204357f, + 0.281875759363f, -0.14234502664f, 0.15126448862f, -0.24421440064f }; + // tensorInfo4 + bool hasCellToInputWeights = false; + std::vector cellToInputWeights; + bool hasCellToForgetWeights = false; + std::vector cellToForgetWeights; + bool hasCellToOutputWeights = false; + std::vector cellToOutputWeights; + + bool hasInputGateBias = true; + std::vector inputGateBias = {0., 0., 0., 0.}; + std::vector forgetGateBias = {1., 1., 1., 1.}; + std::vector cellBias = {0., 0., 0., 0.}; + std::vector outputGateBias = {0., 0., 0., 0.}; + + bool hasProjectionWeights = false; + std::vector projectionWeights; + bool hasProjectionBias = false; + std::vector projectionBias; + + bool 
hasInputLayerNormWeights = false; + std::vector inputLayerNormWeights; + bool hasForgetLayerNormWeights = false; + std::vector forgetLayerNormWeights; + bool hasCellLayerNormWeights = false; + std::vector cellLayerNormWeights; + bool hasOutputLayerNormWeights = false; + std::vector outputLayerNormWeights; + + std::vector inputValues = { 1., 2., 3., 4., 5., 4., + 3., 2., 1., 2., 3., 4., + 5., 4., 3., 2., 1., 2. }; + std::vector expectedOutputValues = { 0.135658f, 0.124673f, 0.021209f, -0.0530204f, + 0.106138f, 0.0404792f, 0.0151644f, -0.00675166f, + -0.0128514f, 0.0644884f, 0.0709072f, -0.0454045f, + 0.162886f, 0.166494f, 0.0277046f, -0.0369807f, + 0.111716f, 0.043119f, 0.0762981f, -0.0122854f, + 0.104397f, 0.2144f, 0.119192f, -0.0839058f }; + + tflite::ActivationFunctionType activationFunction = tflite::ActivationFunctionType_TANH; + float clippingThresCell = 10.f; + float clippingThresProj = 0.f; + bool isTimeMajor = true; + + UnidirectionalSequenceLstmTestImpl(backends, + ::tflite::TensorType_FLOAT32, + batchSize, + timeSize, + inputSize, + outputSize, + numUnits, + hasInputToInputWeights, + inputToInputWeights, + inputToForgetWeights, + inputToCellWeights, + inputToOutputWeights, + hasRecurrentToInputWeights, + recurrentToInputWeights, + recurrentToForgetWeights, + recurrentToCellWeights, + recurrentToOutputWeights, + hasCellToInputWeights, + cellToInputWeights, + hasCellToForgetWeights, + cellToForgetWeights, + hasCellToOutputWeights, + cellToOutputWeights, + hasInputGateBias, + inputGateBias, + forgetGateBias, + cellBias, + outputGateBias, + hasProjectionWeights, + projectionWeights, + hasProjectionBias, + projectionBias, + hasInputLayerNormWeights, + inputLayerNormWeights, + hasForgetLayerNormWeights, + forgetLayerNormWeights, + hasCellLayerNormWeights, + cellLayerNormWeights, + hasOutputLayerNormWeights, + outputLayerNormWeights, + inputValues, + expectedOutputValues, + activationFunction, + clippingThresCell, + clippingThresProj, + isTimeMajor); +} + +void UnidirectionalSequenceLstmNoCifgWithPeepholeWithProjectionTest(std::vector& backends) +{ + int32_t batchSize = 2; + int32_t timeSize = 3; + int32_t inputSize = 4; + int32_t outputSize = 5; + int32_t numUnits = 6; + + std::vector inputShape = {batchSize, timeSize, inputSize}; + std::vector cellStateInTensorInfo = {batchSize, numUnits}; + std::vector outputStateInTensorInfo = {batchSize, outputSize}; + + std::vector outputTensorInfo = {batchSize, timeSize, outputSize}; + + //tensorInfoInputSize, + bool hasInputToInputWeights = true; + std::vector inputToInputWeights = { 0.021393683f, 0.06124551f, 0.046905167f, -0.014657677f, + -0.03149463f, 0.09171803f, 0.14647801f, 0.10797193f, + -0.0057968358f, 0.0019193048f, -0.2726754f, 0.10154029f, + -0.018539885f, 0.080349885f, -0.10262385f, -0.022599787f, + -0.09121155f, -0.008675967f, -0.045206103f, -0.0821282f, + -0.008045952f, 0.015478081f, 0.055217247f, 0.038719587f }; + + std::vector inputToForgetWeights = { -0.0018401089f, -0.004852237f, 0.03698424f, 0.014181704f, + 0.028273236f, -0.016726194f, -0.05249759f, -0.10204261f, + 0.00861066f, -0.040979505f, -0.009899187f, 0.01923892f, + -0.028177269f, -0.08535103f, -0.14585495f, 0.10662567f, + -0.01909731f, -0.017883534f, -0.0047269356f, -0.045103323f, + 0.0030784295f, 0.076784775f, 0.07463696f, 0.094531395f}; + + std::vector inputToCellWeights = { -0.04580283f, -0.09549462f, -0.032418985f, -0.06454633f, + -0.043528453f, 0.043018587f, -0.049152344f, -0.12418144f, + -0.078985475f, -0.07596889f, 0.019484362f, -0.11434962f, + -0.0074034138f, 
-0.06314844f, -0.092981495f, 0.0062155537f, + -0.025034338f, -0.0028890965f, 0.048929527f, 0.06235075f, + 0.10665918f, -0.032036792f, -0.08505916f, -0.10843358f }; + + std::vector inputToOutputWeights = { -0.0998932f, -0.07201956f, -0.052803773f, -0.15629593f, + -0.15001918f, -0.07650751f, 0.02359855f, -0.075155355f, + -0.08037709f, -0.15093534f, 0.029517552f, -0.04751393f, + 0.010350531f, -0.02664851f, -0.016839722f, -0.023121163f, + 0.0077019283f, 0.012851257f, -0.05040649f, -0.0129761f, + -0.021737747f, -0.038305793f, -0.06870586f, -0.01481247f }; + + //tensorInfoOutputSize, + bool hasRecurrentToInputWeights = true; + std::vector recurrentToInputWeights = { -0.001374326f, -0.078856036f, 0.10672688f, 0.029162422f, + -0.11585556f, 0.02557986f, -0.13446963f, -0.035785314f, + -0.01244275f, 0.025961924f, -0.02337298f, -0.044228926f, + -0.055839065f, -0.046598054f, -0.010546039f, -0.06900766f, + 0.027239809f, 0.022582639f, -0.013296484f, -0.05459212f, + 0.08981f, -0.045407712f, 0.08682226f, -0.06867011f, + -0.14390695f, -0.02916037f, 0.000996957f, 0.091420636f, + 0.14283475f, -0.07390571f }; + + std::vector recurrentToForgetWeights = { -0.057784554f, -0.026057621f, -0.068447545f, -0.022581743f, + 0.14811787f, 0.10826372f, 0.09471067f, 0.03987225f, + -0.0039523416f, 0.00030638507f, 0.053185795f, 0.10572994f, + 0.08414449f, -0.022036452f, -0.00066928595f, -0.09203576f, + 0.032950465f, -0.10985798f, -0.023809856f, 0.0021431844f, + -0.02196096f, -0.00326074f, 0.00058621005f, -0.074678116f, + -0.06193199f, 0.055729095f, 0.03736828f, 0.020123724f, + 0.061878487f, -0.04729229f }; + + std::vector recurrentToCellWeights = { -0.037322544f, 0.018592842f, 0.0056175636f, -0.06253426f, + 0.055647098f, -0.05713207f, -0.05626563f, 0.005559383f, + 0.03375411f, -0.025757805f, -0.088049285f, 0.06017052f, + -0.06570978f, 0.007384076f, 0.035123326f, -0.07920549f, + 0.053676967f, 0.044480428f, -0.07663568f, 0.0071805613f, + 0.08089997f, 0.05143358f, 0.038261272f, 0.03339287f, + -0.027673481f, 0.044746667f, 0.028349208f, 0.020090483f, + -0.019443132f, -0.030755889f }; + + std::vector recurrentToOutputWeights = { 0.025825322f, -0.05813119f, 0.09495884f, + -0.045984812f,-0.01255415f, -0.0026479573f, + -0.08196161f, -0.054914974f, -0.0046604523f, + -0.029587349f, -0.044576716f, -0.07480124f, + -0.082868785f, 0.023254942f, 0.027502948f, + -0.0039728214f, -0.08683098f, -0.08116779f, + -0.014675607f, -0.037924774f, -0.023314456f, + -0.007401714f, -0.09255757f, 0.029460307f, + -0.08829125f, -0.005139627f, -0.08989442f, + -0.0555066f, 0.13596267f, 0.025062224f }; + // tensorInfoNumUnits + bool hasCellToInputWeights = true; + std::vector cellToInputWeights = { 0.040369894f, 0.030746894f, 0.24704495f, + 0.018586371f, -0.037586458f, -0.15312155f }; + bool hasCellToForgetWeights = true; + std::vector cellToForgetWeights = { -0.01998659f, -0.15568835f, -0.24248174f, + -0.012770197f, 0.041331276f, -0.072311886f }; + bool hasCellToOutputWeights = true; + std::vector cellToOutputWeights = { 0.08286371f, -0.08261836f, -0.51210177f, + 0.002913762f, 0.17764764f, -0.5495371f }; + + bool hasInputGateBias = true; + std::vector inputGateBias = { 0.02234832f, 0.14757581f, 0.18176508f, + 0.10380666f, 0.053110216f, -0.06928846f }; + std::vector forgetGateBias = { 0.035185695f, -0.042891346f, -0.03032477f, + 0.23027696f, 0.11098921f, 0.08989442f }; + std::vector cellBias = { -0.024379363f, 0.0055531194f, 0.23377132f, + 0.033463873f, -0.1483596f, 0.029460307f }; + std::vector outputGateBias = { 0.046159424f, -0.0012809046f, 0.03563469f, + 
0.12648113f, 0.027195795f, 0.35373217f }; + + bool hasProjectionWeights = true; + std::vector projectionWeights = { -0.009802181f, 0.09401916f, 0.0717386f, -0.13895074f, 0.09641832f, + 0.060420845f, 0.08539281f, 0.054285463f, 0.061395317f, 0.034448683f, + -0.042991187f, 0.019801661f, -0.16840284f, -0.015726732f, -0.23041931f, + -0.024478018f, -0.10959692f, -0.013875541f, 0.18600968f, -0.061274476f, + 0.0138165f, -0.08160894f, -0.07661644f, 0.032372914f, 0.16169067f, + 0.22465782f, -0.03993472f, -0.004017731f, 0.08633481f, -0.28869787f }; + + bool hasProjectionBias = true; + std::vector projectionBias(outputSize, 0.f); + + bool hasInputLayerNormWeights = false; + std::vector inputLayerNormWeights; + bool hasForgetLayerNormWeights = false; + std::vector forgetLayerNormWeights; + bool hasCellLayerNormWeights = false; + std::vector cellLayerNormWeights; + bool hasOutputLayerNormWeights = false; + std::vector outputLayerNormWeights; + + std::vector inputValues = { 1., 2., 3., 4., 5., 4., + 3., 2., 1., 2., 3., 4., + 5., 4., 3., 2., 1., 2., + 1., 2., 3., 4., 5., 4.}; + std::vector expectedOutputValues = { -0.0135612f, -0.0263441f, 0.0314008f, -0.00883455f, 0.00763052f, + -0.00126877f, -0.0292959f, 0.0449957f, -0.00976195f, -0.00492338f, + -0.0175702f, -0.0431753f, 0.0597117f, -0.0169154f, 0.0142087f, + 0.00472515f, -0.0196355f, 0.0342524f, -0.00407936f, -0.0253189f, + -0.00512944f, -0.0293754f, 0.0512771f, -0.0151874f, -0.0246433f, + -0.00744986f, -0.0345103f, 0.0450666f, -0.00944991f, 0.0126895f }; + + tflite::ActivationFunctionType activationFunction = tflite::ActivationFunctionType_TANH; + float clippingThresCell = 10.f; + float clippingThresProj = 0.f; + bool isTimeMajor = false; + + UnidirectionalSequenceLstmTestImpl(backends, + ::tflite::TensorType_FLOAT32, + batchSize, + timeSize, + inputSize, + outputSize, + numUnits, + hasInputToInputWeights, + inputToInputWeights, + inputToForgetWeights, + inputToCellWeights, + inputToOutputWeights, + hasRecurrentToInputWeights, + recurrentToInputWeights, + recurrentToForgetWeights, + recurrentToCellWeights, + recurrentToOutputWeights, + hasCellToInputWeights, + cellToInputWeights, + hasCellToForgetWeights, + cellToForgetWeights, + hasCellToOutputWeights, + cellToOutputWeights, + hasInputGateBias, + inputGateBias, + forgetGateBias, + cellBias, + outputGateBias, + hasProjectionWeights, + projectionWeights, + hasProjectionBias, + projectionBias, + hasInputLayerNormWeights, + inputLayerNormWeights, + hasForgetLayerNormWeights, + forgetLayerNormWeights, + hasCellLayerNormWeights, + cellLayerNormWeights, + hasOutputLayerNormWeights, + outputLayerNormWeights, + inputValues, + expectedOutputValues, + activationFunction, + clippingThresCell, + clippingThresProj, + isTimeMajor); +} + +void UnidirectionalSequenceLstmWithCifgWithPeepholeNoProjectionTest(std::vector& backends) +{ + int32_t batchSize = 3; + int32_t timeSize = 2; + int32_t inputSize = 3; + int32_t outputSize = 4; + // cellSize and outputSize have the same size when there is no projection. 
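As above, numUnits doubles as outputSize here; what distinguishes this case is that the input-gate tensors are absent (hasInputToInputWeights, hasRecurrentToInputWeights and hasInputGateBias are all false), which is the CIFG configuration, while cellToForgetWeights and cellToOutputWeights switch on peephole connections. The per-unit sketch below assumes the conventional CIFG/peephole formulation rather than quoting the ArmNN backend, and the function name is invented for illustration.

#include <cmath>

// Per-unit LSTM update with the CIFG and peephole options toggled on.
// 'pre*' are the usual gate pre-activations (input plus recurrent
// contributions plus bias); the peephole weights are scalars per unit.
inline void CifgPeepholeUnitStep(float preForget, float preCell, float preOutput,
                                 float cellToForget, float cellToOutput,
                                 float& cellState, float& output)
{
    auto sigmoid = [](float x) { return 1.0f / (1.0f + std::exp(-x)); };

    // Peephole: the previous cell state feeds the forget gate directly.
    const float f = sigmoid(preForget + cellToForget * cellState);
    // CIFG: the input gate is not computed from its own weights at all; it is
    // coupled to the forget gate as i = 1 - f, which is why the
    // inputToInput / recurrentToInput / inputGateBias tensors are omitted.
    const float i = 1.0f - f;
    const float g = std::tanh(preCell);
    cellState = f * cellState + i * g;
    // The output-gate peephole looks at the freshly updated cell state.
    const float o = sigmoid(preOutput + cellToOutput * cellState);
    output = o * std::tanh(cellState);
}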
+ int32_t numUnits = outputSize; + + //tensorInfo12 + bool hasInputToInputWeights = false; + std::vector inputToInputWeights{}; + + std::vector inputToForgetWeights = { 0.2415594226f, 0.15400093799f, 0.4566498398f, + -0.3810434485f, 0.268383264f, -0.009807467424f, + -0.3522925403f, -0.24275735512f, -0.28344226125f, + 0.13512269116f, -0.4932442977f, -0.10039821991f }; + + std::vector inputToCellWeights = { -0.2504855627f, 0.184490025045f, -0.2480507493f, + 0.386399507f, -0.259465157985f, -0.16545993089f, + -0.4230232555f, 0.341664791103f, -0.18127849691f, + -0.2277662414f, -0.55275535589f, 0.34184026718f }; + + std::vector inputToOutputWeights = { 0.2303854227f, 0.5218806862f, -0.4865379333f, + 0.53969591851f, 0.23393625035f, -0.27140527306f, + 0.50009280443f, 0.07511717046f, 0.3998299249f, + -0.51717478049f, 0.1889653282f, -0.367323637f }; + + //tensorInfo16 + bool hasRecurrentToInputWeights = false; + std::vector recurrentToInputWeights{}; + + std::vector recurrentToForgetWeights = { -0.09499983487f, -0.08814888417f, -0.04834804721f, 0.1516668247f, + -0.3967529535f, -0.06463699788f, 0.4952811002f, 0.003274492938f, + -0.0968840941f, 0.17928104102f, 0.0031281141592f, -0.3387276584f, + -0.3587934076f, 0.06705895066f, 0.22463923692f, 0.1961955726f }; + + std::vector recurrentToCellWeights = { -0.21938985582f, -0.3023648226f, -0.1170005202f, -0.3509177422f, + -0.4286288613f, 0.2726137042f, 0.09216640889f, -0.06551410215f, + 0.20453298098f, 0.2393476665f, 0.11846517771f, 0.2630801796f, + 0.3954237699f, -0.19407111404f, 0.30412107706f, -0.27342408554f }; + + std::vector recurrentToOutputWeights = { -0.32921677827f, 0.32624614238f, -0.1388191282f, -0.17879831790f, + -0.15185534954f, -0.16918526583f, -0.10087361183f, -0.5436913968f, + 0.016758225858f, 0.30454617738f, -0.41493862867f, -0.005565764375f, + -0.12584099173f, -0.12319286912f, 0.2407919466f, -0.08879069983f }; + // tensorInfo4 + bool hasCellToInputWeights = false; + std::vector cellToInputWeights; + bool hasCellToForgetWeights = true; + std::vector cellToForgetWeights = {0.47485286f, -0.51955009f, -0.24458408f, 0.31544167f}; + bool hasCellToOutputWeights = true; + std::vector cellToOutputWeights = {-0.17135078f, 0.82760304f, 0.85573703f, -0.77109635f}; + + bool hasInputGateBias = false; + std::vector inputGateBias; + std::vector forgetGateBias = {1., 1., 1., 1.}; + std::vector cellBias = {0., 0., 0., 0.}; + std::vector outputGateBias = {0., 0., 0., 0.}; + + bool hasProjectionWeights = false; + std::vector projectionWeights; + bool hasProjectionBias = false; + std::vector projectionBias; + + bool hasInputLayerNormWeights = false; + std::vector inputLayerNormWeights; + bool hasForgetLayerNormWeights = false; + std::vector forgetLayerNormWeights; + bool hasCellLayerNormWeights = false; + std::vector cellLayerNormWeights; + bool hasOutputLayerNormWeights = false; + std::vector outputLayerNormWeights; + + std::vector inputValues = { 1., 2., 3., 4., 5., 4., + 3., 2., 1., 2., 3., 4., + 5., 4., 3., 2., 1., 2. 
}; + std::vector expectedOutputValues = { -0.0129257f, -0.070531f, -0.153508f, -0.0392391f, + -0.0300169f, -0.195717f, -0.528679f, -0.0818106f, + -0.0332748f, 0.155429f, -0.353966f, -0.0801505f, + -0.032312f, -0.0407911f, -0.435053f, -0.0932317f, + -0.0108233f, 0.165584f, -0.640424f, -0.0447535f, + -0.031675f, 0.125987f, -0.526695f, -0.110093f }; + + tflite::ActivationFunctionType activationFunction = tflite::ActivationFunctionType_TANH; + float clippingThresCell = 10.f; + float clippingThresProj = 0.f; + bool isTimeMajor = false; + + UnidirectionalSequenceLstmTestImpl(backends, + ::tflite::TensorType_FLOAT32, + batchSize, + timeSize, + inputSize, + outputSize, + numUnits, + hasInputToInputWeights, + inputToInputWeights, + inputToForgetWeights, + inputToCellWeights, + inputToOutputWeights, + hasRecurrentToInputWeights, + recurrentToInputWeights, + recurrentToForgetWeights, + recurrentToCellWeights, + recurrentToOutputWeights, + hasCellToInputWeights, + cellToInputWeights, + hasCellToForgetWeights, + cellToForgetWeights, + hasCellToOutputWeights, + cellToOutputWeights, + hasInputGateBias, + inputGateBias, + forgetGateBias, + cellBias, + outputGateBias, + hasProjectionWeights, + projectionWeights, + hasProjectionBias, + projectionBias, + hasInputLayerNormWeights, + inputLayerNormWeights, + hasForgetLayerNormWeights, + forgetLayerNormWeights, + hasCellLayerNormWeights, + cellLayerNormWeights, + hasOutputLayerNormWeights, + outputLayerNormWeights, + inputValues, + expectedOutputValues, + activationFunction, + clippingThresCell, + clippingThresProj, + isTimeMajor); +} + +void UnidirectionalSequenceLstmNoCifgWithPeepholeWithProjectionWithLayerNormTest( + std::vector& backends) +{ + int32_t batchSize = 3; + int32_t timeSize = 2; + int32_t inputSize = 3; + int32_t outputSize = 4; + int32_t numUnits = 5; + + //tensorInfo15 + bool hasInputToInputWeights = true; + std::vector inputToInputWeights = { -0.49536117f, -0.0556083915f, -0.102400711f, + -0.117484632f, 0.3298470976f, -0.1179017122f, + 0.214305695f, 0.42135173085f, 0.003878414626f, + -0.348303917f, -0.1881275477f, 0.0343011027f, + -0.38837709614f, -0.05636804124f, 0.4259087456f}; + + std::vector inputToForgetWeights = { 0.2415594226f, 0.15400093799f, 0.4566498398f, + -0.3810434485f, 0.268383264f, -0.009807467424f, + -0.3522925403f, -0.24275735512f, -0.28344226125f, + 0.13512269116f, -0.4932442977f, -0.10039821991f, + 0.2726137042f, 0.09216640889f, -0.06551410215f}; + + std::vector inputToCellWeights = { -0.2504855627f, 0.184490025045f, -0.2480507493f, + 0.386399507f, -0.259465157985f, -0.16545993089f, + -0.4230232555f, 0.341664791103f, -0.18127849691f, + -0.2277662414f, -0.55275535589f, 0.34184026718f, + 0.3954237699f, -0.19407111404f, 0.30412107706f}; + + std::vector inputToOutputWeights = { 0.2303854227f, 0.5218806862f, -0.4865379333f, + 0.53969591851f, 0.23393625035f, -0.27140527306f, + 0.50009280443f, 0.07511717046f, 0.3998299249f, + -0.51717478049f, 0.1889653282f, -0.367323637f, + -0.12584099173f, -0.12319286912f, 0.2407919466f}; + + //tensorInfo20 + bool hasRecurrentToInputWeights = true; + std::vector recurrentToInputWeights = { -0.128009796112f, 0.1995525098f, -0.07745539397f, 0.1558421701f, + -0.265254765766f, -0.38837709614f, -0.05636804124f, 0.4259087456f, + 0.17628988623f, 0.3877420127f, 0.53300309181f, -0.0959980934f, + 0.00302857416f, 0.3266998827f, -0.142509296562f, -0.04433270756f, + 0.54066205f, -0.32668582f, -0.43562764f, -0.56094903f }; + + std::vector recurrentToForgetWeights = { -0.09499983487f, -0.08814888417f, 
-0.04834804721f, 0.1516668247f, + -0.3967529535f, -0.06463699788f, 0.4952811002f, 0.003274492938f, + -0.0968840941f, 0.17928104102f, 0.0031281141592f, -0.3387276584f, + -0.3587934076f, 0.06705895066f, 0.22463923692f, 0.1961955726f, + 0.01841056f, -0.32764608f, -0.33027974f, -0.10826075f }; + + std::vector recurrentToCellWeights = { -0.21938985582f, -0.3023648226f, -0.1170005202f, -0.3509177422f, + -0.4286288613f, 0.2726137042f, 0.09216640889f, -0.06551410215f, + 0.20453298098f, 0.2393476665f, 0.11846517771f, 0.2630801796f, + 0.3954237699f, -0.19407111404f, 0.30412107706f, -0.27342408554f, + 0.19069612f, -0.03026325f, -0.54532051f, 0.33003211f }; + + std::vector recurrentToOutputWeights = { -0.32921677827f, 0.32624614238f, -0.1388191282f, -0.17879831790f, + -0.15185534954f, -0.16918526583f, -0.10087361183f, -0.5436913968f, + 0.016758225858f, 0.30454617738f, -0.41493862867f, -0.005565764375f, + -0.12584099173f, -0.12319286912f, 0.2407919466f, -0.08879069983f, + 0.11178309f, 0.09481031f, -0.26424935f, 0.46261835f }; + // tensorInfo5 + bool hasCellToInputWeights = true; + std::vector cellToInputWeights = { 0.05f, 0.1f, 0.25f, 0.15f, -0.02f }; + bool hasCellToForgetWeights = true; + std::vector cellToForgetWeights = { -0.02f, -0.15f, -0.25f, -0.03f, 0.15f }; + bool hasCellToOutputWeights = true; + std::vector cellToOutputWeights = { 0.1f, -0.1f, -0.5f, 0.05f, 0.01f }; + + bool hasInputGateBias = true; + std::vector inputGateBias = { 0.03f, 0.15f, 0.22f, 0.38f, 0.05f }; + std::vector forgetGateBias = { 0.1f, -0.3f, -0.2f, 0.1f, 0.4f }; + std::vector cellBias = { -0.05f, 0.72f, 0.25f, 0.08f, 0.1f }; + std::vector outputGateBias = { 0.05f, -0.01f, 0.2f, 0.1f, -0.2f }; + + bool hasProjectionWeights = true; + std::vector projectionWeights = { -0.1f, 0.2f, 0.01f, -0.2f, + 0.1f, 0.5f, 0.3f, 0.08f, + 0.07f, 0.2f, -0.4f, 0.2f, + 0.5f, -0.4f, 0.3f, -0.2f, + 0.3f, 0.08f, -0.07f, 0.2f}; //{outputSize, numUnits} + bool hasProjectionBias = true; + std::vector projectionBias(outputSize, 0.f);; + + bool hasInputLayerNormWeights = true; + std::vector inputLayerNormWeights = { 0.1f, 0.2f, 0.3f, 0.5f, 0.8f }; + bool hasForgetLayerNormWeights = true; + std::vector forgetLayerNormWeights = { 0.1f, 0.2f, 0.3f, 0.5f, 0.2f }; + bool hasCellLayerNormWeights = true; + std::vector cellLayerNormWeights = { 0.7f, 0.2f, 0.3f, 0.8f, 0.5f }; + bool hasOutputLayerNormWeights = true; + std::vector outputLayerNormWeights = { 0.6f, 0.2f, 0.2f, 0.5f, 0.1f }; + + std::vector inputValues = { 1., 2., 3., 4., 5., 4., + 3., 2., 1., 2., 3., 4., + 5., 4., 3., 2., 1., 2. 
}; + std::vector expectedOutputValues = { 0.0642256f, 0.0343966f, 0.184122f, 0.114717f, + 0.11458f, 0.0407109f, 0.300327f, 0.174301f, + 0.0864761f, 0.0362912f, 0.178635f, 0.115689f, + 0.108008f, 0.0386623f, 0.273471f, 0.167115f, + 0.0859545f, 0.0331481f, 0.186051f, 0.11888f, + 0.106649f, 0.0276847f, 0.229863f, 0.166958f }; + + tflite::ActivationFunctionType activationFunction = tflite::ActivationFunctionType_TANH; + float clippingThresCell = 10.f; + float clippingThresProj = 0.f; + bool isTimeMajor = false; + + UnidirectionalSequenceLstmTestImpl(backends, + ::tflite::TensorType_FLOAT32, + batchSize, + timeSize, + inputSize, + outputSize, + numUnits, + hasInputToInputWeights, + inputToInputWeights, + inputToForgetWeights, + inputToCellWeights, + inputToOutputWeights, + hasRecurrentToInputWeights, + recurrentToInputWeights, + recurrentToForgetWeights, + recurrentToCellWeights, + recurrentToOutputWeights, + hasCellToInputWeights, + cellToInputWeights, + hasCellToForgetWeights, + cellToForgetWeights, + hasCellToOutputWeights, + cellToOutputWeights, + hasInputGateBias, + inputGateBias, + forgetGateBias, + cellBias, + outputGateBias, + hasProjectionWeights, + projectionWeights, + hasProjectionBias, + projectionBias, + hasInputLayerNormWeights, + inputLayerNormWeights, + hasForgetLayerNormWeights, + forgetLayerNormWeights, + hasCellLayerNormWeights, + cellLayerNormWeights, + hasOutputLayerNormWeights, + outputLayerNormWeights, + inputValues, + expectedOutputValues, + activationFunction, + clippingThresCell, + clippingThresProj, + isTimeMajor); +} + +void UnidirectionalSequenceLstmInt8Test(std::vector& backends) +{ + int32_t batchSize = 3; + int32_t timeSize = 2; + int32_t inputSize = 3; + int32_t outputSize = 4; + // cellSize and outputSize have the same size when there is no projection. + int32_t numUnits = outputSize; + + //tensorInfo12 + bool hasInputToInputWeights = true; + std::vector inputToInputWeights = { -4, -1, -1, -2, 3, -2, 2, 4, 1, -4, -2, 3 }; + + std::vector inputToForgetWeights = { 2, 1, 4, -4, 3, -1, -3, -2, -3, 1, -4, -1 }; + + std::vector inputToCellWeights = { -2, 1, -2, 4, -3, -2, -4, 3, -2, -2, -6, 3 }; + + std::vector inputToOutputWeights = { 2, 5, -4, 5, 2, -3, 5, 7, 3, -5, 1, -4 }; + + //tensorInfo16 + bool hasRecurrentToInputWeights = true; + std::vector recurrentToInputWeights = { -1, 1, -1, 1, -3, -4, -1, 4, 2, 3, 5, -1, 1, 3, -1, -1 }; + + std::vector recurrentToForgetWeights = { -1, 1, -1, 1, -3, -4, -1, 4, 2, 3, 5, -1, 1, 3, -2, -1 }; + + std::vector recurrentToCellWeights = { -2, -3, -1, -3, -4, 2, 1, -1, 2, 2, 1, 2, 3, -2, 3, -3 }; + + std::vector recurrentToOutputWeights = { -3, 3, -1, -2, -2, -2, -1, -5, 1, 3, -4, -1, -1, -1, 2, -1 }; + + // tensorInfo4 + bool hasCellToInputWeights = false; + std::vector cellToInputWeights; + bool hasCellToForgetWeights = false; + std::vector cellToForgetWeights; + bool hasCellToOutputWeights = false; + std::vector cellToOutputWeights; + + bool hasInputGateBias = true; + std::vector inputGateBias = { 0., 0., 0., 0. }; + std::vector forgetGateBias = { 1., 1., 1., 1. }; + std::vector cellBias = { 0., 0., 0., 0. }; + std::vector outputGateBias = { 0., 0., 0., 0. 
}; + + bool hasProjectionWeights = false; + std::vector projectionWeights; + bool hasProjectionBias = false; + std::vector projectionBias; + + bool hasInputLayerNormWeights = false; + std::vector inputLayerNormWeights; + bool hasForgetLayerNormWeights = false; + std::vector forgetLayerNormWeights; + bool hasCellLayerNormWeights = false; + std::vector cellLayerNormWeights; + bool hasOutputLayerNormWeights = false; + std::vector outputLayerNormWeights; + + std::vector inputValues = { 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.4f, + 0.3f, 0.2f, 0.1f, 0.2f, 0.3f, 0.4f, + 0.5f, 0.4f, 0.3f, 0.2f, 0.1f, 0.2f }; + + std::vector expectedOutputValues = { -0.0142517f, -0.0198845f, -0.0120569f, -0.0116868f, + -0.0350714f, -0.0343202f, -0.047504f, -0.0569789f, + -0.0146346f, 0.0106663f, -0.0247238f, -0.0319502f, + -0.0294759f, -0.0129935f, -0.0444175f, -0.0444354f, + -0.0280855f, 0.00545101f, -0.051422f, -0.0463838f, + -0.0310702f, 0.00915739f, -0.0625207f, -0.0482648f }; + + tflite::ActivationFunctionType activationFunction = tflite::ActivationFunctionType_TANH; + float clippingThresCell = 10.f; + float clippingThresProj = 0.f; + bool isTimeMajor = false; + + UnidirectionalSequenceLstmTestImpl(backends, + ::tflite::TensorType_INT8, + batchSize, + timeSize, + inputSize, + outputSize, + numUnits, + hasInputToInputWeights, + inputToInputWeights, + inputToForgetWeights, + inputToCellWeights, + inputToOutputWeights, + hasRecurrentToInputWeights, + recurrentToInputWeights, + recurrentToForgetWeights, + recurrentToCellWeights, + recurrentToOutputWeights, + hasCellToInputWeights, + cellToInputWeights, + hasCellToForgetWeights, + cellToForgetWeights, + hasCellToOutputWeights, + cellToOutputWeights, + hasInputGateBias, + inputGateBias, + forgetGateBias, + cellBias, + outputGateBias, + hasProjectionWeights, + projectionWeights, + hasProjectionBias, + projectionBias, + hasInputLayerNormWeights, + inputLayerNormWeights, + hasForgetLayerNormWeights, + forgetLayerNormWeights, + hasCellLayerNormWeights, + cellLayerNormWeights, + hasOutputLayerNormWeights, + outputLayerNormWeights, + inputValues, + expectedOutputValues, + activationFunction, + clippingThresCell, + clippingThresProj, + isTimeMajor, + 0.1f); +} + +void UnidirectionalSequenceLstmInt8TimeMajorTest(std::vector& backends) +{ + int32_t batchSize = 3; + int32_t timeSize = 2; + int32_t inputSize = 3; + int32_t outputSize = 4; + // cellSize and outputSize have the same size when there is no projection. 
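Again numUnits doubles as outputSize. What the INT8 variants change is that the gate weight tensors are stored as raw int8 values with a single symmetric quantisation scale (0.1 in these tests, zero point 0), and this particular case also runs with isTimeMajor set to true. The two helpers below are only an illustrative sketch in plain C++ (the names are invented and are not part of the test helper) of what those two knobs mean for the data.

#include <cstdint>
#include <vector>

// Dequantise int8 weights: real = scale * (q - zeroPoint). With scale 0.1 and
// zero point 0, inputToInputWeights {-4, -1, -1, -2, 3, ...} stand for the
// float weights {-0.4, -0.1, -0.1, -0.2, 0.3, ...}.
inline std::vector<float> Dequantize(const std::vector<int8_t>& q, float scale, int32_t zeroPoint)
{
    std::vector<float> real(q.size());
    for (size_t i = 0; i < q.size(); ++i)
    {
        real[i] = scale * static_cast<float>(static_cast<int32_t>(q[i]) - zeroPoint);
    }
    return real;
}

// Re-lay a {batchSize, timeSize, inputSize} buffer out as
// {timeSize, batchSize, inputSize}, which is how the same 18 input values are
// interpreted when isTimeMajor is true.
inline std::vector<float> ToTimeMajor(const std::vector<float>& batchMajor,
                                      int32_t batchSize, int32_t timeSize, int32_t inputSize)
{
    std::vector<float> timeMajor(batchMajor.size());
    for (int32_t b = 0; b < batchSize; ++b)
    {
        for (int32_t t = 0; t < timeSize; ++t)
        {
            for (int32_t i = 0; i < inputSize; ++i)
            {
                timeMajor[(t * batchSize + b) * inputSize + i] =
                    batchMajor[(b * timeSize + t) * inputSize + i];
            }
        }
    }
    return timeMajor;
}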
+ int32_t numUnits = outputSize; + + //tensorInfo12 + bool hasInputToInputWeights = true; + std::vector inputToInputWeights = { -4, -1, -1, -2, 3, -2, 2, 4, 1, -4, -2, 3 }; + + std::vector inputToForgetWeights = { 2, 1, 4, -4, 3, -1, -3, -2, -3, 1, -4, -1 }; + + std::vector inputToCellWeights = { -2, 1, -2, 4, -3, -2, -4, 3, -2, -2, -6, 3 }; + + std::vector inputToOutputWeights = { 2, 5, -4, 5, 2, -3, 5, 7, 3, -5, 1, -4 }; + + //tensorInfo16 + bool hasRecurrentToInputWeights = true; + std::vector recurrentToInputWeights = { -1, 1, -1, 1, -3, -4, -1, 4, 2, 3, 5, -1, 1, 3, -1, -1 }; + + std::vector recurrentToForgetWeights = { -1, 1, -1, 1, -3, -4, -1, 4, 2, 3, 5, -1, 1, 3, -2, -1 }; + + std::vector recurrentToCellWeights = { -2, -3, -1, -3, -4, 2, 1, -1, 2, 2, 1, 2, 3, -2, 3, -3 }; + + std::vector recurrentToOutputWeights = { -3, 3, -1, -2, -2, -2, -1, -5, 1, 3, -4, -1, -1, -1, 2, -1 }; + + // tensorInfo4 + bool hasCellToInputWeights = false; + std::vector cellToInputWeights; + bool hasCellToForgetWeights = false; + std::vector cellToForgetWeights; + bool hasCellToOutputWeights = false; + std::vector cellToOutputWeights; + + bool hasInputGateBias = true; + std::vector inputGateBias = { 0., 0., 0., 0. }; + std::vector forgetGateBias = { 1., 1., 1., 1. }; + std::vector cellBias = { 0., 0., 0., 0. }; + std::vector outputGateBias = { 0., 0., 0., 0. }; + + bool hasProjectionWeights = false; + std::vector projectionWeights; + bool hasProjectionBias = false; + std::vector projectionBias; + + bool hasInputLayerNormWeights = false; + std::vector inputLayerNormWeights; + bool hasForgetLayerNormWeights = false; + std::vector forgetLayerNormWeights; + bool hasCellLayerNormWeights = false; + std::vector cellLayerNormWeights; + bool hasOutputLayerNormWeights = false; + std::vector outputLayerNormWeights; + + std::vector inputValues = { 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.4f, + 0.3f, 0.2f, 0.1f, 0.2f, 0.3f, 0.4f, + 0.5f, 0.4f, 0.3f, 0.2f, 0.1f, 0.2f }; + + std::vector expectedOutputValues = { -0.0142517f, -0.0198845f, -0.0120122f, -0.0116868f, + -0.0261295f, -0.0188487f, -0.0345463f, -0.049733f, + -0.0146346f, 0.0106663f, -0.0247238f, -0.0319502f, + -0.0291863f, -0.0369402f, -0.0354071f, -0.0296529f, + -0.0419539f, -0.00617731f, -0.0814796f, -0.0804005f, + -0.0244737f, 0.0119905f, -0.0457527f, -0.0331862f }; + + tflite::ActivationFunctionType activationFunction = tflite::ActivationFunctionType_TANH; + float clippingThresCell = 10.f; + float clippingThresProj = 0.f; + bool isTimeMajor = true; + + UnidirectionalSequenceLstmTestImpl(backends, + ::tflite::TensorType_INT8, + batchSize, + timeSize, + inputSize, + outputSize, + numUnits, + hasInputToInputWeights, + inputToInputWeights, + inputToForgetWeights, + inputToCellWeights, + inputToOutputWeights, + hasRecurrentToInputWeights, + recurrentToInputWeights, + recurrentToForgetWeights, + recurrentToCellWeights, + recurrentToOutputWeights, + hasCellToInputWeights, + cellToInputWeights, + hasCellToForgetWeights, + cellToForgetWeights, + hasCellToOutputWeights, + cellToOutputWeights, + hasInputGateBias, + inputGateBias, + forgetGateBias, + cellBias, + outputGateBias, + hasProjectionWeights, + projectionWeights, + hasProjectionBias, + projectionBias, + hasInputLayerNormWeights, + inputLayerNormWeights, + hasForgetLayerNormWeights, + forgetLayerNormWeights, + hasCellLayerNormWeights, + cellLayerNormWeights, + hasOutputLayerNormWeights, + outputLayerNormWeights, + inputValues, + expectedOutputValues, + activationFunction, + clippingThresCell, + clippingThresProj, + 
isTimeMajor, + 0.1); +} + +void UnidirectionalSequenceLstmInt8NoCifgWithPeepholeWithProjectionTest(std::vector& backends) +{ + int32_t batchSize = 3; + int32_t timeSize = 2; + int32_t inputSize = 3; + int32_t outputSize = 4; + int32_t numUnits = 4; + + bool hasInputToInputWeights = true; + std::vector inputToInputWeights = { -4, -1, -1, -2, 3, -2, 2, 4, 1, -4, -2, 3 }; + + std::vector inputToForgetWeights = { 2, 1, 4, -4, 3, -1, -3, -2, -3, 1, -4, -1 }; + + std::vector inputToCellWeights = { -2, 1, -2, 4, -3, -2, -4, 3, -2, -2, -6, 3 }; + + std::vector inputToOutputWeights = { 2, 5, -4, 5, 2, -3, 5, 7, 3, -5, 1, -4 }; + + //tensorInfo16 + bool hasRecurrentToInputWeights = true; + std::vector recurrentToInputWeights = { -1, 1, -1, 1, -3, -4, -1, 4, 2, 3, 5, -1, 1, 3, -1, -1 }; + + std::vector recurrentToForgetWeights = { -1, 1, -1, 1, -3, -4, -1, 4, 2, 3, 5, -1, 1, 3, -2, -1 }; + + std::vector recurrentToCellWeights = { -2, -3, -1, -3, -4, 2, 1, -1, 2, 2, 1, 2, 3, -2, 3, -3 }; + + std::vector recurrentToOutputWeights = { -3, 3, -1, -2, -2, -2, -1, -5, 1, 3, -4, -1, -1, -1, 2, -1 }; + + // tensorInfo4 + bool hasCellToInputWeights = true; + std::vector cellToInputWeights = { 5, 10, 25, 15 }; + bool hasCellToForgetWeights = true; + std::vector cellToForgetWeights = { -5, 15, 25, 3 }; + bool hasCellToOutputWeights = true; + std::vector cellToOutputWeights = { 10, -10, -5, 50 }; + + bool hasInputGateBias = true; + std::vector inputGateBias = { 0.02234832f, 0.14757581f, 0.18176508f, 0.10380666f}; + std::vector forgetGateBias = { 0.035185695f, -0.042891346f, -0.3032477f, 0.23027696f}; + std::vector cellBias = { -0.124379363f, 0.55531194f, 0.23377132f, 0.033463873f }; + std::vector outputGateBias = { 0.046159424f, -0.12809046f, 0.03563469f, 0.12648113f }; + + bool hasProjectionWeights = true; + std::vector projectionWeights = { -25, 51, 3, -5, 25, 127, 77, 20, 18, 51, -10, 51, -25, 88, 77, -13 }; + bool hasProjectionBias = true; + std::vector projectionBias(outputSize, 0.f); + + bool hasInputLayerNormWeights = false; + std::vector inputLayerNormWeights; + bool hasForgetLayerNormWeights = false; + std::vector forgetLayerNormWeights; + bool hasCellLayerNormWeights = false; + std::vector cellLayerNormWeights; + bool hasOutputLayerNormWeights = false; + std::vector outputLayerNormWeights; + + std::vector inputValues = { 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.4f, + 0.3f, 0.2f, 0.1f, 0.2f, 0.3f, 0.4f, + 0.5f, 0.4f, 0.3f, 0.2f, 0.1f, 0.2f }; + + std::vector expectedOutputValues = { 0.612103f, 1.56788f, 0.31966f, 1.42956f, + 0.909718f, 3.07916f, -0.560586f, 3.8907f, + 0.753671f, 1.77485f, 0.365122f, 1.60077f, + 0.812644f, 2.79092f, -0.605396f, 3.61742f, + 0.791857f, 1.64353f, 0.316588f, 1.55192f, + 0.807265f, 2.47012f, -0.539598f, 3.25654f }; + + tflite::ActivationFunctionType activationFunction = tflite::ActivationFunctionType_TANH; + float clippingThresCell = 10.f; + float clippingThresProj = 0.f; + bool isTimeMajor = false; + + UnidirectionalSequenceLstmTestImpl(backends, + ::tflite::TensorType_INT8, + batchSize, + timeSize, + inputSize, + outputSize, + numUnits, + hasInputToInputWeights, + inputToInputWeights, + inputToForgetWeights, + inputToCellWeights, + inputToOutputWeights, + hasRecurrentToInputWeights, + recurrentToInputWeights, + recurrentToForgetWeights, + recurrentToCellWeights, + recurrentToOutputWeights, + hasCellToInputWeights, + cellToInputWeights, + hasCellToForgetWeights, + cellToForgetWeights, + hasCellToOutputWeights, + cellToOutputWeights, + hasInputGateBias, + inputGateBias, + 
forgetGateBias, + cellBias, + outputGateBias, + hasProjectionWeights, + projectionWeights, + hasProjectionBias, + projectionBias, + hasInputLayerNormWeights, + inputLayerNormWeights, + hasForgetLayerNormWeights, + forgetLayerNormWeights, + hasCellLayerNormWeights, + cellLayerNormWeights, + hasOutputLayerNormWeights, + outputLayerNormWeights, + inputValues, + expectedOutputValues, + activationFunction, + clippingThresCell, + clippingThresProj, + isTimeMajor, + 0.1f); +} + +void UnidirectionalSequenceLstmInt8WithCifgWithPeepholeNoProjectionTest(std::vector& backends) +{ + int32_t batchSize = 3; + int32_t timeSize = 2; + int32_t inputSize = 3; + int32_t outputSize = 4; + // cellSize and outputSize have the same size when there is no projection. + int32_t numUnits = outputSize; + + //tensorInfo12, + bool hasInputToInputWeights = false; + std::vector inputToInputWeights; + + std::vector inputToForgetWeights = { 2, 1, 4, -4, 3, -1, -3, -2, -3, 1, -4, -1 }; + + std::vector inputToCellWeights = { -2, 1, -2, 4, -3, -2, -4, 3, -2, -2, -6, 3 }; + + std::vector inputToOutputWeights = { 2, 5, -4, 5, 2, -3, 5, 7, 3, -5, 1, -4 }; + + //tensorInfo16, + bool hasRecurrentToInputWeights = false; + std::vector recurrentToInputWeights; + std::vector recurrentToForgetWeights = { -1, 1, -1, 1, -3, -4, -1, 4, 2, 3, 5, -1, 1, 3, -2, -1 }; + + std::vector recurrentToCellWeights = { -2, -3, -1, -3, -4, 2, 1, -1, 2, 2, 1, 2, 3, -2, 3, -3 }; + + std::vector recurrentToOutputWeights = { -3, 3, -1, -2, -2, -2, -1, -5, 1, 3, -4, -1, -1, -1, 2, -1 }; + + // tensorInfo4 + bool hasCellToInputWeights = false; + std::vector cellToInputWeights; + bool hasCellToForgetWeights = true; + std::vector cellToForgetWeights = { 47, -52, -24, 31 }; + bool hasCellToOutputWeights = true; + std::vector cellToOutputWeights = { -17, 82, 85, -77 }; + + bool hasInputGateBias = false; + std::vector inputGateBias; + std::vector forgetGateBias = { 1., 1., 1., 1. }; + std::vector cellBias = { 0., 0., 0., 0. }; + std::vector outputGateBias = { 0., 0., 0., 0. 
}; + + bool hasProjectionWeights = false; + std::vector projectionWeights; + bool hasProjectionBias = false; + std::vector projectionBias; + + bool hasInputLayerNormWeights = false; + std::vector inputLayerNormWeights; + bool hasForgetLayerNormWeights = false; + std::vector forgetLayerNormWeights; + bool hasCellLayerNormWeights = false; + std::vector cellLayerNormWeights; + bool hasOutputLayerNormWeights = false; + std::vector outputLayerNormWeights; + + std::vector inputValues = { 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.4f, + 0.3f, 0.2f, 0.1f, 0.2f, 0.3f, 0.4f, + 0.5f, 0.4f, 0.3f, 0.2f, 0.1f, 0.2f }; + + std::vector expectedOutputValues = { -0.0072104f, -0.00991171f, -0.00650478f, -0.00713055f, + -0.0191782f, -0.0161269f, -0.0233683f, -0.054299f, + -0.00783725f, 0.00635271f, -0.0126718f, -0.022613f, + -0.0161351f, -0.00775868f, -0.021054f, -0.0339778f, + -0.0146392f, 0.00330261f, -0.0258733f, -0.0407797f, + -0.0174297f, 0.0050105f, -0.0266275f, -0.0362564f }; + + tflite::ActivationFunctionType activationFunction = tflite::ActivationFunctionType_TANH; + float clippingThresCell = 10.f; + float clippingThresProj = 0.f; + bool isTimeMajor = false; + + UnidirectionalSequenceLstmTestImpl(backends, + ::tflite::TensorType_INT8, + batchSize, + timeSize, + inputSize, + outputSize, + numUnits, + hasInputToInputWeights, + inputToInputWeights, + inputToForgetWeights, + inputToCellWeights, + inputToOutputWeights, + hasRecurrentToInputWeights, + recurrentToInputWeights, + recurrentToForgetWeights, + recurrentToCellWeights, + recurrentToOutputWeights, + hasCellToInputWeights, + cellToInputWeights, + hasCellToForgetWeights, + cellToForgetWeights, + hasCellToOutputWeights, + cellToOutputWeights, + hasInputGateBias, + inputGateBias, + forgetGateBias, + cellBias, + outputGateBias, + hasProjectionWeights, + projectionWeights, + hasProjectionBias, + projectionBias, + hasInputLayerNormWeights, + inputLayerNormWeights, + hasForgetLayerNormWeights, + forgetLayerNormWeights, + hasCellLayerNormWeights, + cellLayerNormWeights, + hasOutputLayerNormWeights, + outputLayerNormWeights, + inputValues, + expectedOutputValues, + activationFunction, + clippingThresCell, + clippingThresProj, + isTimeMajor, + 0.1); +} + +void UnidirectionalSequenceLstmInt8NoCifgWithPeepholeWithProjectionWithLayerNormTest( + std::vector& backends) +{ + int32_t batchSize = 3; + int32_t timeSize = 2; + int32_t inputSize = 3; + int32_t outputSize = 4; + int32_t numUnits = 5; + + bool hasInputToInputWeights = true; + std::vector inputToInputWeights = { -4, -1, -1, -2, 3, -2, 2, 4, 1, -4, -2, 3, 2, 2, -4 }; + + std::vector inputToForgetWeights = { 2, 1, 4, -4, 3, -1, -3, -2, -3, 1, -4, -1, -3, -2, -4 }; + + std::vector inputToCellWeights = { -2, 1, -2, 4, -3, -2, -4, 3, -2, -2, -6, 3, 2, 5, -4 }; + + std::vector inputToOutputWeights = { 2, 5, -4, 5, 2, -3, 5, 7, 3, -5, 1, -4, -4, -1, -1 }; + + bool hasRecurrentToInputWeights = true; + std::vector recurrentToInputWeights = { -1, 1, -1, 1, -3, -4, -1, 4, 2, 3, + 5, -1, 1, 3, -1, -1, -1, 4, 2, 3 }; + + std::vector recurrentToForgetWeights = { -1, 1, -1, 1, -3, -4, -1, 4, 2, 3, + 5, -1, 1, 3, -2, -1, -1, 2, 2, 1 }; + + std::vector recurrentToCellWeights = { -2, -3, -1, -3, -4, 2, 1, -1, 2, 2, + 1, 2, 3, -2, 3, -3, -1, -5, 1, 3 }; + + std::vector recurrentToOutputWeights = { -3, 3, -1, -2, -2, -2, -1, -5, 1, 3, + -4, -1, -1, -1, 2, -1, 5, 1, -3, -4 }; + + // tensorInfo5 + bool hasCellToInputWeights = true; + std::vector cellToInputWeights = { 5, 3, 8, -5, 2 }; + bool hasCellToForgetWeights = true; + std::vector 
cellToForgetWeights = { -2, -7, 5, -3, 4 }; + bool hasCellToOutputWeights = true; + std::vector cellToOutputWeights = { 9, -10 , -5, 5, 1 }; + + bool hasInputGateBias = true; + std::vector inputGateBias = { 0.03f, 0.15f, 0.22f, 0.38f, 0.05f }; + std::vector forgetGateBias = { 0.1f, -0.3f, -0.2f, 0.1f, 0.4f }; + std::vector cellBias = { -0.05f, 0.72f, 0.25f, 0.08f, 0.1f }; + std::vector outputGateBias = { 0.05f, -0.01f, 0.2f, 0.1f, -0.2f }; + + bool hasProjectionWeights = true; + std::vector projectionWeights = { -1, 2, 1, -2, 1, 5, 3, 8, 7, 2, + -4, 2, 5, -4, 3, -2, 3, 8, -7, 2 }; + bool hasProjectionBias = true; + std::vector projectionBias(outputSize, 0.f); + + bool hasInputLayerNormWeights = true; + std::vector inputLayerNormWeights = { 0.1f, 0.2f, -0.3f, -0.1f, 0.5f }; + bool hasForgetLayerNormWeights = true; + std::vector forgetLayerNormWeights = { -0.1f, 0.2f, 0.3f, 0.5f, 0.2f }; + bool hasCellLayerNormWeights = true; + std::vector cellLayerNormWeights = { 0.5f, 0.2f, 0.3f, 0.4f, -0.5f }; + bool hasOutputLayerNormWeights = true; + std::vector outputLayerNormWeights = { 0.6f, -0.2f, -0.2f, 0.5f, 0.1f }; + + std::vector inputValues = { 1., 8., 3., 4., 5., 4., + 3., 2., 1., 2., 3., 4., + 5., 4., 3., 2., 1., 2. }; + + std::vector expectedOutputValues = { 0.0471276f, 0.0168155f, 0.0789885f, 0.16550f, + 0.0643133f, -0.0400722f, 0.100593f, 0.197722f, + 0.0465562f, -0.0600682f, 0.0622087f, 0.115053f, + 0.056287f, -0.0566218f, 0.0856832f, 0.148484f, + 0.0457859f, -0.0588112f, 0.0623636f, 0.114333f, + 0.0509271f, -0.0754262f, 0.058600f, 0.0801288f }; + + tflite::ActivationFunctionType activationFunction = tflite::ActivationFunctionType_TANH; + float clippingThresCell = 10.f; + float clippingThresProj = 0.f; + bool isTimeMajor = false; + + UnidirectionalSequenceLstmTestImpl(backends, + ::tflite::TensorType_INT8, + batchSize, + timeSize, + inputSize, + outputSize, + numUnits, + hasInputToInputWeights, + inputToInputWeights, + inputToForgetWeights, + inputToCellWeights, + inputToOutputWeights, + hasRecurrentToInputWeights, + recurrentToInputWeights, + recurrentToForgetWeights, + recurrentToCellWeights, + recurrentToOutputWeights, + hasCellToInputWeights, + cellToInputWeights, + hasCellToForgetWeights, + cellToForgetWeights, + hasCellToOutputWeights, + cellToOutputWeights, + hasInputGateBias, + inputGateBias, + forgetGateBias, + cellBias, + outputGateBias, + hasProjectionWeights, + projectionWeights, + hasProjectionBias, + projectionBias, + hasInputLayerNormWeights, + inputLayerNormWeights, + hasForgetLayerNormWeights, + forgetLayerNormWeights, + hasCellLayerNormWeights, + cellLayerNormWeights, + hasOutputLayerNormWeights, + outputLayerNormWeights, + inputValues, + expectedOutputValues, + activationFunction, + clippingThresCell, + clippingThresProj, + isTimeMajor, + 0.1); +} + +TEST_SUITE("UnidirectionalSequenceLstmTest_CpuRefTests") +{ + +TEST_CASE ("UnidirectionalSequenceLstmTest_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + UnidirectionalSequenceLstmTest(backends); +} + +TEST_CASE ("UnidirectionalSequenceLstmTimeMajorTest_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + UnidirectionalSequenceLstmTimeMajorTest(backends); +} + +TEST_CASE ("UnidirectionalSequenceLstmNoCifgWithPeepholeWithProjectionTest_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + UnidirectionalSequenceLstmNoCifgWithPeepholeWithProjectionTest(backends); +} + +TEST_CASE ("UnidirectionalSequenceLstmWithCifgWithPeepholeNoProjectionTest_CpuRef_Test") +{ + std::vector 
backends = {armnn::Compute::CpuRef};
+    UnidirectionalSequenceLstmWithCifgWithPeepholeNoProjectionTest(backends);
+}
+
+TEST_CASE ("UnidirectionalSequenceLstmNoCifgWithPeepholeWithProjectionWithLayerNormTest_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    UnidirectionalSequenceLstmNoCifgWithPeepholeWithProjectionWithLayerNormTest(backends);
+}
+
+TEST_CASE ("UnidirectionalSequenceLstmInt8Test_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    UnidirectionalSequenceLstmInt8Test(backends);
+}
+
+TEST_CASE ("UnidirectionalSequenceLstmTimeInt8TimeMajorTest_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    UnidirectionalSequenceLstmInt8TimeMajorTest(backends);
+}
+
+TEST_CASE ("UnidirectionalSequenceLstmInt8NoCifgWithPeepholeWithProjectionTest_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    UnidirectionalSequenceLstmInt8NoCifgWithPeepholeWithProjectionTest(backends);
+}
+
+TEST_CASE ("UnidirectionalSequenceLstmInt8WithCifgWithPeepholeNoProjectionTest_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    UnidirectionalSequenceLstmInt8WithCifgWithPeepholeNoProjectionTest(backends);
+}
+
+TEST_CASE ("UnidirectionalSequenceLstmInt8NoCifgWithPeepholeWithProjectionWithLayerNormTest_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    UnidirectionalSequenceLstmInt8NoCifgWithPeepholeWithProjectionWithLayerNormTest(backends);
+}
+
+} //End of TEST_SUITE("UnidirectionalSequenceLstmTest_CpuRef")
+
+} // namespace armnnDelegate
\ No newline at end of file
diff --git a/arch/arm/ARMnn/delegate/src/test/UnidirectionalSequenceLstmTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/UnidirectionalSequenceLstmTestHelper.hpp
new file mode 100644
index 0000000000..9d6ef87e3f
--- /dev/null
+++ b/arch/arm/ARMnn/delegate/src/test/UnidirectionalSequenceLstmTestHelper.hpp
@@ -0,0 +1,722 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + + +#include +#include +#include + +#include + +#include +#include +#include + +namespace +{ + +template +std::vector CreateUnidirectionalSequenceLstmTfLiteModel(tflite::TensorType tensorType, + int32_t batchSize, + int32_t timeSize, + int32_t inputSize, + int32_t outputSize, + int32_t numUnits, + bool hasInputToInputWeights, + const std::vector& inputToInputWeights, + const std::vector& inputToForgetWeights, + const std::vector& inputToCellWeights, + const std::vector& inputToOutputWeights, + bool hasRecurrentToInputWeights, + const std::vector& recurrentToInputWeights, + const std::vector& recurrentToForgetWeights, + const std::vector& recurrentToCellWeights, + const std::vector& recurrentToOutputWeights, + bool hasCellToInputWeights, + const std::vector& cellToInputWeights, + bool hasCellToForgetWeights, + const std::vector& cellToForgetWeights, + bool hasCellToOutputWeights, + const std::vector& cellToOutputWeights, + bool hasInputGateBias, + const std::vector& inputGateBias, + const std::vector& forgetGateBias, + const std::vector& cellBias, + const std::vector& outputGateBias, + bool hasProjectionWeights, + const std::vector& projectionWeights, + bool hasProjectionBias, + const std::vector& projectionBias, + bool hasInputLayerNormWeights, + const std::vector& inputLayerNormWeights, + bool hasForgetLayerNormWeights, + const std::vector& forgetLayerNormWeights, + bool hasCellLayerNormWeights, + const std::vector& cellLayerNormWeights, + bool hasOutputLayerNormWeights, + const std::vector& outputLayerNormWeights, + tflite::ActivationFunctionType activationFunction, + float clippingThresCell, + float clippingThresProj, + bool isTimeMajor, + float quantScale, + int quantOffset = 0) +{ + + std::vector tensorInfo0{}; + std::vector tensorInfoNumUnits{numUnits}; + std::vector tensorInfoInputSize{numUnits, inputSize}; + std::vector tensorInfoOutputSize{numUnits, outputSize}; + + std::vector inputShape; + std::vector outputShape; + if (isTimeMajor) + { + inputShape = {timeSize, batchSize, inputSize}; + outputShape = {timeSize, batchSize, outputSize}; + } + else + { + inputShape = {batchSize, timeSize, inputSize}; + outputShape = {batchSize, timeSize, outputSize}; + } + std::vector outputStateInDimensions{batchSize, outputSize}; + std::vector cellStateInDimensions{batchSize, numUnits}; + std::vector projectionWeightDimensions{outputSize, numUnits}; + std::vector projectionBiasDimensions{outputSize}; + + std::vector operatorInputs; + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + std::vector> buffers; + std::vector> tensors; + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ 1.0f }), + flatBufferBuilder.CreateVector({ 0 })); + + auto weightQuantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputShape.data(), + inputShape.size()), + ::tflite::TensorType_FLOAT32, + buffers.size() - 1, + flatBufferBuilder.CreateString("input_0"))); + operatorInputs.push_back(buffers.size() - 1); + + if (hasInputToInputWeights) 
+ { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(inputToInputWeights.data()), + sizeof(T) * inputToInputWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfoInputSize.data(), + tensorInfoInputSize.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("inputToInputWeights"), + weightQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + } + else + { + operatorInputs.push_back(kTfLiteOptionalTensor); + } + + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(inputToForgetWeights.data()), + sizeof(T) * inputToForgetWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfoInputSize.data(), + tensorInfoInputSize.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("inputToForgetWeights"), + weightQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(inputToCellWeights.data()), + sizeof(T) * inputToCellWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfoInputSize.data(), + tensorInfoInputSize.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("inputToCellWeights"), + weightQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(inputToOutputWeights.data()), + sizeof(T) * inputToOutputWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfoInputSize.data(), + tensorInfoInputSize.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("inputToOutputWeights"), + weightQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + + if (hasRecurrentToInputWeights) + { + buffers.push_back(CreateBuffer( + flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(recurrentToInputWeights.data()), + sizeof(T) * recurrentToInputWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfoOutputSize.data(), + tensorInfoOutputSize.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("recurrentToInputWeights"), + weightQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + } + else + { + operatorInputs.push_back(kTfLiteOptionalTensor); + } + + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(recurrentToForgetWeights.data()), + sizeof(T) * recurrentToForgetWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfoOutputSize.data(), + tensorInfoOutputSize.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("recurrentToForgetWeights"), + weightQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(recurrentToCellWeights.data()), + sizeof(T) * recurrentToCellWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfoOutputSize.data(), + tensorInfoOutputSize.size()), + 
tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("recurrentToCellWeights"), + weightQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(recurrentToOutputWeights.data()), + sizeof(T) * recurrentToOutputWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfoOutputSize.data(), + tensorInfoOutputSize.size()), + tensorType, + buffers.size() - 1 , + flatBufferBuilder.CreateString("recurrentToOutputWeights"), + weightQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + + if (hasCellToInputWeights) + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(cellToInputWeights.data()), + sizeof(T) * cellToInputWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfoNumUnits.data(), + tensorInfoNumUnits.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("cellToInputWeights"), + weightQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + } + else + { + operatorInputs.push_back(kTfLiteOptionalTensor); + } + + if (hasCellToForgetWeights) + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(cellToForgetWeights.data()), + sizeof(T) * cellToForgetWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfoNumUnits.data(), + tensorInfoNumUnits.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("cellToForgetWeights"), + weightQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + } + else + { + operatorInputs.push_back(kTfLiteOptionalTensor); + } + + if (hasCellToOutputWeights) + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(cellToOutputWeights.data()), + sizeof(T) * cellToOutputWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfoNumUnits.data(), + tensorInfoNumUnits.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("cellToOutputWeights"), + weightQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + } + else + { + operatorInputs.push_back(kTfLiteOptionalTensor); + } + + if (hasInputGateBias) + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(inputGateBias.data()), + sizeof(float) * inputGateBias.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfoNumUnits.data(), + tensorInfoNumUnits.size()), + ::tflite::TensorType_FLOAT32, + buffers.size() - 1, + flatBufferBuilder.CreateString("inputGateBias"))); + operatorInputs.push_back(buffers.size() - 1); + } + else + { + operatorInputs.push_back(kTfLiteOptionalTensor); + } + + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(forgetGateBias.data()), + sizeof(float) * forgetGateBias.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfoNumUnits.data(), + tensorInfoNumUnits.size()), + ::tflite::TensorType_FLOAT32, + buffers.size() - 1, + flatBufferBuilder.CreateString("forgetGateBias"))); + operatorInputs.push_back(buffers.size() - 1); + + 
buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(cellBias.data()), + sizeof(float) * cellBias.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfoNumUnits.data(), + tensorInfoNumUnits.size()), + ::tflite::TensorType_FLOAT32, + buffers.size() - 1, + flatBufferBuilder.CreateString("cellBias"))); + operatorInputs.push_back(buffers.size() - 1); + + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(outputGateBias.data()), + sizeof(float) * outputGateBias.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfoNumUnits.data(), + tensorInfoNumUnits.size()), + ::tflite::TensorType_FLOAT32, + buffers.size() - 1, + flatBufferBuilder.CreateString("outputGateBias"))); + operatorInputs.push_back(buffers.size() - 1); + + if (hasProjectionWeights) + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(projectionWeights.data()), + sizeof(T) * projectionWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(projectionWeightDimensions.data(), + projectionWeightDimensions.size()), + tensorType, + buffers.size() - 1, + flatBufferBuilder.CreateString("projectionWeights"), + weightQuantizationParameters)); + operatorInputs.push_back(buffers.size() - 1); + } + else + { + operatorInputs.push_back(kTfLiteOptionalTensor); + } + + if (hasProjectionBias) + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(projectionBias.data()), + sizeof(float) * projectionBias.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(projectionBiasDimensions.data(), + projectionBiasDimensions.size()), + ::tflite::TensorType_FLOAT32, + buffers.size() - 1, + flatBufferBuilder.CreateString("projectionBias"))); + operatorInputs.push_back(buffers.size() - 1); + } + else + { + operatorInputs.push_back(kTfLiteOptionalTensor); + } + + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputStateInDimensions.data(), + outputStateInDimensions.size()), + ::tflite::TensorType_FLOAT32, + buffers.size() - 1, + flatBufferBuilder.CreateString("outputStateInInfo"), + quantizationParameters, + true)); + operatorInputs.push_back(buffers.size() - 1); + + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(cellStateInDimensions.data(), + cellStateInDimensions.size()), + ::tflite::TensorType_FLOAT32, + buffers.size() - 1, + flatBufferBuilder.CreateString("cellStateInInfo"), + quantizationParameters, + true)); + operatorInputs.push_back(buffers.size() - 1); + + if (hasInputLayerNormWeights) + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector( + reinterpret_cast(inputLayerNormWeights.data()), + sizeof(float) * inputLayerNormWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfoNumUnits.data(), + tensorInfoNumUnits.size()), + ::tflite::TensorType_FLOAT32, + buffers.size() - 1, + flatBufferBuilder.CreateString("inputLayerNormWeights"))); + operatorInputs.push_back(buffers.size() - 1); + } + else + { + 
operatorInputs.push_back(kTfLiteOptionalTensor); + } + + if (hasForgetLayerNormWeights) + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector( + reinterpret_cast(forgetLayerNormWeights.data()), + sizeof(float) * forgetLayerNormWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfoNumUnits.data(), + tensorInfoNumUnits.size()), + ::tflite::TensorType_FLOAT32, + buffers.size() - 1, + flatBufferBuilder.CreateString("forgetLayerNormWeights"))); + operatorInputs.push_back(buffers.size() - 1); + } + else + { + operatorInputs.push_back(kTfLiteOptionalTensor); + } + + if (hasCellLayerNormWeights) + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(cellLayerNormWeights.data()), + sizeof(float) * cellLayerNormWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfoNumUnits.data(), + tensorInfoNumUnits.size()), + ::tflite::TensorType_FLOAT32, + buffers.size() - 1, + flatBufferBuilder.CreateString("cellLayerNormWeights"))); + operatorInputs.push_back(buffers.size() - 1); + } + else + { + operatorInputs.push_back(kTfLiteOptionalTensor); + } + + if (hasOutputLayerNormWeights) + { + buffers.push_back( + CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector( + reinterpret_cast(outputLayerNormWeights.data()), + sizeof(float) * outputLayerNormWeights.size()))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensorInfoNumUnits.data(), + tensorInfoNumUnits.size()), + ::tflite::TensorType_FLOAT32, + buffers.size() - 1, + flatBufferBuilder.CreateString("outputLayerNormWeights"))); + operatorInputs.push_back(buffers.size() - 1); + } + else + { + operatorInputs.push_back(kTfLiteOptionalTensor); + } + int outputBufferId = buffers.size(); + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + tensors.push_back(CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputShape.data(), + outputShape.size()), + ::tflite::TensorType_FLOAT32, + outputBufferId, + flatBufferBuilder.CreateString("output"))); + std::vector operatorOutputs; + operatorOutputs.push_back(buffers.size() - 1); + + // create operator + tflite::BuiltinOptions operatorBuiltinOptionsType = BuiltinOptions_UnidirectionalSequenceLSTMOptions; + flatbuffers::Offset operatorBuiltinOptions = + CreateUnidirectionalSequenceLSTMOptions(flatBufferBuilder, + activationFunction, + clippingThresCell, + clippingThresProj, + isTimeMajor).Union(); + + flatbuffers::Offset lstmOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, operatorBuiltinOptions); + + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + flatBufferBuilder.CreateVector(&lstmOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: UnidirectionalSequenceLSTM Operator Model"); + flatbuffers::Offset operatorCode = + CreateOperatorCode(flatBufferBuilder, tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM); + + 
flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void UnidirectionalSequenceLstmTestImpl(std::vector& backends, + tflite::TensorType tensorType, + int32_t batchSize, + int32_t timeSize, + int32_t inputSize, + int32_t outputSize, + int32_t numUnits, + bool hasInputToInputWeights, + const std::vector& inputToInputWeights, + const std::vector& inputToForgetWeights, + const std::vector& inputToCellWeights, + const std::vector& inputToOutputWeights, + bool hasRecurrentToInputWeights, + const std::vector& recurrentToInputWeights, + const std::vector& recurrentToForgetWeights, + const std::vector& recurrentToCellWeights, + const std::vector& recurrentToOutputWeights, + bool hasCellToInputWeights, + const std::vector& cellToInputWeights, + bool hasCellToForgetWeights, + const std::vector& cellToForgetWeights, + bool hasCellToOutputWeights, + const std::vector& cellToOutputWeights, + bool hasInputGateBias, + const std::vector& inputGateBias, + const std::vector& forgetGateBias, + const std::vector& cellBias, + const std::vector& outputGateBias, + bool hasProjectionWeights, + const std::vector& projectionWeights, + bool hasProjectionBias, + const std::vector& projectionBias, + bool hasInputLayerNormWeights, + const std::vector& inputLayerNormWeights, + bool hasForgetLayerNormWeights, + const std::vector& forgetLayerNormWeights, + bool hasCellLayerNormWeights, + const std::vector& cellLayerNormWeights, + bool hasOutputLayerNormWeights, + const std::vector& outputLayerNormWeights, + std::vector& inputValues, + std::vector& expectedOutputValues, + tflite::ActivationFunctionType activationFunction, + float clippingThresCell, + float clippingThresProj, + bool isTimeMajor, + float quantScale = 0.1f) +{ + using namespace tflite; + + std::vector modelBuffer = CreateUnidirectionalSequenceLstmTfLiteModel(tensorType, + batchSize, + timeSize, + inputSize, + outputSize, + numUnits, + hasInputToInputWeights, + inputToInputWeights, + inputToForgetWeights, + inputToCellWeights, + inputToOutputWeights, + hasRecurrentToInputWeights, + recurrentToInputWeights, + recurrentToForgetWeights, + recurrentToCellWeights, + recurrentToOutputWeights, + hasCellToInputWeights, + cellToInputWeights, + hasCellToForgetWeights, + cellToForgetWeights, + hasCellToOutputWeights, + cellToOutputWeights, + hasInputGateBias, + inputGateBias, + forgetGateBias, + cellBias, + outputGateBias, + hasProjectionWeights, + projectionWeights, + hasProjectionBias, + projectionBias, + hasInputLayerNormWeights, + inputLayerNormWeights, + hasForgetLayerNormWeights, + forgetLayerNormWeights, + hasCellLayerNormWeights, + cellLayerNormWeights, + hasOutputLayerNormWeights, + outputLayerNormWeights, + activationFunction, + clippingThresCell, + clippingThresProj, + isTimeMajor, + quantScale); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + 
CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + auto tfLiteDelegateInputId = tfLiteInterpreter->inputs()[0]; + auto tfLiteDelageInputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateInputId); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + tfLiteDelageInputData[i] = inputValues[i]; + } + + auto armnnDelegateInputId = armnnDelegateInterpreter->inputs()[0]; + auto armnnDelegateInputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateInputId); + for (unsigned int i = 0; i < inputValues.size(); ++i) + { + armnnDelegateInputData[i] = inputValues[i]; + } + + // Run EnqueueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0]; + auto tfLiteDelagateOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0]; + auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateOutputId); + + if (tensorType == ::tflite::TensorType_INT8) + { + // Allow 2% tolerance for Quantized weights + armnnDelegate::CompareData(expectedOutputValues.data(), armnnDelegateOutputData, + expectedOutputValues.size(), 2); + armnnDelegate::CompareData(expectedOutputValues.data(), tfLiteDelagateOutputData, + expectedOutputValues.size(), 2); + armnnDelegate::CompareData(tfLiteDelagateOutputData, armnnDelegateOutputData, + expectedOutputValues.size(), 2); + } + else + { + armnnDelegate::CompareData(expectedOutputValues.data(), armnnDelegateOutputData, expectedOutputValues.size()); + armnnDelegate::CompareData(expectedOutputValues.data(), tfLiteDelagateOutputData, expectedOutputValues.size()); + armnnDelegate::CompareData(tfLiteDelagateOutputData, armnnDelegateOutputData, expectedOutputValues.size()); + } +} + +} // anonymous namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/UnpackTest.cpp b/arch/arm/ARMnn/delegate/src/test/UnpackTest.cpp new file mode 100644 index 0000000000..c036f649ef --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/UnpackTest.cpp @@ -0,0 +1,179 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "UnpackTestHelper.hpp" + +#include + +#include +#include + +#include + +namespace armnnDelegate +{ + +template +void UnpackAxis0Num4Test(tflite::TensorType tensorType, std::vector& backends) +{ + std::vector inputShape { 4, 1, 6 }; + std::vector expectedOutputShape { 1, 6 }; + + std::vector inputValues { 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24 }; + + std::vector expectedOutputValues0 { 1, 2, 3, 4, 5, 6 }; + std::vector expectedOutputValues1 { 7, 8, 9, 10, 11, 12 }; + std::vector expectedOutputValues2 { 13, 14, 15, 16, 17, 18 }; + std::vector expectedOutputValues3 { 19, 20, 21, 22, 23, 24 }; + + std::vector> expectedOutputValues{ expectedOutputValues0, + expectedOutputValues1, + expectedOutputValues2, + expectedOutputValues3 }; + + UnpackTest(tflite::BuiltinOperator_UNPACK, + tensorType, + backends, + inputShape, + expectedOutputShape, + inputValues, + expectedOutputValues, + 0); +} + +template +void UnpackAxis2Num6Test(tflite::TensorType tensorType, std::vector& backends) +{ + std::vector inputShape { 4, 1, 6 }; + std::vector expectedOutputShape { 4, 1 }; + + std::vector inputValues { 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24 }; + + std::vector expectedOutputValues0 { 1, 7, 13, 19 }; + std::vector expectedOutputValues1 { 2, 8, 14, 20 }; + std::vector expectedOutputValues2 { 3, 9, 15, 21 }; + std::vector expectedOutputValues3 { 4, 10, 16, 22 }; + std::vector expectedOutputValues4 { 5, 11, 17, 23 }; + std::vector expectedOutputValues5 { 6, 12, 18, 24 }; + + std::vector> expectedOutputValues{ expectedOutputValues0, + expectedOutputValues1, + expectedOutputValues2, + expectedOutputValues3, + expectedOutputValues4, + expectedOutputValues5 }; + + UnpackTest(tflite::BuiltinOperator_UNPACK, + tensorType, + backends, + inputShape, + expectedOutputShape, + inputValues, + expectedOutputValues, + 2); +} + +TEST_SUITE("Unpack_CpuRefTests") +{ + +// Fp32 +TEST_CASE ("Unpack_Fp32_Axis0_Num4_CpuRef_Test") +{ +std::vector backends = {armnn::Compute::CpuRef}; +UnpackAxis0Num4Test(tflite::TensorType_FLOAT32, backends); +} + +TEST_CASE ("Unpack_Fp32_Axis2_Num6_CpuRef_Test") +{ +std::vector backends = {armnn::Compute::CpuRef}; +UnpackAxis2Num6Test(tflite::TensorType_FLOAT32, backends); +} + +// Uint8 +TEST_CASE ("Unpack_Uint8_Axis0_Num4_CpuRef_Test") +{ +std::vector backends = {armnn::Compute::CpuRef}; +UnpackAxis0Num4Test(tflite::TensorType_UINT8, backends); +} + +TEST_CASE ("Unpack_Uint8_Axis2_Num6_CpuRef_Test") +{ +std::vector backends = {armnn::Compute::CpuRef}; +UnpackAxis2Num6Test(tflite::TensorType_UINT8, backends); +} + +} // End of Unpack_CpuRefTests + +TEST_SUITE("Unpack_CpuAccTests") +{ + +// Fp32 +TEST_CASE ("Unpack_Fp32_Axis0_Num4_CpuAcc_Test") +{ +std::vector backends = {armnn::Compute::CpuAcc}; +UnpackAxis0Num4Test(tflite::TensorType_FLOAT32, backends); +} + +TEST_CASE ("Unpack_Fp32_Axis2_Num6_CpuAcc_Test") +{ +std::vector backends = {armnn::Compute::CpuAcc}; +UnpackAxis2Num6Test(tflite::TensorType_FLOAT32, backends); +} + +// Uint8 +TEST_CASE ("Unpack_Uint8_Axis0_Num4_CpuAcc_Test") +{ +std::vector backends = {armnn::Compute::CpuAcc}; +UnpackAxis0Num4Test(tflite::TensorType_UINT8, backends); +} + +TEST_CASE ("Unpack_Uint8_Axis2_Num6_CpuAcc_Test") +{ +std::vector backends = {armnn::Compute::CpuAcc}; +UnpackAxis2Num6Test(tflite::TensorType_UINT8, backends); +} + +} // End of Unpack_CpuAccTests + +TEST_SUITE("Unpack_GpuAccTests") +{ + +// Fp32 
+TEST_CASE ("Unpack_Fp32_Axis0_Num4_GpuAcc_Test") +{ +std::vector backends = {armnn::Compute::GpuAcc}; +UnpackAxis0Num4Test(tflite::TensorType_FLOAT32, backends); +} + +TEST_CASE ("Unpack_Fp32_Axis2_Num6_GpuAcc_Test") +{ +std::vector backends = {armnn::Compute::GpuAcc}; +UnpackAxis2Num6Test(tflite::TensorType_FLOAT32, backends); +} + +// Uint8 +TEST_CASE ("Unpack_Uint8_Axis0_Num4_GpuAcc_Test") +{ +std::vector backends = {armnn::Compute::GpuAcc}; +UnpackAxis0Num4Test(tflite::TensorType_UINT8, backends); +} + +TEST_CASE ("Unpack_Uint8_Axis2_Num6_GpuAcc_Test") +{ +std::vector backends = {armnn::Compute::GpuAcc}; +UnpackAxis2Num6Test(tflite::TensorType_UINT8, backends); +} + +} // End of Unpack_GpuAccTests + +// End of Unpack Test Suite + +} // namespace armnnDelegate \ No newline at end of file diff --git a/arch/arm/ARMnn/delegate/src/test/UnpackTestHelper.hpp b/arch/arm/ARMnn/delegate/src/test/UnpackTestHelper.hpp new file mode 100644 index 0000000000..848713498f --- /dev/null +++ b/arch/arm/ARMnn/delegate/src/test/UnpackTestHelper.hpp @@ -0,0 +1,185 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +namespace +{ + +std::vector CreateUnpackTfLiteModel(tflite::BuiltinOperator unpackOperatorCode, + tflite::TensorType tensorType, + std::vector& inputTensorShape, + const std::vector & outputTensorShape, + const int32_t outputTensorNum, + unsigned int axis = 0, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::vector> buffers; + buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}))); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + + const std::vector operatorInputs{ 0 }; + std::vector operatorOutputs{}; + const std::vector subgraphInputs{ 0 }; + std::vector subgraphOutputs{}; + + std::vector> tensors(outputTensorNum + 1); + + // Create input tensor + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(inputTensorShape.data(), + inputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input"), + quantizationParameters); + + for (int i = 0; i < outputTensorNum; ++i) + { + tensors[i + 1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output" + std::to_string(i)), + quantizationParameters); + + operatorOutputs.push_back(i + 1); + subgraphOutputs.push_back(i + 1); + } + + // create operator + tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_UnpackOptions; + flatbuffers::Offset operatorBuiltinOptions = + CreateUnpackOptions(flatBufferBuilder, outputTensorNum, axis).Union(); + + flatbuffers::Offset unpackOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + 
flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&unpackOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Unpack Operator Model"); + flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, unpackOperatorCode); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void UnpackTest(tflite::BuiltinOperator unpackOperatorCode, + tflite::TensorType tensorType, + std::vector& backends, + std::vector& inputShape, + std::vector& expectedOutputShape, + std::vector& inputValues, + std::vector>& expectedOutputValues, + unsigned int axis = 0, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + std::vector modelBuffer = CreateUnpackTfLiteModel(unpackOperatorCode, + tensorType, + inputShape, + expectedOutputShape, + expectedOutputValues.size(), + axis, + quantScale, + quantOffset); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(tfLiteInterpreter, 0, inputValues); + armnnDelegate::FillInput(armnnDelegateInterpreter, 0, inputValues); + + + // Run EnqueueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + for (unsigned int i = 0; i < expectedOutputValues.size(); ++i) + { + armnnDelegate::CompareOutputData(tfLiteInterpreter, + armnnDelegateInterpreter, + expectedOutputShape, + expectedOutputValues[i], + i); + } + + armnnDelegateInterpreter.reset(nullptr); +} + +} // anonymous namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/docker/README.md b/arch/arm/ARMnn/docker/README.md new file mode 100644 index 0000000000..beb201836d --- /dev/null +++ b/arch/arm/ARMnn/docker/README.md @@ -0,0 +1,34 @@ +# ARMNN Docker Files + + +## [Android NDK to build ArmNN](https://github.com/ARM-software/armnn/blob/branches/armnn_20_02/BuildGuideAndroidNDK.md):
+ +armnn-android folder has the docker file to build a Android NDK container to build ARMNN. + +## [ArmNN on x86_64 for arm64](https://github.com/ARM-software/armnn/blob/branches/armnn_20_02/BuildGuideCrossCompilation.md) + +x86_64 folder has the docker file to build ArmNN under an x86_64 system to target an Arm64 system. + +# To build a docker images +```bash +docker build --rm --build-arg proxy=$http_proxy --rm --tag armnn:v1 . +``` + +# To Run docker images +```bash +docker run -v /etc/localtime:/etc/localtime:ro --rm -it -e http_proxy -e https_proxy -e ftp_proxy -v `pwd`:/work armnn:v1 bash +``` + +# To run a docker with X11 support for GUI Application +```bash +docker run -v /etc/localtime:/etc/localtime:ro --rm -it -e DISPLAY=$DISPLAY -v /tmp/.X11-unix:/tmp/.X11-unix -e http_proxy -e https_proxy -e ftp_proxy -v `pwd`:/work armnn:v1 bash +``` + +# To mount the camera and access camera from docker env +```bash +docker run -v /etc/localtime:/etc/localtime:ro --rm -it --device /dev/video0 -e http_proxy -e https_proxy -e ftp_proxy -v `pwd`:/work armnn:v1 bash +``` + + + + diff --git a/arch/arm/ARMnn/docker/armnn-android/Dockerfile b/arch/arm/ARMnn/docker/armnn-android/Dockerfile new file mode 100644 index 0000000000..a2f753ffed --- /dev/null +++ b/arch/arm/ARMnn/docker/armnn-android/Dockerfile @@ -0,0 +1,164 @@ +FROM ubuntu:18.04 +ENV TERM linux +ENV DEBIAN_FRONTEND noninteractive + +# Forward system proxy setting +# ARG proxy +# ENV http_proxy $proxy +# ENV https_proxy $proxy + +# Basic apt update +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends locales ca-certificates && rm -rf /var/lib/apt/lists/* + +# Set the locale to en_US.UTF-8, because the Yocto build fails without any locale set. +RUN locale-gen en_US.UTF-8 && update-locale LC_ALL=en_US.UTF-8 LANG=en_US.UTF-8 +ENV LANG en_US.UTF-8 +ENV LC_ALL en_US.UTF-8 + +# Again, off the certificare +RUN echo "check_certificate = off" >> ~/.wgetrc +RUN echo "[global] \n\ +trusted-host = pypi.python.org \n \ +\t pypi.org \n \ +\t files.pythonhosted.org" >> /etc/pip.conf + +# Get basic packages +RUN apt-get update && apt-get install -y \ + apparmor \ + aufs-tools \ + automake \ + bash-completion \ + btrfs-tools \ + build-essential \ + cmake \ + createrepo \ + curl \ + dpkg-sig \ + g++ \ + gcc \ + git \ + iptables \ + jq \ + libapparmor-dev \ + libc6-dev \ + libcap-dev \ + libsystemd-dev \ + libyaml-dev \ + mercurial \ + net-tools \ + parallel \ + pkg-config \ + python-dev \ + python-mock \ + python-pip \ + python-setuptools \ + python-websocket \ + golang-go \ + iproute2 \ + iputils-ping \ + vim-common \ + vim \ + wget \ + libtool \ + unzip \ + scons \ + curl \ + autoconf \ + libtool \ + build-essential \ + g++ \ + cmake && rm -rf /var/lib/apt/lists/* + +# Download the Android NDK and make a standalone toolchain +RUN mkdir -p /home/armnn-devenv/toolchains && \ + cd /home/armnn-devenv/toolchains && \ + wget https://dl.google.com/android/repository/android-ndk-r17b-linux-x86_64.zip && \ + unzip android-ndk-r17b-linux-x86_64.zip + +ENV NDK /home/armnn-devenv/toolchains/android-ndk-r17b + +RUN $NDK/build/tools/make_standalone_toolchain.py \ + --arch arm64 \ + --api 26 \ + --stl=libc++ \ + --install-dir=/home/armnn-devenv/toolchains/aarch64-android-r17b + +ENV PATH=/home/armnn-devenv/toolchains/aarch64-android-r17b/bin:$PATH + +# Build the Boost C++ libraries +RUN mkdir /home/armnn-devenv/boost && \ + cd /home/armnn-devenv/boost && \ + wget 
https://dl.bintray.com/boostorg/release/1.64.0/source/boost_1_64_0.tar.bz2 && \ + tar xvf boost_1_64_0.tar.bz2 + +RUN echo "using gcc : arm : aarch64-linux-android-clang++ ;" > /home/armnn-devenv/boost/user-config.jam && \ + cd /home/armnn-devenv/boost/boost_1_64_0 && \ + ./bootstrap.sh --prefix=/home/armnn-devenv/boost/install && \ + ./b2 install --user-config=/home/armnn-devenv/boost/user-config.jam \ + toolset=gcc-arm link=static cxxflags=-fPIC --with-filesystem \ + --with-test --with-log --with-program_options -j8 + +# Build the Compute Library +RUN cd /home/armnn-devenv && \ + git clone https://github.com/ARM-software/ComputeLibrary.git && \ + cd ComputeLibrary && \ + scons arch=arm64-v8a neon=1 opencl=1 embed_kernels=1 extra_cxx_flags="-fPIC" \ + benchmark_tests=0 validation_tests=0 os=android -j8 + +# RUN mkdir /home/armnn-devenv/google && \ +RUN mkdir -p /home/armnn-devenv/google && \ + cd /home/armnn-devenv/google && \ + git clone https://github.com/google/protobuf.git && \ + cd protobuf && \ + git checkout -b v3.5.2 v3.5.2 && \ + ./autogen.sh && \ + mkdir x86_build && \ + cd x86_build && \ + ../configure --prefix=/home/armnn-devenv/google/x86_pb_install && \ + make install -j8 + +RUN cd /home/armnn-devenv/google/protobuf && \ + mkdir arm64_build && cd arm64_build && \ + CC=aarch64-linux-android-clang \ + CXX=aarch64-linux-android-clang++ \ + CFLAGS="-fPIE -fPIC" LDFLAGS="-pie -llog" \ + ../configure --host=aarch64-linux-android \ + --prefix=/home/armnn-devenv/google/arm64_pb_install \ + --with-protoc=/home/armnn-devenv/google/x86_pb_install/bin/protoc && \ + make install -j8 + +# clone Tensorflow +RUN cd /home/armnn-devenv/google/ && \ + git clone https://github.com/tensorflow/tensorflow.git + +# Clone ARMNN +RUN cd /home/armnn-devenv/ && \ + git clone https://github.com/ARM-software/armnn.git + +# Generate TensorFlow protobuf definitions +RUN cd /home/armnn-devenv/google/tensorflow && \ + git checkout a0043f9262dc1b0e7dc4bdf3a7f0ef0bebc4891e && \ + /home/armnn-devenv/armnn/scripts/generate_tensorflow_protobuf.sh \ + /home/armnn-devenv/google/tf_pb /home/armnn-devenv/google/x86_pb_install + +ENV PATH=/home/armnn-devenv/toolchains/android-ndk-r17b:$PATH +# Build Google Flatbuffers for ARMNN TFLite Parser +RUN cd /home/armnn-devenv/ && \ + git clone https://github.com/google/flatbuffers.git && \ + cd flatbuffers && \ + cd android && cd jni && \ + rm -rf Application.mk && \ + echo "APP_STL := c++_static" >> Application.mk && \ + echo "NDK_TOOLCHAIN_VERSION" := clang >> Application.mk && \ + echo "APP_CPPFLAGS :=-std=c++11" >> Application.mk && \ + echo "APP_ABI := arm64-v8a" >> Application.mk && \ + echo "APP_PLATFORM := android-23" >> Application.mk && \ + echo "NDK_PLATFORM=android-23" >> Application.mk && \ + cd ../ && ndk-build -B + +COPY ./docker-entrypoint.sh /usr/bin +RUN chmod +x /usr/bin/docker-entrypoint.sh +ENTRYPOINT ["/usr/bin/docker-entrypoint.sh"] + +#To do: +# 1. Flatbuffers build Application.mk hardcode value need to fix. 
\ No newline at end of file diff --git a/arch/arm/ARMnn/docker/armnn-android/docker-entrypoint.sh b/arch/arm/ARMnn/docker/armnn-android/docker-entrypoint.sh new file mode 100644 index 0000000000..3a8258937b --- /dev/null +++ b/arch/arm/ARMnn/docker/armnn-android/docker-entrypoint.sh @@ -0,0 +1,116 @@ +#!/bin/bash + +set -e + +dArmNN=/work +dComputeLib=/home/armnn-devenv/ComputeLibrary +dTensorflow=/home/armnn-devenv/google/tensorflow +dFlatBuffer=/home/armnn-devenv/flatbuffers + +#Function to build ARMNN +function buildArmNN() +{ + mkdir -p ${dArmNN}/armnn-devenv && cd ${dArmNN}/armnn-devenv + git clone https://github.com/ARM-software/armnn.git && cd armnn/ + mkdir build && cd build + CXX=aarch64-linux-android-clang++ \ + CC=aarch64-linux-android-clang \ + CXX_FLAGS="-fPIE -fPIC" \ + cmake .. \ + -DCMAKE_SYSTEM_NAME=Android \ + -DCMAKE_ANDROID_ARCH_ABI=arm64-v8a \ + -DCMAKE_ANDROID_STANDALONE_TOOLCHAIN=/home/armnn-devenv/toolchains/aarch64-android-r17b/ \ + -DCMAKE_EXE_LINKER_FLAGS="-pie -llog" \ + -DARMCOMPUTE_ROOT=/home/armnn-devenv/ComputeLibrary/ \ + -DARMCOMPUTE_BUILD_DIR=/home/armnn-devenv/ComputeLibrary/build \ + -DBOOST_ROOT=/home/armnn-devenv/boost/install/ \ + -DARMCOMPUTENEON=1 -DARMCOMPUTECL=1 -DARMNNREF=1 \ + -DTF_GENERATED_SOURCES=/home/armnn-devenv/google/tf_pb/ -DBUILD_TF_PARSER=1 \ + -DPROTOBUF_ROOT=/home/armnn-devenv/google/arm64_pb_install/ + make -j8 +} + +# Function to update Compute Lib +function updateComputeLib() +{ + pushd ${dComputeLib} + git pull + scons arch=arm64-v8a neon=1 opencl=1 embed_kernels=1 extra_cxx_flags="-fPIC" \ + benchmark_tests=0 validation_tests=0 os=android -j8 + echo "Compute Lib updated" + popd +} + +# Function to update FlatBuffer Lib +function updateFlatBuffer() +{ + pushd ${dFlatBuffer} + git pull + cd android && cd jni && \ + rm -rf Application.mk && \ + echo "APP_STL := c++_static" >> Application.mk && \ + echo "NDK_TOOLCHAIN_VERSION" := clang >> Application.mk && \ + echo "APP_CPPFLAGS :=-std=c++11" >> Application.mk && \ + echo "APP_ABI := arm64-v8a" >> Application.mk && \ + echo "APP_PLATFORM := android-23" >> Application.mk && \ + echo "NDK_PLATFORM=android-23" >> Application.mk && \ + cd ../ && ndk-build -B + echo "Compute Lib updated" + popd +} + +# Main +if [ ! -d "/work/armnn-devenv/armnn/" ]; +then + buildArmNN +fi + +# Check Compute Library changes from repo +cd ${dComputeLib} +if [ $(git rev-parse HEAD) = $(git ls-remote $(git rev-parse --abbrev-ref @{u} | +sed 's/\// /g') | cut -f1) ] +then + echo "Compute Lib Up-to-date" +else + echo "New changes are availble for Compute Library repo." + echo "Do you wanna update (y/n)?" + read answer + if [ "$answer" != "${answer#[Yy]}" ] ;then + updateComputeLib + fi +fi + +# Check Tensorflow changes from repo +cd ${dTensorflow} +if [ $(git rev-parse HEAD) = $(git ls-remote $(git rev-parse --abbrev-ref @{u} | +sed 's/\// /g') | cut -f1) ] +then + echo "Tensrflow Lib Up-to-date" +else + echo "Tensrflow Lib Not Up-to-date" + echo "Skipping for now. Issue: #267" + #echo "New changes are availble for Compute Library repo." + #echo "Do you wanna update (y/n)?" + #read answer + #if [ "$answer" != "${answer#[Yy]}" ] ;then + # + #fi +fi + +# Check FlatBuffer changes from repo +cd ${dFlatBuffer} +if [ $(git rev-parse HEAD) = $(git ls-remote $(git rev-parse --abbrev-ref @{u} | +sed 's/\// /g') | cut -f1) ] +then + echo "FlatBuffer Up-to-date" +else + echo "FlatBuffer Not Up-to-date" + echo "New changes are availble for Compute Library repo." + echo "Do you wanna update (y/n)?" 
+ read answer + if [ "$answer" != "${answer#[Yy]}" ] ;then + updateFlatBuffer + fi +fi + +exec "$@" \ No newline at end of file diff --git a/arch/arm/ARMnn/docker/x86_64/Dockerfile b/arch/arm/ARMnn/docker/x86_64/Dockerfile new file mode 100644 index 0000000000..769e716024 --- /dev/null +++ b/arch/arm/ARMnn/docker/x86_64/Dockerfile @@ -0,0 +1,213 @@ +FROM ubuntu:18.04 +ENV TERM linux +ENV DEBIAN_FRONTEND noninteractive + +# Forward system proxy setting +# ARG proxy +# ENV http_proxy $proxy +# ENV https_proxy $proxy + +# Basic apt update +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends locales ca-certificates && rm -rf /var/lib/apt/lists/* + +# Set the locale to en_US.UTF-8, because the Yocto build fails without any locale set. +RUN locale-gen en_US.UTF-8 && update-locale LC_ALL=en_US.UTF-8 LANG=en_US.UTF-8 +ENV LANG en_US.UTF-8 +ENV LC_ALL en_US.UTF-8 + +# Again, off the certificare +RUN echo "check_certificate = off" >> ~/.wgetrc +RUN echo "[global] \n\ +trusted-host = pypi.python.org \n \ +\t pypi.org \n \ +\t files.pythonhosted.org" >> /etc/pip.conf + +# Get basic packages +RUN apt-get update && apt-get install -y \ + apparmor \ + aufs-tools \ + automake \ + bash-completion \ + btrfs-tools \ + build-essential \ + cmake \ + createrepo \ + curl \ + dpkg-sig \ + g++ \ + gcc \ + git \ + iptables \ + jq \ + libapparmor-dev \ + libc6-dev \ + libcap-dev \ + libsystemd-dev \ + libyaml-dev \ + mercurial \ + net-tools \ + parallel \ + pkg-config \ + python-dev \ + python-mock \ + python-pip \ + python-setuptools \ + python-websocket \ + golang-go \ + iproute2 \ + iputils-ping \ + vim-common \ + vim \ + wget \ + libtool \ + unzip \ + scons \ + curl \ + autoconf \ + libtool \ + build-essential \ + libssl-dev \ + g++ && rm -rf /var/lib/apt/lists/* + +# Install Cross-compiling ToolChain +RUN apt-get update && apt-get install -y crossbuild-essential-arm64 + +# Install Cmake 3.19 +RUN cd $HOME && \ + wget -O cmake-3.19.0.tar.gz https://cmake.org/files/v3.19/cmake-3.19.0.tar.gz && \ + tar -xzf cmake-3.19.0.tar.gz && \ + cd $HOME/cmake-3.19.0 && \ + ./bootstrap && \ + make && \ + make install + +# Build and install Google's Protobuf library +# Download and Extract +RUN mkdir -p $HOME/google && \ + cd $HOME/google && \ + wget https://github.com/protocolbuffers/protobuf/releases/download/v3.12.0/protobuf-all-3.12.0.tar.gz && \ + tar -zxvf protobuf-all-3.12.0.tar.gz + +# Build a native (x86_64) version +RUN cd $HOME/google/protobuf-3.12.0 && \ + mkdir x86_build && cd x86_build && \ + ../configure --prefix=$HOME/armnn-devenv/google/x86_64_pb_install && \ + make install -j$(nproc) + +# Build the arm64 version of the protobuf libraries +RUN cd $HOME/google/protobuf-3.12.0 && \ + mkdir arm64_build && cd arm64_build && \ + export CC=aarch64-linux-gnu-gcc && \ + export CXX=aarch64-linux-gnu-g++ && \ + ../configure --host=aarch64-linux \ + --prefix=$HOME/armnn-devenv/google/arm64_pb_install \ + --with-protoc=$HOME/armnn-devenv/google/x86_64_pb_install/bin/protoc && \ + make install -j$(nproc) + +# Dep Error - Bug ARMNN +RUN apt-get update && apt-get install -y \ + python-numpy + +# Setup Env +# ENV PATH=$HOME/armnn-devenv/google/x86_64_pb_install/bin/:$PATH +# ENV LD_LIBRARY_PATH=$HOME/armnn-devenv/google/x86_64_pb_install/lib/:$LD_LIBRARY_PATH + +# Download ArmNN +RUN cd $HOME/armnn-devenv && git clone "https://review.mlplatform.org/ml/armnn" && \ + cd armnn && git checkout master + +# Build Compute Library +RUN cd $HOME/armnn-devenv/ && git clone 
https://review.mlplatform.org/ml/ComputeLibrary && \ + cd ComputeLibrary && \ + git checkout $($HOME/armnn-devenv/armnn/scripts/get_compute_library.sh -p) && \ + scons Werror=0 arch=arm64-v8a neon=1 opencl=1 embed_kernels=1 extra_cxx_flags="-fPIC" -j$(nproc) + +# Download Tensorflow (Checkout latest tested version of TF using get_tensorflow.sh) +RUN cd $HOME/armnn-devenv && \ + git clone https://github.com/tensorflow/tensorflow.git && \ + cd tensorflow && \ + git checkout $($HOME/armnn-devenv/armnn/scripts/get_tensorflow.sh -p) + +# Build TF Lite +RUN cd $HOME/armnn-devenv && \ + curl -LO https://storage.googleapis.com/mirror.tensorflow.org/developer.arm.com/media/Files/downloads/gnu-a/8.3-2019.03/binrel/gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz && \ + mkdir tflite-toolchains && \ + tar xvf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C tflite-toolchains && \ + mkdir -p tflite/build && \ + cd tflite/build && \ + ARMCC_PREFIX=$HOME/armnn-devenv/tflite-toolchains/gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu/bin/aarch64-linux-gnu- && \ + ARMCC_FLAGS="-funsafe-math-optimizations" && \ + cmake -DCMAKE_C_COMPILER=${ARMCC_PREFIX}gcc \ + -DCMAKE_CXX_COMPILER=${ARMCC_PREFIX}g++ \ + -DCMAKE_C_FLAGS="${ARMCC_FLAGS}" -DCMAKE_CXX_FLAGS="${ARMCC_FLAGS}" \ + -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DCMAKE_SYSTEM_NAME=Linux \ + -DTFLITE_ENABLE_XNNPACK=OFF \ + -DCMAKE_SYSTEM_PROCESSOR=aarch64 \ + $HOME/armnn-devenv/tensorflow/tensorflow/lite/ && \ + cmake --build . + +# Download Flatbuffers +RUN cd $HOME/armnn-devenv && \ + wget -O flatbuffers-1.12.0.tar.gz https://github.com/google/flatbuffers/archive/v1.12.0.tar.gz && \ + tar xf flatbuffers-1.12.0.tar.gz && \ + cd flatbuffers-1.12.0 && \ + rm -f CMakeCache.txt + +# Build native x86_64 version of Flatbuffers +RUN cd $HOME/armnn-devenv && cd flatbuffers-1.12.0 && \ + mkdir build && \ + cd build && \ + cmake .. -DFLATBUFFERS_BUILD_FLATC=1 \ + -DCMAKE_INSTALL_PREFIX:PATH=$HOME/armnn-devenv/flatbuffers \ + -DFLATBUFFERS_BUILD_TESTS=0 && \ + make all install -j$(nproc) + +# Build arm64 version of Flatbuffers +RUN cd $HOME/armnn-devenv&& cd flatbuffers-1.12.0 && \ + mkdir build-arm64 && \ + cd build-arm64 && \ + CXXFLAGS="-fPIC" cmake .. -DCMAKE_C_COMPILER=/usr/bin/aarch64-linux-gnu-gcc \ + -DCMAKE_CXX_COMPILER=/usr/bin/aarch64-linux-gnu-g++ \ + -DFLATBUFFERS_BUILD_FLATC=1 \ + -DCMAKE_INSTALL_PREFIX:PATH=$HOME/armnn-devenv/flatbuffers-arm64 \ + -DFLATBUFFERS_BUILD_TESTS=0 && \ + make all install -j$(nproc) + +# Build onnx +RUN cd $HOME/armnn-devenv && git clone https://github.com/onnx/onnx.git && \ + cd onnx && \ + git fetch https://github.com/onnx/onnx.git 553df22c67bee5f0fe6599cff60f1afc6748c635 && git checkout FETCH_HEAD && \ + LD_LIBRARY_PATH=$HOME/armnn-devenv/google/x86_64_pb_install/lib:$LD_LIBRARY_PATH \ + $HOME/armnn-devenv/google/x86_64_pb_install/bin/protoc \ + onnx/onnx.proto --proto_path=. --proto_path=../google/x86_64_pb_install/include --cpp_out $HOME/armnn-devenv/onnx + +# Generate TF Lite Schema +RUN cd $HOME/armnn-devenv && \ + cd tflite && \ + cp ../tensorflow/tensorflow/lite/schema/schema.fbs . && \ + ../flatbuffers-1.12.0/build/flatc -c --gen-object-api --reflect-types --reflect-names schema.fbs + +# Build ArmNN +RUN cd $HOME/armnn-devenv && \ + cd armnn && mkdir build && cd build && \ + export CXX=aarch64-linux-gnu-g++ && \ + export CC=aarch64-linux-gnu-gcc && \ + cmake .. 
\ + -DCMAKE_CXX_FLAGS=-w \ + -DBUILD_TESTS=1 \ + -DARMCOMPUTE_ROOT=$HOME/armnn-devenv/ComputeLibrary \ + -DARMCOMPUTE_BUILD_DIR=$HOME/armnn-devenv/ComputeLibrary/build/ \ + -DARMCOMPUTENEON=1 -DARMCOMPUTECL=1 -DARMNNREF=1 \ + -DONNX_GENERATED_SOURCES=$HOME/armnn-devenv/onnx \ + -DBUILD_ONNX_PARSER=1 \ + -DBUILD_TF_LITE_PARSER=1 \ + -DBUILD_ARMNN_TFLITE_DELEGATE=1 \ + -DTENSORFLOW_ROOT=$HOME/armnn-devenv/tensorflow \ + -DTFLITE_LIB_ROOT=$HOME/armnn-devenv/tflite/build \ + -DTF_LITE_SCHEMA_INCLUDE_PATH=$HOME/armnn-devenv/tflite \ + -DFLATBUFFERS_ROOT=$HOME/armnn-devenv/flatbuffers-arm64 \ + -DFLATC_DIR=$HOME/armnn-devenv/flatbuffers-1.12.0/build \ + -DPROTOBUF_ROOT=$HOME/armnn-devenv/google/x86_64_pb_install \ + -DPROTOBUF_LIBRARY_DEBUG=$HOME/armnn-devenv/google/arm64_pb_install/lib/libprotobuf.so.23.0.0 \ + -DPROTOBUF_LIBRARY_RELEASE=$HOME/armnn-devenv/google/arm64_pb_install/lib/libprotobuf.so.23.0.0 && \ + make -j$(nproc) diff --git a/arch/arm/ARMnn/docs/01_00_quick_start.dox b/arch/arm/ARMnn/docs/01_00_quick_start.dox new file mode 100644 index 0000000000..a2a1efa50d --- /dev/null +++ b/arch/arm/ARMnn/docs/01_00_quick_start.dox @@ -0,0 +1,25 @@ +/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved. +/// +/// SPDX-License-Identifier: MIT +/// + +namespace armnn +{ +/** +@page quickstart Quick Start Guide + +On this page you can find guides which allow you to get setup and ready to run models with Arm NN quickly. +These guides rely on using the apt packages or prebuilt binaries from our release notes. We only have added a quick start guide that uses TfLite-Delegate. +More guides will be added in the future. + + - @subpage md_delegate_DelegateQuickStartGuide +**/ +} + +namespace armnn +{ +/** + +@page md_delegate_DelegateQuickStartGuide TfLite Delegate Quick Start Guide +**/ +} diff --git a/arch/arm/ARMnn/docs/02_operator_list.dox b/arch/arm/ARMnn/docs/02_operator_list.dox new file mode 100644 index 0000000000..e1eec58e4e --- /dev/null +++ b/arch/arm/ARMnn/docs/02_operator_list.dox @@ -0,0 +1,3370 @@ +/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved. +/// +/// SPDX-License-Identifier: MIT +/// + +namespace armnn +{ +/** +@page operator_list Arm NN Operators + +@tableofcontents + +@section S5_1_operator_list Arm NN Operators + +Arm NN supports operators that are listed in below table. + +Arm NN supports a wide list of data-types. +The main data-types that the Machine Learning functions support are the following: +
    +
  • BFLOAT16: 16-bit non-standard brain floating point +
  • QASYMMU8: 8-bit unsigned asymmetric quantized +
  • QASYMMS8: 8-bit signed asymmetric quantized +
  • QUANTIZEDSYMM8PERAXIS: 8-bit signed symmetric quantized, per axis +
  • QSYMMS8: 8-bit signed symmetric quantized +
  • QSYMMS16: 16-bit signed symmetric quantized +
  • FLOAT32: 32-bit single precision floating point +
  • FLOAT16: 16-bit half precision floating point +
  • SIGNED32: 32-bit signed integer +
  • BOOLEAN: 8-bit unsigned char +
  • All: Agnostic to any specific data type +
+ +Arm NN supports the following data layouts (fast changing dimension from right to left): +
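As a quick illustration of where these element types appear in the public API, here is a minimal sketch (not taken from the Arm NN sources; it assumes only the standard armnn/Tensor.hpp and armnn/Types.hpp headers): a tensor's element type is carried by its armnn::TensorInfo, and the asymmetric quantized types additionally carry a quantization scale and zero point.

```cpp
#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>

// Example shape values only: N=1, H=16, W=16, C=3.
armnn::TensorShape shape({1, 16, 16, 3});

// A FLOAT32 tensor needs no quantization parameters.
armnn::TensorInfo floatInfo(shape, armnn::DataType::Float32);

// A QASYMMU8 tensor also carries a quantization scale and offset (zero point).
armnn::TensorInfo quantInfo(shape, armnn::DataType::QAsymmU8,
                            /*quantizationScale=*/0.5f,
                            /*quantizationOffset=*/128);
```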
    +
  • NHWC: Layout where channels are in the fastest changing dimension +
  • NCHW: Layout where width is in the fastest changing dimension +
  • All: Agnostic to any specific data layout +
+where N = batches, C = channels, H = height, W = width + +
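The data layout, by contrast, is not stored in the TensorInfo; layout-sensitive layers select it through their descriptor. A minimal sketch, assuming only the public armnn/Descriptors.hpp and armnn/Types.hpp headers:

```cpp
#include <armnn/Descriptors.hpp>
#include <armnn/Types.hpp>

// Sketch: layout-sensitive layers expose the layout on their descriptor.
// NHWC keeps channels in the fastest changing dimension, NCHW keeps width there.
armnn::Convolution2dDescriptor MakeNhwcConvDescriptor()
{
    armnn::Convolution2dDescriptor desc;
    desc.m_DataLayout = armnn::DataLayout::NHWC;
    return desc;
}
```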
Operator + Description + Equivalent Android NNAPI Operator + Backends + Data Layouts + Data Types +
AbsLayer + Layer to perform absolute operation. + +
    +
  • ANEURALNETWORKS_ABS +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
SIGNED32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
FLOAT16 +
FLOAT32 +
SIGNED32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
FLOAT16 +
FLOAT32 +
+
ActivationLayer + Layer to simulate an activation layer with the specified activation function. + +
    +
  • ANEURALNETWORKS_ABS +
  • ANEURALNETWORKS_ELU +
  • ANEURALNETWORKS_HARD_SWISH +
  • ANEURALNETWORKS_LOGISTIC +
  • ANEURALNETWORKS_PRELU +
  • ANEURALNETWORKS_RELU +
  • ANEURALNETWORKS_RELU1 +
  • ANEURALNETWORKS_RELU6 +
  • ANEURALNETWORKS_SQRT +
  • ANEURALNETWORKS_TANH +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
QSYMMS16 +
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
QSYMMS16 +
FLOAT16 +
FLOAT32 +
+
AdditionLayer + Layer to add 2 tensors. + +
    +
  • ANEURALNETWORKS_ADD +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
SIGNED32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
QSYMMS16 +
SIGNED32 +
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
QSYMMS16 +
SIGNED32 +
FLOAT16 +
FLOAT32 +
+
ArgMinMaxLayer + Layer to calculate the index of the minimum or maximum values in a tensor + based on an axis. + +
    +
  • ANEURALNETWORKS_ARGMAX +
  • ANEURALNETWORKS_ARGMIN +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
SIGNED32 +
SIGNED64 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
SIGNED32 +
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
SIGNED32 +
FLOAT16 +
FLOAT32 +
+
BatchNormalizationLayer + Layer to perform batch normalization. + +
    +
  • N/A +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
FLOAT32 +
FLOAT16 +
+
GpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
FLOAT32 +
FLOAT16 +
+
BatchToSpaceNdLayer + Layer to perform a batch to space transformation. + +
    +
  • ANEURALNETWORKS_BATCH_TO_SPACE_ND +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
All +
+
GpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
All +
+
CastLayer + Layer to cast a tensor to a type. + +
    +
  • ANEURALNETWORKS_CAST +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QSYMMS8 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
SIGNED32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMS8 +
QASYMMU8 +
FLOAT16 +
SIGNED32 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMS8 +
QASYMMU8 +
SIGNED32 +
FLOAT16 +
FLOAT32 +
+
ChannelShuffleLayer + Layer to reorganize the channels of a tensor. + +
    +
  • ANEURALNETWORKS_CHANNEL_SHUFFLE +
+
CpuRef + +
    +
  • All +
+
+ +
+
FLOAT16 +
FLOAT32 +
QSYMMS8 +
QASYMMS8 +
QASYMMU8 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMS8 +
QASYMMU8 +
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMS8 +
QASYMMU8 +
FLOAT16 +
FLOAT32 +
+
ComparisonLayer + Layer to compare 2 tensors. + +
    +
  • ANEURALNETWORKS_EQUAL +
  • ANEURALNETWORKS_GREATER +
  • ANEURALNETWORKS_GREATER_EQUAL +
  • ANEURALNETWORKS_LESS +
  • ANEURALNETWORKS_LESS_EQUAL +
  • ANEURALNETWORKS_NOT_EQUAL +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
BOOLEAN +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
SIGNED32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
All +
+
GpuAcc + +
    +
  • All +
+
+ +
+
All +
+
ConcatLayer - Layer to concatenate tensors along a given axis.
  Android NNAPI equivalent: ANEURALNETWORKS_CONCATENATION
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16
  CpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32
  GpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32

ConstantLayer - Layer to provide a constant tensor.
  Android NNAPI equivalent: N/A
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS8, QSYMMS16, SIGNED32
  CpuAcc (All): All
  GpuAcc (All): All

ConvertBf16ToFp32Layer - Layer to convert BFloat16 tensor to Float32 tensor.
  Android NNAPI equivalent: N/A
  CpuRef (All): BFLOAT16, FLOAT32
  CpuAcc (All): BFLOAT16, FLOAT32
  GpuAcc (All): BFLOAT16, FLOAT32

ConvertFp16ToFp32Layer - Layer to convert Float16 tensor to Float32 tensor.
  Android NNAPI equivalent: N/A
  CpuRef (All): FLOAT16, FLOAT32
  CpuAcc (All): FLOAT16, FLOAT32
  GpuAcc (All): FLOAT16, FLOAT32

ConvertFp32ToBf16Layer - Layer to convert Float32 tensor to BFloat16 tensor.
  Android NNAPI equivalent: N/A
  CpuRef (All): BFLOAT16, FLOAT32
  CpuAcc (All): BFLOAT16, FLOAT32
  GpuAcc (All): BFLOAT16, FLOAT32

ConvertFp32ToFp16Layer - Layer to convert Float32 tensor to Float16 tensor.
  Android NNAPI equivalent: N/A
  CpuRef (All): FLOAT16, FLOAT32
  CpuAcc (All): FLOAT16, FLOAT32
  GpuAcc (All): FLOAT16, FLOAT32

Convolution2dLayer - Layer to compute a convolution operation.
  Android NNAPI equivalent: ANEURALNETWORKS_CONV_2D, ANEURALNETWORKS_GROUPED_CONV_2D
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16
  CpuAcc (NHWC, NCHW): SIGNED32, FLOAT16, FLOAT32, QASYMMU8, QASYMMS8, QUANTIZEDSYMM8PERAXIS
  GpuAcc (NHWC, NCHW): SIGNED32, FLOAT16, FLOAT32, QASYMMU8, QASYMMS8, QUANTIZEDSYMM8PERAXIS

Convolution3dLayer - Layer to compute a 3D convolution operation.
  Android NNAPI equivalent: N/A
  CpuRef (NDHWC): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS8, QSYMMS16
  CpuAcc: N/A
  GpuAcc: N/A

DebugLayer - Layer to print out inter layer tensor information.
  Android NNAPI equivalent: N/A
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS8, QSYMMS16, SIGNED32

DepthToSpaceLayer - Layer to perform Depth to Space transformation.
  Android NNAPI equivalent: ANEURALNETWORKS_DEPTH_TO_SPACE
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16
  CpuAcc (NHWC, NCHW): All
  GpuAcc (NHWC, NCHW): All

DepthwiseConvolution2dLayer - Layer to compute a depthwise convolution operation.
  Android NNAPI equivalent: ANEURALNETWORKS_DEPTHWISE_CONV_2D
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS8, QSYMMS16
  CpuAcc (NHWC, NCHW): FLOAT16, FLOAT32, SIGNED32, QASYMMU8, QASYMMS8, QUANTIZEDSYMM8PERAXIS
  GpuAcc (NHWC, NCHW): FLOAT16, FLOAT32, SIGNED32, QASYMMU8, QASYMMS8, QUANTIZEDSYMM8PERAXIS

DequantizeLayer - Layer to dequantize the values in a tensor.
  Android NNAPI equivalent: ANEURALNETWORKS_DEQUANTIZE
  CpuRef (All): QASYMMS8, QASYMMU8, QSYMMS8, QSYMMS16
  CpuAcc (All): FLOAT16, FLOAT32, QASYMMU8, QASYMMS8, QUANTIZEDSYMM8PERAXIS, QSYMMS8, QSYMMS16
  GpuAcc (All): FLOAT16, FLOAT32, QASYMMU8, QASYMMS8, QUANTIZEDSYMM8PERAXIS, QSYMMS8, QSYMMS16

DetectionPostProcessLayer - Layer to generate the detection output based on center size encoded boxes, class prediction and anchors by doing non-maximum suppression (NMS).
  Android NNAPI equivalent: ANEURALNETWORKS_DETECTION_POSTPROCESSING
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16
  CpuAcc (All): QASYMMU8, QASYMMS8, FLOAT32

DivisionLayer - Layer to divide 2 tensors.
  Android NNAPI equivalent: ANEURALNETWORKS_DIV
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32
  CpuAcc (All): FLOAT16, FLOAT32
  GpuAcc (All): FLOAT16, FLOAT32

ElementwiseBaseLayer - Layer to perform Add, Div, Max, Min and Mul operations.
  Android NNAPI equivalent: ANEURALNETWORKS_ADD, ANEURALNETWORKS_DIV, ANEURALNETWORKS_MAXIMUM, ANEURALNETWORKS_MINIMUM, ANEURALNETWORKS_MUL
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32
  CpuAcc (All): QASYMMU8, QASYMMS8, QSYMMS16, SIGNED32, FLOAT16, FLOAT32
  GpuAcc (All): QASYMMU8, QASYMMS8, QSYMMS16, SIGNED32, FLOAT16, FLOAT32

ElementwiseUnaryLayer - Layer to perform Rsqrt, Exp, Neg, Log, Abs, Sin and Sqrt operations.
  Android NNAPI equivalent: ANEURALNETWORKS_ABS, ANEURALNETWORKS_EXP, ANEURALNETWORKS_LOG, ANEURALNETWORKS_NEG, ANEURALNETWORKS_RSQRT, ANEURALNETWORKS_SIN, ANEURALNETWORKS_SQRT
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16
  CpuAcc (All): FLOAT16, FLOAT32, SIGNED32
  GpuAcc (All): FLOAT16, FLOAT32

FakeQuantizationLayer - Layer to quantize float values and dequantize afterwards. The current implementation does not dequantize the values.
  Android NNAPI equivalent: N/A
  CpuRef (All): FLOAT32

FillLayer - Layer to set the values of a tensor with a given value.
  Android NNAPI equivalent: ANEURALNETWORKS_FILL
  CpuRef (All): FLOAT16, FLOAT32, SIGNED32
  CpuAcc (All): All
  GpuAcc (All): All

FloorLayer - Layer to round the value to the lowest whole number.
  Android NNAPI equivalent: ANEURALNETWORKS_FLOOR
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32
  CpuAcc (All): FLOAT32, FLOAT16
  GpuAcc (All): FLOAT32, FLOAT16

FullyConnectedLayer - Layer to perform a fully connected / dense operation.
  Android NNAPI equivalent: ANEURALNETWORKS_FULLY_CONNECTED
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16
  CpuAcc (NHWC, NCHW): SIGNED32, FLOAT16, FLOAT32, QASYMMU8, QASYMMS8
  GpuAcc (NHWC, NCHW): SIGNED32, FLOAT16, FLOAT32, QASYMMU8, QASYMMS8

GatherLayer - Layer to perform the gather operation along the chosen axis.
  Android NNAPI equivalent: ANEURALNETWORKS_GATHER
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32
  CpuAcc (All): All
  GpuAcc (All): All

InputLayer - Special layer used to provide input data to the computational network.
  Android NNAPI equivalent: N/A
  All backends (All): All

InstanceNormalizationLayer - Layer to perform an instance normalization on a given axis.
  Android NNAPI equivalent: ANEURALNETWORKS_INSTANCE_NORMALIZATION
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32
  CpuAcc (NHWC, NCHW): FLOAT16, FLOAT32
  GpuAcc (NHWC, NCHW): FLOAT16, FLOAT32

L2NormalizationLayer - Layer to perform an L2 normalization on a given axis.
  Android NNAPI equivalent: ANEURALNETWORKS_L2_NORMALIZATION
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16
  CpuAcc (NHWC, NCHW): FLOAT16, FLOAT32
  GpuAcc (NHWC, NCHW): FLOAT16, FLOAT32

LogSoftmaxLayer - Layer to perform the log softmax activations given logits.
  Android NNAPI equivalent: N/A
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32
  CpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32
  GpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32

LogicalBinaryLayer - Layer to perform Logical AND, Logical NOT and Logical OR operations.
  Android NNAPI equivalent: ANEURALNETWORKS_LOGICAL_AND, ANEURALNETWORKS_LOGICAL_NOT, ANEURALNETWORKS_LOGICAL_OR
  CpuRef (All): BOOLEAN
  CpuAcc (All): BOOLEAN
  GpuAcc (All): BOOLEAN

LstmLayer - Layer to perform a single time step in a Long Short-Term Memory (LSTM) operation.
  Android NNAPI equivalent: ANEURALNETWORKS_LSTM
  CpuRef (All): BFLOAT16, FLOAT16, QSYMMS16
  CpuAcc (All): FLOAT16, FLOAT32
  GpuAcc (All): FLOAT16, FLOAT32

MapLayer - Layer to perform map operation on tensor.
  Android NNAPI equivalent: N/A
  CpuRef (All): All
  CpuAcc (All): All
  GpuAcc (All): All

MaximumLayer - Layer to perform an elementwise maximum of two tensors.
  Android NNAPI equivalent: N/A
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32
  CpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32, SIGNED32
  GpuAcc (All): QASYMMU8, QASYMMS8, QSYMMS16, FLOAT16, FLOAT32, SIGNED32

MeanLayer - Layer to perform reduce mean operation.
  Android NNAPI equivalent: ANEURALNETWORKS_MEAN
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16
  CpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32
  GpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32

MemCopyLayer - Layer to perform memory copy operation.
  Android NNAPI equivalent: N/A
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, BOOLEAN
  CpuAcc (All): All
  GpuAcc (All): All

MemImportLayer - Layer to perform memory import operation.
  Android NNAPI equivalent: N/A
  CpuRef (All): All
  CpuAcc (All): All
  GpuAcc (All): All

MergeLayer - Layer to concatenate tensors along a given axis.
  Android NNAPI equivalent: ANEURALNETWORKS_CONCATENATION
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16
  CpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32
  GpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32

MinimumLayer - Layer to perform an elementwise minimum of two tensors.
  Android NNAPI equivalent: ANEURALNETWORKS_MINIMUM
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32
  CpuAcc (All): QASYMMU8, QASYMMS8, QSYMMS16, FLOAT16, FLOAT32
  GpuAcc (All): QASYMMU8, QASYMMS8, QSYMMS16, FLOAT16, FLOAT32, SIGNED32

MultiplicationLayer - Layer to perform an elementwise multiplication of two tensors.
  Android NNAPI equivalent: ANEURALNETWORKS_MUL
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32
  CpuAcc (All): QASYMMU8, QASYMMS8, QSYMMS16, SIGNED32, FLOAT16, FLOAT32
  GpuAcc (All): QASYMMU8, QASYMMS8, QSYMMS16, SIGNED32, FLOAT16, FLOAT32

NormalizationLayer - Layer to compute normalization operation.
  Android NNAPI equivalent: ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16
  CpuAcc (NHWC, NCHW): FLOAT32, FLOAT16
  GpuAcc (NHWC, NCHW): FLOAT32, FLOAT16

OutputLayer - A special layer providing access to a user supplied buffer into which the output of a network can be written.
  Android NNAPI equivalent: N/A
  All backends (All): All

PadLayer - Layer to pad a tensor.
  Android NNAPI equivalent: ANEURALNETWORKS_PAD, ANEURALNETWORKS_PAD_V2
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16
  CpuAcc (NHWC, NCHW): All
  GpuAcc (NHWC, NCHW): All

PermuteLayer - Layer to transpose an ND tensor.
  Android NNAPI equivalent: ANEURALNETWORKS_TRANSPOSE
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16
  CpuAcc (NHWC, NCHW): All
  GpuAcc (NHWC, NCHW): All

Pooling2dLayer - Layer to perform 2D pooling with the specified pooling operation.
  Android NNAPI equivalent: ANEURALNETWORKS_AVERAGE_POOL_2D, ANEURALNETWORKS_L2_POOL_2D, ANEURALNETWORKS_MAX_POOL_2D
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16
  CpuAcc (NHWC, NCHW): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32
  GpuAcc (NHWC, NCHW): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32

Pooling3dLayer - Layer to perform 3D pooling with the specified pooling operation.
  Android NNAPI equivalent: ANEURALNETWORKS_AVERAGE_POOL_3D, ANEURALNETWORKS_L2_POOL_3D, ANEURALNETWORKS_MAX_POOL_3D
  CpuRef (NDHWC): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16
  CpuAcc: N/A
  GpuAcc (NDHWC)

PreCompiledLayer - Opaque layer provided by a backend which provides an executable representation of a subgraph from the original network.
  Android NNAPI equivalent: N/A
  Backends / data types: N/A

PreluLayer - Layer to compute the activation layer with the PRELU activation function.
  Android NNAPI equivalent: ANEURALNETWORKS_PRELU
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16
  CpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32
  GpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32

QLstmLayer - Layer to perform quantized LSTM (Long Short-Term Memory) operation.
  Android NNAPI equivalent: ANEURALNETWORKS_QUANTIZED_LSTM, ANEURALNETWORKS_QUANTIZED_16BIT_LSTM
  CpuRef (All): All
  CpuAcc (All): QASYMMS8, QASYMMU8, SIGNED32, QSYMMS16
  GpuAcc (All): QASYMMS8, QASYMMU8, SIGNED32, QSYMMS16

QuantizeLayer - Layer to perform quantization operation.
  Android NNAPI equivalent: ANEURALNETWORKS_QUANTIZE
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS8, QSYMMS16
  CpuAcc (All): QASYMMU8, QASYMMS8, QASYMM16, FLOAT16, FLOAT32
  GpuAcc (All): QASYMMU8, QASYMMS8, QASYMM16, FLOAT16, FLOAT32

QuantizedLstmLayer - Layer to perform quantized LSTM (Long Short-Term Memory) operation.
  Android NNAPI equivalent: ANEURALNETWORKS_QUANTIZED_LSTM, ANEURALNETWORKS_QUANTIZED_16BIT_LSTM
  CpuRef (All): All
  CpuAcc (All): SIGNED32, QASYMMU8, QSYMMS16
  GpuAcc (All): SIGNED32, QASYMMU8, QSYMMS16

RankLayer - Layer to perform a rank operation.
  Android NNAPI equivalent: ANEURALNETWORKS_RANK
  CpuRef (All): All
  CpuAcc (All): All
  GpuAcc (All): All

ReduceLayer - Layer to perform reduce with the following operations: ARG_IDX_MAX (index of the max value), ARG_IDX_MIN (index of the min value), MEAN_SUM (mean of sum), PROD (product), SUM_SQUARE (sum of squares), SUM, MIN, MAX.
  Android NNAPI equivalent: ANEURALNETWORKS_REDUCE_MAX, ANEURALNETWORKS_REDUCE_MIN, ANEURALNETWORKS_REDUCE_SUM, ANEURALNETWORKS_REDUCE_PROD
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32
  CpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32, SIGNED32
  GpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32, SIGNED32

ReshapeLayer - Layer to reshape a tensor.
  Android NNAPI equivalent: ANEURALNETWORKS_RESHAPE, ANEURALNETWORKS_SQUEEZE, ANEURALNETWORKS_EXPAND_DIMS
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32, BOOLEAN
  CpuAcc (All): All
  GpuAcc (All): All

ResizeLayer - Layer to perform resize of a tensor using one of the interpolation methods: Bilinear or Nearest Neighbor.
  Android NNAPI equivalent: ANEURALNETWORKS_RESIZE_BILINEAR, ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16
  CpuAcc (NHWC, NCHW): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32
  GpuAcc (NHWC, NCHW): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32

RsqrtLayer - Layer to perform Rsqrt operation.
  Android NNAPI equivalent: ANEURALNETWORKS_RSQRT
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32
  CpuAcc (All): FLOAT16, FLOAT32, SIGNED32
  GpuAcc (All): FLOAT16, FLOAT32

ShapeLayer - Layer to return the shape of the input tensor.
  Android NNAPI equivalent: N/A
  CpuRef (All): All
  CpuAcc (All): All
  GpuAcc (All): All

SliceLayer - Layer to perform tensor slicing.
  Android NNAPI equivalent: ANEURALNETWORKS_SLICE
  CpuRef (All): BFLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16
  CpuAcc (All): All
  GpuAcc (All): All

SoftmaxLayer - Layer to perform softmax, log-softmax operation over the specified axis.
  Android NNAPI equivalent: ANEURALNETWORKS_LOG_SOFTMAX, ANEURALNETWORKS_SOFTMAX
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS8, QSYMMS16
  CpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32
  GpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32

SpaceToBatchNdLayer - Layer to divide spatial dimensions of the tensor into a grid of blocks and interleaves these blocks with the batch dimension.
  Android NNAPI equivalent: ANEURALNETWORKS_SPACE_TO_BATCH_ND
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16
  CpuAcc (NHWC, NCHW): All
  GpuAcc (NHWC, NCHW): All

SpaceToDepthLayer - Layer to rearrange blocks of spatial data into depth.
  Android NNAPI equivalent: ANEURALNETWORKS_SPACE_TO_DEPTH
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16
  CpuAcc (NHWC, NCHW): All
  GpuAcc (NHWC, NCHW): All

SplitterLayer - Layer to split a tensor along a given axis.
  Android NNAPI equivalent: ANEURALNETWORKS_SPLIT
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16
  CpuAcc (All): All
  GpuAcc (All): All

StackLayer - Layer to stack tensors along an axis.
  Android NNAPI equivalent: N/A
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16
  CpuAcc (All): All
  GpuAcc (All): All

StandInLayer - A layer to represent "unknown" or "unsupported" operations in the input graph. It has a configurable number of input and output slots and an optional name.
  Android NNAPI equivalent: N/A
  Backends / data types: N/A

StridedSliceLayer - Layer to extract a strided slice of a tensor.
  Android NNAPI equivalent: ANEURALNETWORKS_STRIDED_SLICE
  CpuRef (All): BFLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16
  CpuAcc (All): All
  GpuAcc (All): All

SubtractionLayer - Layer to perform an elementwise subtract of 2 tensors.
  Android NNAPI equivalent: ANEURALNETWORKS_SUB
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32
  CpuAcc (All): QASYMMU8, QASYMMS8, QSYMMS16, SIGNED32, FLOAT16, FLOAT32
  GpuAcc (All): QASYMMU8, QASYMMS8, QSYMMS16, SIGNED32, FLOAT16, FLOAT32

TransposeConvolution2dLayer - Layer to perform 2D transpose convolution (deconvolution) operation.
  Android NNAPI equivalent: ANEURALNETWORKS_TRANSPOSE_CONV_2D
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS8, QSYMMS16
  CpuAcc (NHWC, NCHW): SIGNED32, FLOAT16, FLOAT32, QASYMMU8, QASYMMS8, QUANTIZEDSYMM8PERAXIS
  GpuAcc (NHWC, NCHW): SIGNED32, FLOAT16, FLOAT32, QASYMMU8, QASYMMS8, QUANTIZEDSYMM8PERAXIS

TransposeLayer - Layer to transpose a tensor.
  Android NNAPI equivalent: ANEURALNETWORKS_TRANSPOSE
  CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16
  CpuAcc (All): All
  GpuAcc (All): All

UnidirectionalSequenceLstmLayer - Layer to perform unidirectional sequence LSTM operation.
  Android NNAPI equivalent: ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_LSTM
  CpuRef (All): Input types: FLOAT32; Weight types: FLOAT32, QASYMMS8

UnmapLayer - Layer to perform unmap operation on tensor.
  Android NNAPI equivalent: N/A
  CpuRef (All): All
  CpuAcc (NHWC, NCHW): All
  GpuAcc (NHWC, NCHW): All

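The CpuRef, CpuAcc and GpuAcc names used throughout the list above are the backend identifiers that are passed to the optimizer when a network is prepared for execution. As a rough illustration only (the helper function below is not part of this operator list or of any Arm NN header, and the backend order is just an example), a backend preference list might be supplied through the public C++ API like this:

@code{.cpp}
// Minimal sketch: optimize and load a network with a backend preference list.
// Assumes an already-built armnn::INetwork and an existing armnn::IRuntime.
#include <armnn/ArmNN.hpp>

#include <utility>
#include <vector>

armnn::NetworkId LoadWithPreferredBackends(const armnn::INetwork& network,
                                           armnn::IRuntime& runtime)
{
    // Try the accelerated backends first and fall back to the reference backend.
    std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
                                               armnn::Compute::CpuAcc,
                                               armnn::Compute::CpuRef };

    armnn::IOptimizedNetworkPtr optNet =
        armnn::Optimize(network, backends, runtime.GetDeviceSpec());

    armnn::NetworkId networkId = 0;
    runtime.LoadNetwork(networkId, std::move(optNet));
    return networkId;
}
@endcode

During optimization, a layer that the preferred backend cannot handle (see the data type and layout entries above) is assigned to the next backend in the list.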
+ +*/ +} // namespace \ No newline at end of file diff --git a/arch/arm/ARMnn/docs/03_build_guides.dox b/arch/arm/ARMnn/docs/03_build_guides.dox new file mode 100644 index 0000000000..f193c9f0bd --- /dev/null +++ b/arch/arm/ARMnn/docs/03_build_guides.dox @@ -0,0 +1,35 @@ +/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved. +/// +/// SPDX-License-Identifier: MIT +/// + +namespace armnn +{ +/** +@page buildguides Build Guides +@tableofcontents + +This page links all guides to build and/or install Arm NN tools hosted in our repository. +You can find additional tutorials on +https://developer.arm.com/solutions/machine-learning-on-arm/developer-material/how-to-guides + +Arm NN is written using portable C++14 and the build system uses [CMake](https://cmake.org/), therefore it is possible +to build for a wide variety of target platforms, from a wide variety of host environments. + +Arm NN is managed by a single CMake project. That allows you to build multiple tools at once by combining the CMake +options from different build guides. + +## Installation via APT Repository + - @subpage md_InstallationViaAptRepository + +## TfLite Delegate build guide + - @subpage md_delegate_BuildGuideNative + +## Android NDK and Arm NN + - @subpage md_BuildGuideAndroidNDK + +## Crosscompile Guide for Arm NN + - @subpage md_BuildGuideCrossCompilation + +**/ +} \ No newline at end of file diff --git a/arch/arm/ARMnn/docs/04_contributor.dox b/arch/arm/ARMnn/docs/04_contributor.dox new file mode 100644 index 0000000000..5cbb6c3b8c --- /dev/null +++ b/arch/arm/ARMnn/docs/04_contributor.dox @@ -0,0 +1,39 @@ +/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved. +/// +/// SPDX-License-Identifier: MIT +/// + +namespace armnn +{ +/** + +@page contribguides Contribution Guides +@tableofcontents + +This is a collection of guides that should help you contribute code to Arm NN. Before you get started, please +take a look into our /ref md_Contributor_Guide section. + + - @subpage md_src_backends_README \n + This guide explains how to add your own backend to Arm NN. This might be useful if you would like to accelerate neural + networks on hardware that Arm NN currently doesn't support. + + + - @subpage md_src_dynamic_README \n + Arm NN allows you to load a backend dynamically on runtime. To find out how that can be done take a look at this guide. + +**/ +} + + +/// Create pages for each tool so they appear nicely in the doxygen tree-view. Subpages are not listed there. +/// Also we can overwrite the page name this way. +namespace armnn +{ +/** + +@page md_src_backends_README Backend Developer Guide + +@page md_src_dynamic_README Dynamically loadable Backend + +**/ +} diff --git a/arch/arm/ARMnn/docs/05_00_software_components.dox b/arch/arm/ARMnn/docs/05_00_software_components.dox new file mode 100644 index 0000000000..2d65daed1e --- /dev/null +++ b/arch/arm/ARMnn/docs/05_00_software_components.dox @@ -0,0 +1,46 @@ +/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved. +/// +/// SPDX-License-Identifier: MIT +/// + +namespace armnn +{ +/** +@page swtools Software Components + +On this page you can find all software components contained in the Arm NN repository. You will find links to how-to guides and +other helpful information in each section. 
+ + - @subpage delegate + - @subpage parsers + - @subpage md_python_pyarmnn_README + - @subpage serializer + - @subpage deserializer + - @subpage md_src_armnnConverter_README + - @subpage md_tests_ImageCSVFileGenerator_README + - @subpage md_tests_ImageTensorGenerator_README + - @subpage md_tests_ModelAccuracyTool-Armnn_README +**/ +} + + +/// Create pages for each tool so they appear nicely in the doxygen tree-view. Subpages are not listed there. +/// +/// Note: The parser, serializer and deserializer pages are created in 01_parsers.dox or 02_deserializer_serializer.dox +namespace armnn +{ +/** + +@page md_python_pyarmnn_README PyArmNN + +@page md_src_armnnConverter_README Converter + +@page md_tests_ModelAccuracyTool-Armnn_README ModelAccuracyTool + +@page md_tests_ImageCSVFileGenerator_README ImageCSVFileGenerator + +@page md_tests_ImageTensorGenerator_README ImageTensorGenerator + +**/ +} + diff --git a/arch/arm/ARMnn/docs/05_01_parsers.dox b/arch/arm/ARMnn/docs/05_01_parsers.dox new file mode 100644 index 0000000000..a4526e04d5 --- /dev/null +++ b/arch/arm/ARMnn/docs/05_01_parsers.dox @@ -0,0 +1,208 @@ +/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved. +/// +/// SPDX-License-Identifier: MIT +/// + +namespace armnn +{ +/** +@page parsers Parsers + +@tableofcontents +Execute models from different machine learning platforms efficiently with our parsers. Simply choose a parser according +to the model you want to run e.g. If you've got a model in onnx format (.onnx) use our onnx-parser. + +If you would like to run a Tensorflow Lite (TfLite) model you probably also want to take a look at our @ref delegate. + +All parsers are written in C++ but it is also possible to use them in python. For more information on our python +bindings take a look into the @ref md_python_pyarmnn_README section. + +

+ + + + +@section S5_onnx_parser Arm NN Onnx Parser + +`armnnOnnxParser` is a library for loading neural networks defined in ONNX protobuf files into the Arm NN runtime. + +## ONNX operators that the Arm NN SDK supports + +This reference guide provides a list of ONNX operators the Arm NN SDK currently supports. + +The Arm NN SDK ONNX parser currently only supports fp32 operators. + +### Fully supported + +- Add + - See the ONNX [Add documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Add) for more information + +- AveragePool + - See the ONNX [AveragePool documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#AveragePool) for more information. + +- Concat + - See the ONNX [Concat documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Concat) for more information. + +- Constant + - See the ONNX [Constant documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Constant) for more information. + +- Clip + - See the ONNX [Clip documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Clip) for more information. + +- Flatten + - See the ONNX [Flatten documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Flatten) for more information. + +- Gather + - See the ONNX [Gather documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Gather) for more information. + +- GlobalAveragePool + - See the ONNX [GlobalAveragePool documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#GlobalAveragePool) for more information. + +- LeakyRelu + - See the ONNX [LeakyRelu documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#LeakyRelu) for more information. + +- MaxPool + - See the ONNX [max_pool documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#MaxPool) for more information. + +- Relu + - See the ONNX [Relu documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Relu) for more information. + +- Reshape + - See the ONNX [Reshape documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Reshape) for more information. + +- Shape + - See the ONNX [Shape documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Shape) for more information. + +- Sigmoid + - See the ONNX [Sigmoid documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Sigmoid) for more information. + +- Tanh + - See the ONNX [Tanh documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Tanh) for more information. + +- Unsqueeze + - See the ONNX [Unsqueeze documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Unsqueeze) for more information. + +### Partially supported + +- Conv + - The parser only supports 2D convolutions with a group = 1 or group = #Nb_of_channel (depthwise convolution) +- BatchNormalization + - The parser does not support training mode. See the ONNX [BatchNormalization documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#BatchNormalization) for more information. +- Gemm + - The parser only supports constant bias or non-constant bias where bias dimension = 1. See the ONNX [Gemm documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Gemm) for more information. +- MatMul + - The parser only supports constant weights in a fully connected layer. See the ONNX [MatMul documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#MatMul) for more information. 
+ +## Tested networks + +Arm tested these operators with the following ONNX fp32 neural networks: +- Mobilenet_v2. See the ONNX [MobileNet documentation](https://github.com/onnx/models/tree/master/vision/classification/mobilenet) for more information. +- Simple MNIST. This is no longer directly documented by ONNX. The model and test data may be downloaded [from the ONNX model zoo](https://onnxzoo.blob.core.windows.net/models/opset_8/mnist/mnist.tar.gz). + +More machine learning operators will be supported in future releases. +
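For orientation, the typical flow when using the parser is to create a parser instance, build an Arm NN network from the ONNX file and then query the input/output binding information by tensor name. The sketch below is illustrative only; the model path and tensor name are placeholders and error handling is omitted.

@code{.cpp}
// Rough usage sketch for armnnOnnxParser (model path and tensor name are placeholders).
#include <armnn/INetwork.hpp>
#include <armnnOnnxParser/IOnnxParser.hpp>

armnn::INetworkPtr ParseOnnxModel()
{
    armnnOnnxParser::IOnnxParserPtr parser = armnnOnnxParser::IOnnxParser::Create();

    // Build an Arm NN network from an fp32 ONNX protobuf file on disk.
    armnn::INetworkPtr network = parser->CreateNetworkFromBinaryFile("model.onnx");

    // Binding information for a named input or output can then be looked up, e.g.:
    // auto inputBinding = parser->GetNetworkInputBindingInfo("input_tensor_name");

    return network;
}
@endcode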



+ + + + +@section S6_tf_lite_parser Arm NN Tf Lite Parser + +`armnnTfLiteParser` is a library for loading neural networks defined by TensorFlow Lite FlatBuffers files +into the Arm NN runtime. + +## TensorFlow Lite operators that the Arm NN SDK supports + +This reference guide provides a list of TensorFlow Lite operators the Arm NN SDK currently supports. + +### Fully supported +The Arm NN SDK TensorFlow Lite parser currently supports the following operators: + +- ABS +- ADD +- ARG_MAX +- ARG_MIN +- AVERAGE_POOL_2D, Supported Fused Activation: RELU , RELU6 , TANH, NONE +- BATCH_TO_SPACE +- CONCATENATION, Supported Fused Activation: RELU , RELU6 , TANH, NONE +- CONV_2D, Supported Fused Activation: RELU , RELU6 , TANH, NONE +- CONV_3D, Supported Fused Activation: RELU , RELU6 , TANH, NONE +- DEPTH_TO_SPACE +- DEPTHWISE_CONV_2D, Supported Fused Activation: RELU , RELU6 , TANH, NONE +- DEQUANTIZE +- DIV +- ELU +- EQUAL +- EXP +- EXPAND_DIMS +- FULLY_CONNECTED, Supported Fused Activation: RELU , RELU6 , TANH, NONE +- GATHER +- GREATER +- GREATER_EQUAL +- HARD_SWISH +- LEAKY_RELU +- LESS +- LESS_EQUAL +- LOGICAL_NOT +- LOGISTIC +- L2_NORMALIZATION +- MAX_POOL_2D, Supported Fused Activation: RELU , RELU6 , TANH, NONE +- MAXIMUM +- MEAN +- MINIMUM +- MIRROR_PAD +- MUL +- NEG +- NOT_EQUAL +- PACK +- PAD +- PADV2 +- PRELU +- QUANTIZE +- RELU +- RELU6 +- REDUCE_MAX +- REDUCE_MIN +- REDUCE_PROD +- RESHAPE +- RESIZE_BILINEAR +- RESIZE_NEAREST_NEIGHBOR +- RSQRT +- SHAPE +- SLICE +- SOFTMAX +- SPACE_TO_BATCH +- SPLIT +- SPLIT_V +- SQUEEZE +- STRIDED_SLICE +- SUB +- SUM +- TANH +- TRANSPOSE +- TRANSPOSE_CONV +- UNPACK + +### Custom Operator +- TFLite_Detection_PostProcess + +## Tested networks +Arm tested these operators with the following TensorFlow Lite neural network: +- [Quantized MobileNet](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224_quant.tgz) +- [Quantized SSD MobileNet](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_quantized_300x300_coco14_sync_2018_07_18.tar.gz) +- DeepSpeech v1 converted from [TensorFlow model](https://github.com/mozilla/DeepSpeech/releases/tag/v0.4.1) +- DeepSpeaker +- [DeepLab v3+](https://www.tensorflow.org/lite/models/segmentation/overview) +- FSRCNN +- EfficientNet-lite +- RDN converted from [TensorFlow model](https://github.com/hengchuan/RDN-TensorFlow) +- Quantized RDN (CpuRef) +- [Quantized Inception v3](http://download.tensorflow.org/models/tflite_11_05_08/inception_v3_quant.tgz) +- [Quantized Inception v4](http://download.tensorflow.org/models/inception_v4_299_quant_20181026.tgz) (CpuRef) +- Quantized ResNet v2 50 (CpuRef) +- Quantized Yolo v3 (CpuRef) + +More machine learning operators will be supported in future releases. + +**/ +} + diff --git a/arch/arm/ARMnn/docs/05_02_deserializer_serializer.dox b/arch/arm/ARMnn/docs/05_02_deserializer_serializer.dox new file mode 100644 index 0000000000..84324d89d5 --- /dev/null +++ b/arch/arm/ARMnn/docs/05_02_deserializer_serializer.dox @@ -0,0 +1,185 @@ +/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved. +/// +/// SPDX-License-Identifier: MIT +/// + +namespace armnn +{ +/** +@page serializer Serializer +@tableofcontents + +The `armnnSerializer` is a library for serializing an Arm NN network to a stream. + +@section serializersupport Supported Layers + +This reference guide provides a list of layers which can be serialized by the Arm NN SDK. 
+ +@subsection serializersupportflully Fully supported + +The Arm NN SDK Serializer currently supports the following layers: + +- Activation +- Addition +- ArgMinMax +- BatchToSpaceNd +- BatchNormalization +- Cast +- ChannelShuffle +- Comparison +- Concat +- Constant +- Convolution2d +- Convolution3d +- DepthToSpace +- DepthwiseConvolution2d +- Dequantize +- DetectionPostProcess +- Division +- ElementwiseUnary +- Fill +- Floor +- FullyConnected +- Gather +- Input +- InstanceNormalization +- L2Normalization +- Logical +- LogSoftmax +- Lstm +- Maximum +- Mean +- Merge +- Minimum +- Multiplication +- Normalization +- Output +- Pad (Constant, Symmetric, Reflect) +- Permute +- Pooling2d +- Pooling3d +- Prelu +- QLstm +- Quantize +- QuantizedLstm +- Rank +- Reduce +- Reshape +- Resize +- Shape +- Slice +- Softmax +- SpaceToBatchNd +- SpaceToDepth +- Splitter +- Stack +- StandIn +- StridedSlice +- Subtraction +- Switch +- Transpose +- TransposeConvolution2d +- UnidirectionalSequenceLstm + +More machine learning layers will be supported in future releases. + +@subsection serializersupportdeprecated Deprecated layers + +Some layers have been deprecated and replaced by others layers. In order to maintain backward compatibility, serializations of these deprecated layers will deserialize to the layers that have replaced them, as follows: + +- Abs will deserialize as ElementwiseUnary +- Equal will deserialize as Comparison +- Greater will deserialize as Comparison +- Merger will deserialize as Concat +- ResizeBilinear will deserialize as Resize +- Rsqrt will deserialize as ElementwiseUnary +
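Before moving on to the deserializer, a brief orientation on typical usage (this is a sketch, not a substitute for the armnnSerializer API reference): serializing a built network amounts to creating a serializer, capturing the network and writing the resulting FlatBuffers data to a stream. The output path below is a placeholder.

@code{.cpp}
// Rough usage sketch for armnnSerializer (output path is a placeholder).
#include <armnn/INetwork.hpp>
#include <armnnSerializer/ISerializer.hpp>

#include <fstream>
#include <string>

void SaveNetwork(const armnn::INetwork& network, const std::string& path)
{
    armnnSerializer::ISerializerPtr serializer = armnnSerializer::ISerializer::Create();

    // Capture the network, then write the serialized representation to a file.
    serializer->Serialize(network);

    std::ofstream outputStream(path, std::ios::binary);
    serializer->SaveSerializedToStream(outputStream);
}
@endcode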



+ +@page deserializer Deserializer +@tableofcontents + +The `armnnDeserializer` is a library for loading neural networks defined by Arm NN FlatBuffers files +into the Arm NN runtime. + +@section deserializersupport Supported Layers + +This reference guide provides a list of layers which can be deserialized by the Arm NN SDK. + +@subsection deserializersupportfully Fully supported + +The Arm NN SDK Deserialize parser currently supports the following layers: + +- Abs +- Activation +- Addition +- ArgMinMax +- BatchToSpaceNd +- BatchNormalization +- Cast +- ChannelShuffle +- Concat +- Comparison +- Constant +- Convolution2d +- DepthToSpace +- DepthwiseConvolution2d +- Dequantize +- DetectionPostProcess +- Division +- ElementwiseUnary +- Fill +- Floor +- FullyConnected +- Gather +- Input +- InstanceNormalization +- L2Normalization +- Logical +- LogSoftmax +- Lstm +- Maximum +- Mean +- Merge +- Minimum +- Multiplication +- Normalization +- Output +- Pad +- Permute +- Pooling2d +- Prelu +- Quantize +- QLstm +- QuantizedLstm +- Rank +- Reduce +- Reshape +- Resize +- ResizeBilinear +- Slice +- Softmax +- SpaceToBatchNd +- SpaceToDepth +- Splitter +- Stack +- StandIn +- StridedSlice +- Subtraction +- Switch +- Transpose +- TransposeConvolution2d +- UnidirectionalSequenceLstm + +More machine learning layers will be supported in future releases. + +@subsection deserializersupportdeprecated Deprecated layers + +Some layers have been deprecated and replaced by others layers. In order to maintain backward compatibility, serializations of these deprecated layers will deserialize to the layers that have replaced them, as follows: + +- Equal will deserialize as Comparison +- Merger will deserialize as Concat +- Greater will deserialize as Comparison +- ResizeBilinear will deserialize as Resize + +**/ +} \ No newline at end of file diff --git a/arch/arm/ARMnn/docs/05_03_delegate.dox b/arch/arm/ARMnn/docs/05_03_delegate.dox new file mode 100644 index 0000000000..b3caf8cbf8 --- /dev/null +++ b/arch/arm/ARMnn/docs/05_03_delegate.dox @@ -0,0 +1,178 @@ +/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved. +/// +/// SPDX-License-Identifier: MIT +/// + +namespace armnn +{ +/** +@page delegate TfLite Delegate +@tableofcontents + + +@section delegateintro About the delegate +'armnnDelegate' is a library for accelerating certain TensorFlow Lite (TfLite) operators on Arm hardware. It can be +integrated in TfLite using its delegation mechanism. TfLite will then delegate the execution of operators supported by +Arm NN to Arm NN. + +The main difference to our @ref S6_tf_lite_parser is the amount of operators you can run with it. If none of the active +backends support an operation in your model you won't be able to execute it with our parser. In contrast to that, TfLite +only delegates operations to the armnnDelegate if it does support them and otherwise executes them itself. In other +words, every TfLite model can be executed and every operation in your model that we can accelerate will be accelerated. +That is the reason why the armnnDelegate is our recommended way to accelerate TfLite models. + +If you need help building the armnnDelegate, please take a look at our [build guide](delegate/BuildGuideNative.md). 
+An example how to setup TfLite to integrate the armnnDelegate can be found in this +guide: [Integrate the delegate into python](delegate/IntegrateDelegateIntoPython.md) + + +@section delegatesupport Supported Operators +This reference guide provides a list of TensorFlow Lite operators the Arm NN SDK currently supports. + +@subsection delegatefullysupported Fully supported + +The Arm NN SDK TensorFlow Lite delegate currently supports the following operators: + +- ABS + +- ADD + +- ARGMAX + +- ARGMIN + +- AVERAGE_POOL_2D, Supported Fused Activation: RELU , RELU6 , TANH, NONE + +- BATCH_TO_SPACE_ND + +- CAST + +- CONCATENATION, Supported Fused Activation: RELU , RELU6 , TANH, NONE + +- CONV_2D, Supported Fused Activation: RELU , RELU6 , TANH, NONE + +- CONV_3D, Supported Fused Activation: RELU , RELU6 , TANH, NONE + +- DEPTH_TO_SPACE + +- DEPTHWISE_CONV_2D, Supported Fused Activation: RELU , RELU6 , TANH, NONE + +- DEQUANTIZE + +- DIV + +- EQUAL + +- ELU + +- EXP + +- FULLY_CONNECTED, Supported Fused Activation: RELU , RELU6 , TANH, NONE + +- FLOOR + +- GATHER + +- GREATER + +- GREATER_OR_EQUAL + +- HARD_SWISH + +- LESS + +- LESS_OR_EQUAL + +- LOCAL_RESPONSE_NORMALIZATION + +- LOGICAL_AND + +- LOGICAL_NOT + +- LOGICAL_OR + +- LOGISTIC + +- LOG_SOFTMAX + +- LSTM + +- L2_NORMALIZATION + +- L2_POOL_2D + +- MAXIMUM + +- MAX_POOL_2D, Supported Fused Activation: RELU , RELU6 , TANH, NONE + +- MEAN + +- MINIMUM + +- MIRROR_PAD + +- MUL + +- NEG + +- NOT_EQUAL + +- PACK + +- PAD + +- PRELU + +- QUANTIZE + +- RANK + +- REDUCE_MAX + +- REDUCE_MIN + +- RESHAPE + +- RESIZE_BILINEAR + +- RESIZE_NEAREST_NEIGHBOR + +- RELU + +- RELU6 + +- RSQRT + +- SHAPE + +- SOFTMAX + +- SPACE_TO_BATCH_ND + +- SPACE_TO_DEPTH + +- SPLIT + +- SPLIT_V + +- SQRT + +- STRIDED_SLICE + +- SUB + +- SUM + +- TANH + +- TRANSPOSE + +- TRANSPOSE_CONV + +- UNIDIRECTIONAL_SEQUENCE_LSTM + +- UNPACK + +More machine learning operators will be supported in future releases. +**/ +} \ No newline at end of file diff --git a/arch/arm/ARMnn/docs/Arm_NN_horizontal_blue.png b/arch/arm/ARMnn/docs/Arm_NN_horizontal_blue.png new file mode 100644 index 0000000000000000000000000000000000000000..3d271852407102410d4e805311e2a38cf84efe08 GIT binary patch literal 16540 zcmbV!1yqz>*C<1yG%5`vDbfwY03t}YNOuo8lysvA(jh&(N*R!``x?#|K7W|40099bJ^nAqt#k~4BDH{-m=2cbI0FzsvnFeG{iJPbYb2E@dS! zGz0=H;O!FV01ol?@(F;1D02N-7Xm!ve=W)d{sR)|smP^*M+i38GX$&p`n!OoMP!7X z#HGZ*vhpJ0GGbEF;zD2vF>zT@F= zAk5bXj9<~g(Kjeikqe;oUo3dT|5?^2;BPSj0uv2!fQyQYh~ZiK1L*AZ4;(zm-|G)@ zXD3k?FBfkYpTGbBR{S4WxVvwlZ-Be+f3x&IU;i%%0MY8{{e$Dbq{Z9&9~1$B_Z|Xl z{4J3G5<0*n6z(Ew>=NJ`ho&|ybc)>-@A<#vU3$Ha{adBZW8540Ch?p2eN>&)qotW6aK=pi`VXmS7C8#7s zOzM9D1x&`-A<*If3he9zarO20b^sWLc{{kdh{Ao`xWNC25u)nr3e^X=N4+9+1!RtR)#Z%=B ztSIXsFXJpB?<_3uAmJ)3CFLLuJc&67OFB5a%FBp3N{LH|E+@P-xHxnD{cG5NA&>tj%zwWh?Ct_A`hT#Je_;>sbqx%5 z@OM#h17!DqwL;PVmUv&Ehc5pAK|b8U-vNk4F8%?ET(17U-e3nf+zaO9fag{8p^x+5 zF8+^pz=6Kt|7leJEuH{}hyQ00{lA&azj*&IMf9KE@&6;Dzs2w5?%?C*0z`6AE<7`$ z_?Y)E<%|CBcJb%C|A>$O1_y#J{_&q^4}AD1U%2=HOZx-a;Q7Pb(*y+an_6lrCLzV! 
zi*EU>lat8(o@tVl)SIMt#d(>Ny)6lh>}XY)X(<$GE!|_kP^nT#|D*;D8|qTfB7fQv z#XYBd!C>SV#vFEa#NI2}`z%UM!gi`da8aP}>&}TH4JSRSB)mGeNh5eiv5H196Vn3v zOej--O^ofAv=|c;xh_FB;Sz~02?E6NDX4pxXGtjJq8j-cIXB=b&1O!(MEDa_39cLe zIsH8p4MNLXyLph(30CA7zXnA%P=k#SJuLM^_0nP!@Sjp>Eqft2doAuk?^hQT?vwya zr~oP=isxGR7Jz0!U$Mr_5|`cA7N{^G3{&^VoUnp42;cAo_9r2k?4@dPes_jMkrsAV_^D{FpD50*Cdne zCF&V!1H9F7)0mU^n0W&J54S{!kKPAG4T+KBC1d9C84~k&FnLkYId2B^ldQHEcbVfr zX_I}u!)j3GEzt}EQ;8oX9)H~g%V5bwO$Ud$FxD_*`iBQ}c@&bNZ+)dYFNjw9bmb(` znLPoZbaMWkx<8o)0jmqUn!&BR+T z@>AZ?v&LhgiyzNVxyYoBNMusvP$u@owK&(l{XYscjLKH9XbQh-F4y~ABUTY!l1H^M z$0)~a@m8XVv6exaBDEh^%40d-)E{f(S7JuB(GdIjN%LwbMQx*1f**bfi44rTk3xA( z?!IRE^{EN+0Fn3{D`rQuPO>BSUM?i{({-D4y~hZ zp8{^<_2>|)WcW59LEH@uIn~hqL1691R!_Q@xuW?-rgrvWir9vuwOidlm%k0hbA7)W zW;|Q!1#$hO1Z^ci38jqY)9?w!J7?`4wK(drxnZf3S&^d!zICGpnE1E;%=1V40~fu* z;s$ZjZFsjZeui4t=Qt_olL7tmbWFczgP|B!5UMU6SV)yr{0O-|BYco-qfv{KZ!q}U zH`LVMj%QO_%Qm~CiLy?ab6VWy-fN6A56mm)5k@o%PPu3?zxVqMtCIa#t5qx`WvwrF zQU5gXj!9kS^tw-Iyk`~2i9Q_}^oeMiCGEG(s$ZZt>;pDic|=gKf{`iBQf6NcuinGP zPQG9rChJq7T3nR2U;b3OWRgNH9qDuwHMGE(EtuI>8Jc?w4@t5v-j=Os!MFW-OxyCs zLil@=7vC4oNT-#uEemc;pU?`dFT5l%PErNPHon7=t+?gohs z99je~i8s;rAm4u4j?{+ zgzE0N?ttdI#_XXoUdvt`83kr(YrL~C5@;{>IeD6QKi^tlgyQ7Dnh48XbW&J<7Uh;oMuRC~0zY(F(a{vCi&- zx=a9W61y(lCb=2*g=|J~pBx&kKYaR8sWgPmCVX`7_c?v~o!|32Fu5TqAAnG8<^fOt zz-T-8)lC8s0e}LEC>YI_8xKUb7`ul3@$b+Ro670Aj*skLN0#HRe=p!whc} zRR;wNZKM79b-LbH*jU4TPCCPcgd`*0g>A&;F!5%8o;;^~G-*xHz1@7N^C3YYKI)&%Lvz{GD0K?&ZYbSYx9X#-=+N>@@a1@TNh5u0I$k4N1Ht}e=#w`@=@}@Ef#VfOMRhz9hat2DZ45$k^r1A> z30U1h2ra19&y#HBB=yV8T7

Da{=qz|o()%s#pTSDjJ`T%%n7qG=j+m%ig= zRWV*3*Hy7R!D9+Xd(|HZo)I>qAWwXX34e~b?<66oT1QWqN9ypt7*><$&s&E!V4kFdmapLqLm2A|Hv_xq6^NPpyk zrtjjT7!}P`2*ocJnAQ=?80n6YHD=y9&J-ZrgyjISz)r>HCbtz}?`Q26k#JH=(k#w) zNLWu;o2?fNa3h>WfO3>O5N#d(Hb{mKVy}e01*1wZqEEagWP_Z``}Eo3s?i0%*z2wO zPO{*n8%c|j$XtQG7kJQ=Z?bLi<1w?8{b!7=JO?SYI3;*kLMA`@BtQ)zdmCkUb#uYN z<8s)P1uvZ3*AYN9bQz6&NT=T3Wa{EqOxo!y=gNW3&&L(+pLMGVM*36L;zGt&fZXMQ zxcfHn&U5pj*Y=S*0@rj{ZzNanHl&pm9JFBcx0)15s-5KyZdkiLO>mqcZJmNoINQUl zF^6i7FZ*%F1R_#w(iBW-*ocg~YOixcFU0KM&PA(c#!ClGUHnu)lc9;wL1~C`Y zp9%8^)ENqGr%2s3Z?qIhWKJBv7Ao|pfdP;x4RV=2WhKo1T7CQb{0pSJ{RiT;@5LBY zQm2t#No7*W>vKYDI&)TY2Kfg*%{54z9oY`(f(n{}Ob$h@p2*Zo){Beft;{gSD8#%e zJaYM(fDxrQyM9vb86n+=vs3%+Z}$}2Szr$(@_6e-=|wQ?8NhxQ{YrD?oJ3gPI(xi(~ue<}8}Br`xzDplWX9 zc9GS+^h3m9*)rimf?!@_UAwa7HOMg^i;f0DnPp}fKGA{HwC?|je6l3~jpi9c_*kJ^ zD3(qX(T-?GYfK|Ve1U{AO%BE1nzji$s$HGAy7_@^a;bGxx?~~v0ouzxm+9DN7laP9 z#&kc{2)XFU=fg^NCSrx=KSn-9QpB+DFOBOF`H-c33i@=f`zZ!R)rz#u=lfz4&K_!K z{Rk+`zaw6~tXa^zv`DEn9U^mO(&qvg4QY?sU9zpok$oDnToM~Sd4Um(v8l=FxU3#( z18h#_;9}Qg$#f?T34MFYBqzVGX^pXfUiza&A9HK0okV$XQUXcK;(=7qOTLF)IheZG z36(QdZAqb&Y97usO!z=vP3_jr!aDlVJ)b_mk^)|JHgNhc*Nn-s>=+`ghoZQ4WP8X3 zd+TT=Ra!y(_vn^_)8NnCr?{9AuBFB@rKWK2I7}%STN+l^Bjgtj$&#PM9o0KWNJgF;(|?iqJervI*yoCEGg-@JtF9{1yJ( z93q4TTlPr|aL>&)Qu7F(Xzje1^bBWVD!MDw8>DovJGBT>c{BjFKwO498zd#c@|AlZ z)C}Vi(I<@HOQlt%yH6dN;Eagfpo6F4qlkjD{WF)0_DtAO5JLN1_Wpjzg&$IQ08}KE zN`UCYP1iD@E|6=;y__|e@3lR0gt7qxCS@(>?%Q`}KzM$V8#{mXV-{?%=~+hl`Y3K1 zB$F;S=H0t7{S|5T*l9!UsFJ52cgp?dVWr5&AcUUOja)lK52T^tE&uH9t3{ey)6@id zvhro-S(uYI1nKtOxqTBOt)mJ8sC^xTGO1j9k|Z@~r}kXrJCn|`CA2J7NISZk5Nae? zy?o`rqGa-oIatGyVX0e=&`(& zEpa6~8@9@N=h*(|4PdVr!t_h8IrgmA@=nDgQzaH@`qrn+-yFvRT^Tm=-{aEnQ42`T z-$75=L0{w=OPIM{GYEf5NKbuqM4K8s<}*-hbJZl}uu@z0luf197oHbQd`~|G z`H)3?Ffw{diPDV{R~N{NaNPddur=uHvc`tCJ_y#lVh=;xLZdfEe%D}W|6=a>&iNq- z;nNEprz)EznT>{#!>=Ng&y8l?UtE7@@WD~jEA&iu%7_*I;JNu65B82_A7`xK)sMzg z1`71q-avrpAxujbJIJZL)^}EMp4lbJH&yGcnT91EHt6Y6xp5k35-?&uJ0puV+%gBX zKCV^{utYKIAw+4CK9i%Jq0zDj$K429>dNcDzE$v=j->LIX~`^gX!#;HeP^|@d;0sc z`P~kwQ*n|QY-d8K26>F6r^ay^U^SVOdiTjDIRxGyw3F#Bv(Ku%CK_+n8vicTW_PrtFl08+3M@?LI2esdE%NbTya7)&a@w z#;eHdlk)kO#XBE@c2dNvJgE2`Z4-@}(i;6rfc(zI?VWFC;Qma;68mc4&R^EIo|z6n zGH&d*0O7=IN~I4dJ328m>N4jvsvfi>am8t;gdvJ!Q;x$f> zFPV5&B>adG(egkeVq1L9H&bsnfT(e*_TXJL`NH z-#2pH2OJ}j$RLC=_x073cA1w#DR-5kEk7+G$smeaTttH3pnr)#$OY)mEWCUWk?4S&dVEZF(j4uT;5SRr zOO;U`89B_MjEZ~LU`ZtR7#u;;O5S>HUi*!c){*nKl_MTmbsF!`CTX|Tr_i;5A@Wbb|16WrcN2Sl00 zsG$=HN?2D@Sd;Mu7;A5d%)k3q>ba$WU2YJ#ayUz2(=&|ai+lQq^7S{iMc9Qz%rb>n zF;FwJo#VI{JFoTzWyFhr<_+x6)a>djBk#Q*6Hs4+xGxs`I8w7#AW|cpXqxEqQ&9V* ztLQf2v`RNx-ub?D^Ys+ftiM`feKWn8dI+OZt5-!whV49Rfw&5D&7k^Cx7h(9oj&oc zG8z#3%{Hm;|l&wnh?}aDWz$H-=(@q+VqgQ->Tn(J{bdoG%Ey<5!6Fn&q&xT&_ zCf|Mg)27L|TONBySL)e zqPu{P07JyM2JsCJuK@a2UL-%>r{q?AeWlJgQic?19c5j#{RUAcFhifTfO*4YbmVIX z2bRVG{}Kpt3}=|Rj_SG5n&xa;Y9-!ABrx=2`Uu@LHso|;E`zJT2q;xL8mTLd4mslo z$&R@PhAeI*EwZlfYj#E$7MBDuZL=93e-QRmX=T}iZt_0d7A7$v@9m*WP<-J-2fsF` zbnu%14lmJ5rSC|y#wc22z|Mz15#@27MzTat*C2`0dO*ts<7tg4Z5>V4%nH6)i*p=i z%JO-9VPL=A4kW@_Nsz^)UO>|tD^IOtiqffdzeaPc?#o~pQ8L8>H-64FE+20PGb66edi&!qjS2mHF48DgQ;x0Y;AkwbOh1r~C5pOVHX zq;kw>ir&uZdiAxFtKc^ZT5UAuGGE7|vWC5$RC^qM${tQcL#)(yuB<_1S0P&OX0;po zCe7{BHSO4J+-}!PHYkh2mWhB`_pN4G#qv=UMW49Awx91(XW~!Kx)x~{7oPW$vkJ=$ z=ggtMCre_H+m9RtYP`jhwtlN3anl4GgbhC^24g57@0S zR_OXZu0n_9nXZpE82`Tt?+AV7gQxdB5d0?55N5gibczMZ{>brFt}^G^ZI+LkLgjiDv{!DD+DA`Oa%#0MeIt>^QyMEyPX)$Ko4tAa**anrzNP@iO_G~-;u zLYZZe$>RNI2z9`iTR>jI&O^p}r>!?DwE?VS@@2BsXq%_l;@luwff?566Q$VG1-a9+RLp=yN3n)d+DAUg54+Ff30N9uLSzkjo>7#P#CWep*ot#db~uGw_e z2XuJr;3`;@V)6j_pvhDx)c=a{nkdb0BH6W*r*7;6qQnj$f%(j!qK?gG-ucS!M(1?$ 
z4$RY@$mXVgm(6VA-7l5Hxh0|*VQz{ek*%X;NefyWo3=#*U45BvO&Ov$UDqje*-m~h zZ2tvB8LA(P$O=}X%iGs*G+XR6&!*YCsS4M3jEaj$khW3za6HbDfvUJukL4Bki= zVh2wz@G}vyXic=f+e3>YA-#s)2C_rhhb=F(c#cEF4ER9Dd=V&~qh`=T7W(yK;AN70 z)RNi@O^YUDj@07TIyW=RH^DxuuX7LH3~+Rgdh(6#S)t$GeVg-bHbH+xa|e29HZa&L zXRF%%$~U#R-iE`RL4YMxz*rpm8l;J>t1clQW^_7h8Q@Ot%8*4JAhid!hW+|3|B`Pt z^@1oI$jR&sj`C;FywJ;xO#RHgT&WI{rSomP=w0WhsKp!)e^+LkWJGFF^wFnI^hs{_ zFI@4dtvUNCMKiTE$*Ykp5uJ%F$nsKsH=Py~NyPxb|s3|j_lEZ;6ry*msEl8y9y6Vyufj-MR8gI9$_qgdD zS45!JNY6{H&y1RmA%2XC+?PZPq-;+yqNzM#(J5l6n7J8mu1^IC*-xTkRz1)mUf4sC zi!%^9A`bqr+g z(d}O$yHxWRjM> zRlnErpN(lQFC7AV-mb~+%8XnE6lYw-Ug2-h?8`P&Rf;^U?x&zy^((^&P6wq7eP#>! zV)t*ib_gG@LEK~~C>Mfk`+}P9aAa!Cq8cB#z6n-ol{oP3pWsJAW~Po7c^*p6Oc8rw zP1egTa}?zo3CYx$roAp11J2Ud9av+r5t=CN$CWqjJi=dK?zj6c@N7|8yE$@Yo*6H+ z9{Af2_bClr{0`FxItr(i?79db?2u)S>8DOmb}EWW6IJyHw0t}Z{6TCVX^kOVQ>3lM zvDZo_yHk9#47nH}Ut7)WGEh)s3RSA7sCBx~>H7&asEJ^!K(7}qzOSyH=e3{Vn zeeEGrwp<} zL-xDd+DGLz<6-wJ_xT)jhVz!|)RVnG2@w8<)N?eLq54Yq&o-0@vsJHH0j+g5czw3_&n%}l!z+9b4ZM&hH*m64FMDgP+dZUho@tt)#haxH2wL^ku5tLY_7 zo5rLjr;6+9jnoJqTcEma){!r_eq%b+GG;!4Jj^t8DR`Cx`+&o!nkMQNr(hRK4JrF? zdcV@;F=FYRaD6tLERE(zUrleQ zduy?ZkU?l8hB8`5gOT?w5;HU2(Wh3QXT+Zu*t{|;f( zD#y@Q+l5J%BS`fvOCLh(0++by$0v`skF^fgOR#4N&`VjZxMxUQk(rfxyHT~b+?H=r zlzZ!_`MP~_lIwJUWMt}2U$d^+e7mca9oH$BUjC;AUBB#(Jin5CE#j=d5LpYneg~52 zV-XW<$IgBRk(7v69u;To4Lu!8JtG&Ij8B8c(r@?Z~K)K!m*bQ#4B6^ zeVGZZgEk8d5Ln1fECGGi=xyVpF)tm)7D?@fZx&GkjQnhk#x*(oH_28IH36__{QkxH zx5aXqiQqR$SoQqZojZIr6!QexA1KB%BAsEaV|%|{kAft(+2v#&-7Nx3BQD(Fk&WRT zg~eYYug-@-_sP9HufS7RUUnXl7b|T13e&4^+n?kb~5^zq<%OWMyx8}NVbDUy6=he`itKs}u0+W`pThbCX zg@V4fLBPI5lIMN+o6!o@3}W8`0AA4+z z9{6eE%Es5@+@sYQgc6PP3W;p=5G%NV>RxiBHVTi+1p8&beAj0aG-5ZKi3pGo-?F-f6{#V{2{RU>tj+2nR$ z-dynDJTF0QF8{20{8?40Z&sG#ukxHNX~5`;G?jP55)&VTXn+!l1hB+X z4{4&yl)$prm@xoB(9q4Vk7xNNTUk4?;+x*{!`EZ>NvEl655MSr?tDBgiw%+}aqBjD zK|p?YT;phMoSYlvXBMN^Fy~8As1dqUn=z!1A(m8fWwbJh#LgPvr9%^_q#w03i(a+WKYrzxStTG zq{}O%Q;pz$lUoZ+CIN0Fm4%)R7wVRR11WK6Id6#XRc_8KQ%L)01#r4NUd=p1I^jE} zqzXbAV98qevY4cYcy?O1O4zn;mGrhEmizaXN-HPaJ0v-sn)&%g?&eq4^=zVAELN3F zZR9dIf_T}R0X*PCw$4h-1 z)plD1Z|LzPI~P=s3Rd{PuoO^^`AJ=uhptKH5pJyHe4sSxL0X$ACG~yu&br^jo108? 
zG5nPMY#AY{VJJIdT>1W>n$!2CXmP7tOA3^M)8UTc2drLnvtPle+wWsopKJc)owMSz z+J%ob_gk##Rd|NWzcd-&)&P{xjcJCOTS2izxcPNCVxckUn@$-uL%cP|LKxx>jD2(HAL~oHX;2XL~v^ zBs7BDn9{;%=e19-g_r=L@VaMYCXOdpgI4?1#!J&7Gj8vuty!kl2B7%cmV5~V2q7Ct zT2uERT`F=OLCOGoD0JutKmN57K|cCe%x-FRbABs#X4`m40P`?I>S*Xi9Ma`+*(ai0 zwGH2e^ZZ?gj%2XzR>CCH@afwn#Fquw%a(FGR59{4MPFvQ>#>5VHg7dh`)WHqg<5e} zC>K7+5)1~ADrk-%!+2e85O4@x)8VtKYnSR0iiO!tTf{^I2c!Lki?~bTVpfOs8FQR^ zfZ~Q;)A0u!O}g|O3@ZI+j`ZS_sK}v@Ic`AICu(6%vm|aU0;eqTa*?0XCR7nBFsGf} zz=agS*!F22l!q-WJ;8`-DA9!8s-Of)hJaZ0sGe=&;!=OPHq$rEt<-xiB|40ElkXNi zgBA?}`B`W~uK)wO9dhyMQ)5_^mGEX8HIA?WwE{sgp_ucq5ugP&hxNgbi;p4QCmY5n zJCfP{8x_QQ+~+%Ev@T1zG5r+1E~y%%EB0uQ7u(AnSLTO&CH$#0*Cx&yLZA#B|#Ow{_>;wI85VJs0Mx;av?O(LsVfx z1LP zK`(RZ%?hJ0V}w0aG#>DNRy3}~Ng$%7>mw7s-hgxOi6EjGXu;pCyYFxWh!R}OP-W;L zdx4NBBxSqq=VhGMclH4DB~rd8gD<1tC^t2lw>bunLosYd5&S*nOL(AI)zA`ApT!oi-r0nm{PN@!*jlFbL$!gKbNVrf% zOFHN7m?C*?9gmseYAO38Kgsv4280tMK%XCDXb96#UQrb$uK{UFtrKrcQ&}su1C?Ut z-<6_9^DEt&fXyvriIMhDV1!zT(!njU^wDAGIvb5F;@#39CYzANGag|wE@*lena#AnSvMIYk|&TIjj^BcUBY+C=Pe09kyY~2P7=Z&AD$iCr(l_7YPwXU{~_QVJ_P?yDX#tQqD)HPBQ6U_hSJg^-Kt7t02 zy{#!~pzi(hjbgz`2|Y%<$G(l<)vy`K4qZ%`V2TaVI(4xf^R-V-b3v8^L*Yr15PG6^lqzuv7*oB+nTHHKVC-6 z0pncAW?j$guBf2OtcBO}n;{o=^zCNT(B->Zl2ghj)j3;69%KYEYDZbYRmyOy=jILL zK%wYCrkm!49R~r8Cst6zq_gegvk{4mvCK zt9<8w2sQ1&Nk@KL-*-b)T!uWeTxWOP?4hg|Q28zubfHeN66QS5`<_Twx4jp{kXE8 zVBjnt(E~DW-k@x+TLj9bdw>Y4G`+`;vDUu@S93vF0rjslH`^T+&mElDxvbp?Ya0a( zi4orJDQ#A4B+&JI#wG{ikc;JkQzEyj{%pAi!z1INYl@xP-BlJcI$I%mz#keBf4$20^3k z%J0o7#)O{ARhXm$)~|d=J1in6NGQ|{2;gSrrIF6A%TwR}y7&A@_?(u9H9dfSCmhX( zma?u|3OIqzl9+doD{oE*6FeR5;ZoKwK$4aAAZ4>}ka4xpJ~!aZ5BP!ww&)V)r;|E4 zY5e4MADg$N`StB+sVmxQR=e*EjTgsy^ZGSN)u|IH(AR-O+-|5?5+YJCJy)P4oe9-3 z`Y&xH$zfhy#PzplcPmV^o1Or*nk#V7Lm-g-f~=<92-pt-2lDKhwVbRPZ5r=1#A^dY z;i8BhTI0`NHuicf;kFm@2Oqi^TX|)JW?8N$sK#V(*~(}S$ z0;r1x3--DKb4c9s*>8;vrcT00^B3>GKjsotIF$~GEKE~~vtKB8Xl_&Q-3h zZGZJIIgX7cxO}-i`!>Vw3Jw&Xsnlf0z1-FL= zq|a1L?>y#u6Q&G)!_9e<)xhq35FbNAG_j9Nimz|n9+wxE_k-UcMQ??BwV0E}siRd3 zZ30mYZ|uj6AXSDFt)@6xs0m<}aGDT82}RXN7OWtIYFz-bwU)?U~l zC^-zE_86d+8x2~AX?@6=j}cuIwyis#mLyqO)wH{RpX@Ti^1i;J(!`frT7ewu9MQ6` zMd3FLC6dJ~$~^t@L&)dbN&b3BuzE5V{W^f3;0*!mpUIm7AQTu(Wj*VNwP^fT1}>lT zz!2Se{?!krIJNm|wC{`u&43IN-v|`7hv^X|8++Z2bd zO!{!JX+q{@^|@j$#cZABizq5!&}ygzNUUV_L?>7kgaQx`sG^rV{in%aP2b>!wb9SN zo{p-Z)3_PVsHYa0z?OJb$V1K*dT@N-NK^DXRmKP2(BP|*`b)Pft^uV_Sb5E}*MK0S z6WD5h)h{&_E=aqII>+}U`Nn>rFIa3=;U?5i2i za`?4f;^uoj&jl+WfROTtPMz5`1c9er?$bTx?H!2T&Q@lacmF-6Q2ORWt+>o$n*5`B zcEaL(d&)(53i(No)d%al6|4Rw=^3W6vd-+<{I#UbeL-Q`RkRf*y?{QM(0kQu?j!)6 zdd|M#c~`j$7bb%VHxB2P_%@rZj6ZD0kaGv~lACEtJxg1^=m5G(_H{4yriE%H>858{ zW*GAGgX9KoFVZ2Y3ul1&Enp8*HKxgK0kot!$j(3l(y|PCJM9b!Mpj|$PrT1F0_A3= z6gBtV7$Uz{Kd>|g_9tkErC00AO+D$_g;?u^3FF~|!$>M~_Gx+E<89(x<2JyF(|Jag zEfh6@?j5PtUN`oaTj4^Gs7F`xyzvv4_VZYQ$4<$_D^mh}g!R`CsP<~*zA8^td2ZRfzdOCLPAs(~7)+&nL5rTgB zgvprFa}TRJx&(}aE#r<<+#a=;XC_<65u@-LMg&N)Qhovw2$RPLVu zj1&gAl5hF=5$DI2dg73ciuw4qW@<}d;eN=)*?qJ{>v^yYccR3GwF}C^N;MfyJHOrBLBhm$i`Sx2!XSbD5rBpBn(>q7s zEi_SUp)s=%>vQQn@dL@|>n{mFwPiur9OG9&+khy;SbG`fZ)OLey56dm-H}#qJI|V* zcR0g2e_;INWq`^IW5^BjDfqhd%v32u{v}JfPeBr63}LghG=L@&p-h`^jb#Oxyf)}7Ul>9LG22I z?2gUVNl(ElTBEWH()_6jD~cI`)d&SbL6)==4mqCH2w;jWN8(EW=V#KDyPB6xys>lx zgJZL?7Mad#(a2NeQ{?!-;EW)pTLGm|s&^Mc_X{1#%3ywr_X(3Zl{vLJM`-E|LeiUw zJl#aycWukeGR`9}Nf1Q3r;~^j@AE}?3Vyzi>$}CE4Xp_12Z0xr&{6_3s<)ZiqZhh( zwfFsiz^9W&>Iw!CDdbKsbDF7!(4?Z2VQgJ=M#BTb4dA^Hx+ z7#)fH7SCqs)OS{;1@k>O^69@CeaF&WSX;Z9K0mO z>HFw%OgD4C80(1Q!k>Y@!YQpCw1oXRFz@LK^5G|s&IhI)7$`G%RQY`S-j>m@5x1z%Dkc6gkkSXb*5rPC|e^0owWbF1g?01MPq)(IyqD`1V|AV(t#S7H8ZU@A@rpOHl1@E} 
z{Kr%8#W^(eJcD@G#2%h-qW`J+g2SO0uRqFl`L=n&R)OAux5%BDsbjhgA;K1d0M<0N zL~mhFsGYTn9KBXdss^Ezx~10D-jA`-L{EX{1B*Gqe4#V63cm!)GmK)ef(BCnMg9Hq zK5At1Z6pMl_n15HCb?OPlp8cUN$eBZRQlPIiRLAO3z|?iZ%69&{aXC1#yP+&zkYNf zhe-(WjO0Vn`|~V`KR!ryp^n~eNb`o1>R~7|uXk`djd)v?f>=QeR2nkJ()bs{o&&?0 z4PmavN^9ZvZeD@~%~s8a1RE~;+DXHUtrf8+RQ+c`mGQhrvs0c(x=LV_!GfW{4loYm zk|9t3mfDp=Mh7cjezp&9HOFRN(h;u^0-Y!J@f+?gUfAaePE7GR2C#{0qz6U+UwD6c zcu=@d(0l|D-6voFx_0=u?3oTUtyb7KU1N_Yn80c)WUiHIYYS?lM)!R`A?g0=2 z`J%1IsD;Vamp<5Op15Q-kHB{Cdu1kT){2ftf3%jn$yKbE=U>zMLw5fB+aGjg zzSrHQ9W64JVr8|xv;0n7KS8qNLwN$xKObJ(sXh2U{)6Qu3xKXT& zxpbd?=+oNLhLVVqV+eLgy*^}Se_huHq@itt$nJO(W5AW7kP7a61TJ27H=*?*fnF85 z?x0UIFCpB`R_d)72SrFX@zc^KaHpt~JWuUZ*H4hILpCct6~nJ#Ogi=|^4T?uM+(BL z-;c5yLyfig6O?W6O$-vN^sc2i_$nNe%w z=ws)P!NB{mbTg)NiavlzzOohF9bBx|49)xqL_~JykDEk1UzsQ=HR#(d9GAjrys)eJ z$4~glclqZ$EW=9)QQ*4CYwJN4Z05{N^tdHA2P?9Vor!;pF=Ka~8ceQ(?6OGw`iMVv z@@-A2MCx6RIwgZMHEAkN0v$)Ixre#{6CQEU-8P2i2c(_VAHMg=B999A9}+Yl-mZ_v z$M4(qHGB{8CW%{DZevI!FmMJgf&rucPFB7y>;ot-?A>)*Z3J7aH|(n;AduHdmfUTP zHJq`j_$@@mr;p7qI2<*{Ymw&`WJUqh%|T&*-xkyJJ z9%MJf*21~q?%A%Cu{HHI9@!f@>iEP(Oe*EjEE)rZ@$f>iKY6zOOX1*Rph_$00lApA zS(3Ugi8@h#%a0@!*qVNmzXN}@FBMvQA@xxnFf)P4m?`}o``@>h>|*0Vg3mkjf%|4z z?V}x5@3O!@h`dPIsD$#+C7+QLK;EEOx{o&w@pn=FvL!sBMT81q?}p#^T0eIn&Czx< zrup`PCt$552vQ%(8LL9Kb09y~@WbB=+xE{bMScHiTUu&$4%J!;A&L~@4Z@9=Hv2?k zpjjPYtnDH8rWdxNvcK>;Y`gh(y-dB(SeiGeeA1k=oMe`mXC?CbX3Rdp9ve{W#XrHv exyhJIvfdVN&6%qYSMmRUfYv>Iwc5M(asLBsGyv8B literal 0 HcmV?d00001 diff --git a/arch/arm/ARMnn/docs/Doxyfile b/arch/arm/ARMnn/docs/Doxyfile new file mode 100644 index 0000000000..dd87daad4e --- /dev/null +++ b/arch/arm/ARMnn/docs/Doxyfile @@ -0,0 +1,2541 @@ +# Doxyfile 1.8.12 + +# Copyright (c) 2020 ARM Limited. +# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project. +# +# All text after a double hash (##) is considered a comment and is placed in +# front of the TAG it is preceding. +# +# All text after a single hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists, items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (\" \"). 
+ +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all text +# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv +# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv +# for the list of possible encodings. +# The default value is: UTF-8. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by +# double-quotes, unless you are using Doxywizard) that should identify the +# project for which the documentation is generated. This name is used in the +# title of most generated pages and in a few other places. +# The default value is: My Project. + +PROJECT_NAME = "ArmNN" + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. This +# could be handy for archiving the generated documentation or if some version +# control system is used. + +PROJECT_NUMBER = 22.02 + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give viewer a +# quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = + +# With the PROJECT_LOGO tag one can specify a logo or an icon that is included +# in the documentation. The maximum height of the logo should not exceed 55 +# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy +# the logo to the output directory. + +PROJECT_LOGO = ./docs/Arm_NN_horizontal_blue.png + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path +# into which the generated documentation will be written. If a relative path is +# entered, it will be relative to the location where doxygen was started. If +# left blank the current directory will be used. + +OUTPUT_DIRECTORY = docs/ + +# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- +# directories (in 2 levels) under the output directory of each output format and +# will distribute the generated files over these directories. Enabling this +# option can be useful when feeding doxygen a huge amount of source files, where +# putting all generated files in the same directory would otherwise causes +# performance problems for the file system. +# The default value is: NO. + +CREATE_SUBDIRS = NO + +# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII +# characters to appear in the names of generated files. If set to NO, non-ASCII +# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode +# U+3044. +# The default value is: NO. + +ALLOW_UNICODE_NAMES = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. 
+# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, +# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), +# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, +# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), +# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, +# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, +# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, +# Ukrainian and Vietnamese. +# The default value is: English. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member +# descriptions after the members that are listed in the file and class +# documentation (similar to Javadoc). Set to NO to disable this. +# The default value is: YES. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief +# description of a member or function before the detailed description +# +# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. +# The default value is: YES. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator that is +# used to form the text in various listings. Each string in this list, if found +# as the leading text of the brief description, will be stripped from the text +# and the result, after processing the whole list, is used as the annotated +# text. Otherwise, the brief description is used as-is. If left blank, the +# following values are used ($name is automatically replaced with the name of +# the entity):The $name class, The $name widget, The $name file, is, provides, +# specifies, contains, represents, a, an and the. + +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# doxygen will generate a detailed section even if there is only a brief +# description. +# The default value is: NO. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. +# The default value is: NO. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path +# before files name in the file list and in the header files. If set to NO the +# shortest path that makes the file name unique will be used +# The default value is: YES. + +FULL_PATH_NAMES = YES + +# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. +# Stripping is only done if one of the specified strings matches the left-hand +# part of the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the path to +# strip. +# +# Note that you can specify absolute paths here, but also relative paths, which +# will be relative from the directory where doxygen is started. +# This tag requires that the tag FULL_PATH_NAMES is set to YES. 
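+#
+# Illustrative sketch only: the path below is hypothetical and is not part of
+# this configuration. Stripping a local checkout prefix so that file pages show
+# repository-relative paths could look like:
+#
+#   STRIP_FROM_PATH = /home/user/src/armnn/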
+ +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the +# path mentioned in the documentation of a class, which tells the reader which +# header file to include in order to use a class. If left blank only the name of +# the header file containing the class definition is used. Otherwise one should +# specify the list of include paths that are normally passed to the compiler +# using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but +# less readable) file names. This can be useful is your file systems doesn't +# support long names like on DOS, Mac, or CD-ROM. +# The default value is: NO. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the +# first line (until the first dot) of a Javadoc-style comment as the brief +# description. If set to NO, the Javadoc-style will behave just like regular Qt- +# style comments (thus requiring an explicit @brief command for a brief +# description.) +# The default value is: NO. + +JAVADOC_AUTOBRIEF = Yes + +# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first +# line (until the first dot) of a Qt-style comment as the brief description. If +# set to NO, the Qt-style will behave just like regular Qt-style comments (thus +# requiring an explicit \brief command for a brief description.) +# The default value is: NO. + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a +# multi-line C++ special comment block (i.e. a block of //! or /// comments) as +# a brief description. This used to be the default behavior. The new default is +# to treat a multi-line C++ comment block as a detailed description. Set this +# tag to YES if you prefer the old behavior instead. +# +# Note that setting this tag to YES also means that rational rose comments are +# not recognized any more. +# The default value is: NO. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the +# documentation from any documented member that it re-implements. +# The default value is: YES. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new +# page for each member. If set to NO, the documentation of a member will be part +# of the file/class/namespace that contains it. +# The default value is: NO. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen +# uses this value to replace tabs by spaces in code fragments. +# Minimum value: 1, maximum value: 16, default value: 4. + +TAB_SIZE = 4 + +# This tag can be used to specify a number of aliases that act as commands in +# the documentation. An alias has the form: +# name=value +# For example adding +# "sideeffect=@par Side Effects:\n" +# will allow you to put the command \sideeffect (or @sideeffect) in the +# documentation, which will result in a user-defined paragraph with heading +# "Side Effects:". You can put \n's in the value part of an alias to insert +# newlines. + +ALIASES = + +# This tag can be used to specify a number of word-keyword mappings (TCL only). +# A mapping has the form "name=value". For example adding "class=itcl::class" +# will allow you to use the command class in the itcl::class meaning. + +TCL_SUBST = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources +# only. 
Doxygen will then generate output that is more tailored for C. For +# instance, some of the names that are used will be different. The list of all +# members will be omitted, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or +# Python sources only. Doxygen will then generate output that is more tailored +# for that language. For instance, namespaces will be presented as packages, +# qualified scopes will look different, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources. Doxygen will then generate output that is tailored for Fortran. +# The default value is: NO. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for VHDL. +# The default value is: NO. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given +# extension. Doxygen has a built-in mapping, but you can override or extend it +# using this tag. The format is ext=language, where ext is a file extension, and +# language is one of the parsers supported by doxygen: IDL, Java, Javascript, +# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: +# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: +# Fortran. In the later case the parser tries to guess whether the code is fixed +# or free formatted code, this is the default for Fortran type files), VHDL. For +# instance to make doxygen treat .inc files as Fortran files (default is PHP), +# and .f files as C (default is Fortran), use: inc=Fortran f=C. +# +# Note: For files without extension you can use no_extension as a placeholder. +# +# Note that for custom extensions you also need to set FILE_PATTERNS otherwise +# the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments +# according to the Markdown format, which allows for more readable +# documentation. See http://daringfireball.net/projects/markdown/ for details. +# The output of markdown processing is further processed by doxygen, so you can +# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in +# case of backward compatibilities issues. +# The default value is: YES. + +MARKDOWN_SUPPORT = YES + +# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up +# to that level are automatically included in the table of contents, even if +# they do not have an id attribute. +# Note: This feature currently applies only to Markdown headings. +# Minimum value: 0, maximum value: 99, default value: 0. +# This tag requires that the tag MARKDOWN_SUPPORT is set to YES. + +TOC_INCLUDE_HEADINGS = 0 + +# When enabled doxygen tries to link words that correspond to documented +# classes, or namespaces to their corresponding documentation. Such a link can +# be prevented in individual cases by putting a % sign in front of the word or +# globally by setting AUTOLINK_SUPPORT to NO. +# The default value is: YES. + +AUTOLINK_SUPPORT = YES + +# If you use STL classes (i.e. std::string, std::vector, etc.) 
but do not want +# to include (a tag file for) the STL sources as input, then you should set this +# tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); +# versus func(std::string) {}). This also make the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. +# The default value is: NO. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. +# The default value is: NO. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: +# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen +# will parse them like normal C++ but will assume all classes use public instead +# of private inheritance when no explicit protection keyword is present. +# The default value is: NO. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate +# getter and setter methods for a property. Setting this option to YES will make +# doxygen to replace the get and set methods by a property in the documentation. +# This will only work if the methods are indeed getting or setting a simple +# type. If this is not the case, or you want to show the methods anyway, you +# should set this option to NO. +# The default value is: YES. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. +# The default value is: NO. + +DISTRIBUTE_GROUP_DOC = NO + +# If one adds a struct or class to a group and this option is enabled, then also +# any nested class or struct is added to the same group. By default this option +# is disabled and one has to add nested compounds explicitly via \ingroup. +# The default value is: NO. + +GROUP_NESTED_COMPOUNDS = NO + +# Set the SUBGROUPING tag to YES to allow class member groups of the same type +# (for instance a group of public functions) to be put as a subgroup of that +# type (e.g. under the Public Functions section). Set it to NO to prevent +# subgrouping. Alternatively, this can be done per class using the +# \nosubgrouping command. +# The default value is: YES. + +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions +# are shown inside the group in which they are included (e.g. using \ingroup) +# instead of on a separate page (for HTML and Man pages) or section (for LaTeX +# and RTF). +# +# Note that this feature does not work in combination with +# SEPARATE_MEMBER_PAGES. +# The default value is: NO. + +INLINE_GROUPED_CLASSES = NO + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions +# with only public data fields or simple typedef fields will be shown inline in +# the documentation of the scope in which they are defined (i.e. file, +# namespace, or group documentation), provided this scope is documented. If set +# to NO, structs, classes, and unions are shown on a separate page (for HTML and +# Man pages) or section (for LaTeX and RTF). +# The default value is: NO. 
+ +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or +# enum is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically be +# useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. +# The default value is: NO. + +TYPEDEF_HIDES_STRUCT = NO + +# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This +# cache is used to resolve symbols given their name and scope. Since this can be +# an expensive process and often the same symbol appears multiple times in the +# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small +# doxygen will become slower. If the cache is too large, memory is wasted. The +# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range +# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 +# symbols. At the end of a run doxygen will report the cache usage and suggest +# the optimal cache size from a speed point of view. +# Minimum value: 0, maximum value: 9, default value: 0. + +LOOKUP_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in +# documentation are documented, even if no documentation was available. Private +# class members and static file members will be hidden unless the +# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. +# Note: This will also disable the warnings about undocumented members that are +# normally produced when WARNINGS is set to YES. +# The default value is: NO. + +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will +# be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal +# scope will be included in the documentation. +# The default value is: NO. + +EXTRACT_PACKAGE = NO + +# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be +# included in the documentation. +# The default value is: NO. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined +# locally in source files will be included in the documentation. If set to NO, +# only classes defined in header files are included. Does not have any effect +# for Java sources. +# The default value is: YES. + +EXTRACT_LOCAL_CLASSES = NO + +# This flag is only useful for Objective-C code. If set to YES, local methods, +# which are defined in the implementation section but not in the interface are +# included in the documentation. If set to NO, only methods in the interface are +# included. +# The default value is: NO. + +EXTRACT_LOCAL_METHODS = YES + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base name of +# the file that contains the anonymous namespace. 
By default anonymous namespace +# are hidden. +# The default value is: NO. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all +# undocumented members inside documented classes or files. If set to NO these +# members will be included in the various overviews, but no documentation +# section is generated. This option has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. If set +# to NO, these classes will be included in the various overviews. This option +# has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend +# (class|struct|union) declarations. If set to NO, these declarations will be +# included in the documentation. +# The default value is: NO. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any +# documentation blocks found inside the body of a function. If set to NO, these +# blocks will be appended to the function's detailed documentation block. +# The default value is: NO. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation that is typed after a +# \internal command is included. If the tag is set to NO then the documentation +# will be excluded. Set it to YES to include the internal documentation. +# The default value is: NO. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file +# names in lower-case letters. If set to YES, upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. +# The default value is: system dependent. + +CASE_SENSE_NAMES = NO + +# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with +# their full class and namespace scopes in the documentation. If set to YES, the +# scope will be hidden. +# The default value is: NO. + +HIDE_SCOPE_NAMES = YES + +# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will +# append additional text to a page's title, such as Class Reference. If set to +# YES the compound reference will be hidden. +# The default value is: NO. + +#HIDE_COMPOUND_REFERENCE= NO + +# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of +# the files that are included by a file in the documentation of that file. +# The default value is: YES. + +SHOW_INCLUDE_FILES = YES + +# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each +# grouped member an include statement to the documentation, telling the reader +# which file to include in order to use the member. +# The default value is: NO. + +#SHOW_GROUPED_MEMB_INC = NO + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include +# files with double quotes in the documentation rather than with sharp brackets. +# The default value is: NO. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the +# documentation for inline members. +# The default value is: YES. 
+ +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the +# (detailed) documentation of file and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. +# The default value is: YES. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief +# descriptions of file, namespace and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. Note that +# this will also influence the order of the classes in the class list. +# The default value is: NO. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the +# (brief and detailed) documentation of class members so that constructors and +# destructors are listed first. If set to NO the constructors will appear in the +# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. +# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief +# member documentation. +# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting +# detailed member documentation. +# The default value is: NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy +# of group names into alphabetical order. If set to NO the group names will +# appear in their defined order. +# The default value is: NO. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by +# fully-qualified names, including namespaces. If set to NO, the class list will +# be sorted only by class name, not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the alphabetical +# list. +# The default value is: NO. + +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper +# type resolution of all parameters of a function it will reject a match between +# the prototype and the implementation of a member function even if there is +# only one candidate or it is obvious which candidate to choose by doing a +# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still +# accept a match between prototype and implementation in such cases. +# The default value is: NO. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo +# list. This list is created by putting \todo commands in the documentation. +# The default value is: YES. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test +# list. This list is created by putting \test commands in the documentation. +# The default value is: YES. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug +# list. This list is created by putting \bug commands in the documentation. +# The default value is: YES. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) +# the deprecated list. This list is created by putting \deprecated commands in +# the documentation. +# The default value is: YES. + +GENERATE_DEPRECATEDLIST= NO + +# The ENABLED_SECTIONS tag can be used to enable conditional documentation +# sections, marked by \if ... \endif and \cond +# ... \endcond blocks. 
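+#
+# Illustrative sketch only: the section name INTERNAL_ONLY is hypothetical and
+# is not used by this project. A source comment written as
+#
+#   /// \if INTERNAL_ONLY
+#   /// Text that should appear only in internal builds of the documentation.
+#   /// \endif
+#
+# is emitted only when that section is listed here, e.g.
+# ENABLED_SECTIONS = INTERNAL_ONLY.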
+ +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the +# initial value of a variable or macro / define can have for it to appear in the +# documentation. If the initializer consists of more lines than specified here +# it will be hidden. Use a value of 0 to hide initializers completely. The +# appearance of the value of individual variables and macros / defines can be +# controlled using \showinitializer or \hideinitializer command in the +# documentation regardless of this setting. +# Minimum value: 0, maximum value: 10000, default value: 30. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at +# the bottom of the documentation of classes and structs. If set to YES, the +# list will mention the files that were used to generate the documentation. +# The default value is: YES. + +SHOW_USED_FILES = YES + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This +# will remove the Files entry from the Quick Index and from the Folder Tree View +# (if specified). +# The default value is: YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces +# page. This will remove the Namespaces entry from the Quick Index and from the +# Folder Tree View (if specified). +# The default value is: YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command command input-file, where command is the value of the +# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided +# by doxygen. Whatever the program writes to standard output is used as the file +# version. For an example see the documentation. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. You can +# optionally specify a file name after the option, if omitted DoxygenLayout.xml +# will be used as the name of the layout file. +# +# Note that if you run doxygen from a directory containing a file called +# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE +# tag is left empty. + +LAYOUT_FILE = + +# The CITE_BIB_FILES tag can be used to specify one or more bib files containing +# the reference definitions. This must be a list of .bib files. The .bib +# extension is automatically appended if omitted. This requires the bibtex tool +# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. +# For LaTeX the style of the bibliography can be controlled using +# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the +# search path. See also \cite for info how to create references. + +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# Configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated to +# standard output by doxygen. 
If QUIET is set to YES this implies that the +# messages are off. +# The default value is: NO. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES +# this implies that the warnings are on. +# +# Tip: Turn warnings on while writing the documentation. +# The default value is: YES. + +WARNINGS = YES + +# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate +# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: YES. + +WARN_IF_UNDOCUMENTED = YES + +# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some parameters +# in a documented function, or documenting parameters that don't exist or using +# markup commands wrongly. +# The default value is: YES. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that +# are documented, but have no documentation for their parameters or return +# value. If set to NO, doxygen will only warn about wrong or incomplete +# parameter documentation, but not about the absence of documentation. +# The default value is: NO. + +WARN_NO_PARAMDOC = YES + +# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when +# a warning is encountered. +# The default value is: NO. + +WARN_AS_ERROR = NO + +# The WARN_FORMAT tag determines the format of the warning messages that doxygen +# can produce. The string should contain the $file, $line, and $text tags, which +# will be replaced by the file and line number from which the warning originated +# and the warning text. Optionally the format may contain $version, which will +# be replaced by the version of the file (if it could be obtained via +# FILE_VERSION_FILTER) +# The default value is: $file:$line: $text. + +WARN_FORMAT = "$file:$line:[DOXY_WARN] $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning and error +# messages should be written. If left blank the output is written to standard +# error (stderr). + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag is used to specify the files and/or directories that contain +# documented source files. You may enter file names like myfile.cpp or +# directories like /usr/src/myproject. Separate the files or directories with +# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING +# Note: If this tag is empty the current directory is searched. 
+ +INPUT = ./docs/01_00_quick_start.dox \ + ./docs/01_01_delegate_start_guide.dox \ + ./docs/02_operator_list.dox \ + ./docs/03_build_guides.dox \ + ./docs/04_contributor.dox \ + ./docs/05_00_software_components.dox \ + ./docs/05_01_parsers.dox \ + ./docs/05_02_deserializer_serializer.dox \ + ./docs/05_03_delegate.dox \ + ./docs/FAQ.md \ + ./tests/ImageCSVFileGenerator/README.md \ + ./tests/ImageTensorGenerator/README.md \ + ./tests/ModelAccuracyTool-Armnn/README.md \ + ./python/pyarmnn/README.md \ + ./include/ \ + ./src/ \ + ./src/armnnConverter/README.md \ + ./src/backends/README.md \ + ./src/dynamic/README.md \ + ./tests/ \ + ./delegate/include \ + ./delegate/src/armnn_external_delegate.cpp \ + ./delegate/BuildGuideNative.md \ + ./delegate/DelegateQuickStartGuide.md \ + ./README.md \ + ./InstallationViaAptRepository.md \ + ./ContributorGuide.md \ + ./BuildGuideAndroidNDK.md \ + ./BuildGuideCrossCompilation.md \ + ./samples/examples.dox + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses +# libiconv (or the iconv built into libc) for the transcoding. See the libiconv +# documentation (see: http://www.gnu.org/software/libiconv) for the list of +# possible encodings. +# The default value is: UTF-8. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and +# *.h) to filter out the source-files in the directories. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# read by doxygen. +# +# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, +# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, +# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, +# *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, +# *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf and *.qsf. + +FILE_PATTERNS = *.c \ + *.cc \ + *.cxx \ + *.cpp \ + *.c++ \ + *.java \ + *.ii \ + *.ixx \ + *.ipp \ + *.i++ \ + *.inl \ + *.idl \ + *.ddl \ + *.odl \ + *.h \ + *.hh \ + *.hxx \ + *.hpp \ + *.h++ \ + *.cs \ + *.d \ + *.php \ + *.php4 \ + *.php5 \ + *.phtml \ + *.inc \ + *.m \ + *.markdown \ + *.md \ + *.mm \ + *.dox \ + *.py \ + *.pyw \ + *.f90 \ + *.f95 \ + *.f03 \ + *.f08 \ + *.f \ + *.for \ + *.tcl \ + *.vhd \ + *.vhdl \ + *.ucf \ + *.qsf + +# The RECURSIVE tag can be used to specify whether or not subdirectories should +# be searched for input files as well. +# The default value is: NO. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. +# +# Note that relative paths are relative to the directory from which doxygen is +# run. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. +# The default value is: NO. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. 
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       = *.md
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories use the pattern */test/*
+
+EXCLUDE_SYMBOLS        = caffe tensorflow cl armcomputetensorutils
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or directories
+# that contain example code fragments that are included (see the \include
+# command).
+
+EXAMPLE_PATH           = ./samples/
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
+# *.h) to filter out the source-files in the directories. If left blank all
+# files are included.
+
+EXAMPLE_PATTERNS       = *
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude commands
+# irrespective of the value of the RECURSIVE tag.
+# The default value is: NO.
+
+EXAMPLE_RECURSIVE      = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or directories
+# that contain images that are to be included in the documentation (see the
+# \image command).
+
+IMAGE_PATH             = docs/
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command:
+#
+#   <filter> <input-file>
+#
+# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the
+# name of an input file. Doxygen will then use the output that the filter
+# program writes to standard output. If FILTER_PATTERNS is specified, this tag
+# will be ignored.
+#
+# Note that the filter must not add or remove lines; it is applied before the
+# code is scanned, but not when the output code is generated. If lines are added
+# or removed, the anchors will not be placed correctly.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# properly processed by doxygen.
+
+INPUT_FILTER           =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis. Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match. The filters are a list of the form: pattern=filter
+# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how
+# filters are used. If the FILTER_PATTERNS tag is empty or if none of the
+# patterns match the file name, INPUT_FILTER is applied.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# properly processed by doxygen.
+
+FILTER_PATTERNS        =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will also be used to filter the input files that are used for
+# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES).
+# The default value is: NO.
+ +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and +# it is also possible to disable source filtering for a specific pattern using +# *.ext= (so without naming a filter). +# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. + +FILTER_SOURCE_PATTERNS = + +# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that +# is part of the input, its contents will be placed on the main page +# (index.html). This can be useful if you have a project on for instance GitHub +# and want to reuse the introduction page also for the doxygen output. + +USE_MDFILE_AS_MAINPAGE = ./README.md + +#--------------------------------------------------------------------------- +# Configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will be +# generated. Documented entities will be cross-referenced with these sources. +# +# Note: To get rid of all source code in the generated output, make sure that +# also VERBATIM_HEADERS is set to NO. +# The default value is: NO. + +SOURCE_BROWSER = YES + +# Setting the INLINE_SOURCES tag to YES will include the body of functions, +# classes and enums directly into the documentation. +# The default value is: NO. + +INLINE_SOURCES = YES + +# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any +# special comment blocks from generated source code fragments. Normal C, C++ and +# Fortran comments will always remain visible. +# The default value is: YES. + +STRIP_CODE_COMMENTS = NO + +# If the REFERENCED_BY_RELATION tag is set to YES then for each documented +# function all documented functions referencing it will be listed. +# The default value is: NO. + +REFERENCED_BY_RELATION = YES + +# If the REFERENCES_RELATION tag is set to YES then for each documented function +# all documented entities called/used by that function will be listed. +# The default value is: NO. + +REFERENCES_RELATION = YES + +# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set +# to YES then the hyperlinks from functions in REFERENCES_RELATION and +# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will +# link to the documentation. +# The default value is: YES. + +REFERENCES_LINK_SOURCE = YES + +# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the +# source code will show a tooltip with additional information such as prototype, +# brief description and links to the definition and documentation. Since this +# will make the HTML file larger and loading of large files a bit slower, you +# can opt to disable this feature. +# The default value is: YES. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +SOURCE_TOOLTIPS = YES + +# If the USE_HTAGS tag is set to YES then the references to source code will +# point to the HTML generated by the htags(1) tool instead of doxygen built-in +# source browser. The htags tool is part of GNU's global source tagging system +# (see http://www.gnu.org/software/global/global.html). You will need version +# 4.8.6 or higher. 
+# +# To use it do the following: +# - Install the latest version of global +# - Enable SOURCE_BROWSER and USE_HTAGS in the config file +# - Make sure the INPUT points to the root of the source tree +# - Run doxygen as normal +# +# Doxygen will invoke htags (and that will in turn invoke gtags), so these +# tools must be available from the command line (i.e. in the search path). +# +# The result: instead of the source browser generated by doxygen, the links to +# source code will now point to the output of htags. +# The default value is: NO. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set the YES then doxygen will generate a +# verbatim copy of the header file for each class for which an include is +# specified. Set to NO to disable this. +# See also: Section \class. +# The default value is: YES. + +VERBATIM_HEADERS = YES + +# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the +# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the +# cost of reduced performance. This can be particularly helpful with template +# rich C++ code for which doxygen's built-in parser lacks the necessary type +# information. +# Note: The availability of this option depends on whether or not doxygen was +# generated with the -Duse-libclang=ON option for CMake. +# The default value is: NO. + +CLANG_ASSISTED_PARSING = NO + +# If clang assisted parsing is enabled you can provide the compiler with command +# line options that you would normally use when invoking the compiler. Note that +# the include paths will already be set by doxygen for the files and directories +# specified with INPUT and INCLUDE_PATH. +# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. + +CLANG_OPTIONS = -std=c++11 + +#--------------------------------------------------------------------------- +# Configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all +# compounds will be generated. Enable this if the project contains a lot of +# classes, structs, unions or interfaces. +# The default value is: YES. + +ALPHABETICAL_INDEX = YES + +# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in +# which the alphabetical index list will be split. +# Minimum value: 1, maximum value: 20, default value: 5. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all classes will +# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag +# can be used to specify a prefix (or a list of prefixes) that should be ignored +# while generating the index headers. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output +# The default value is: YES. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: html. 
+# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each +# generated HTML page (for example: .htm, .php, .asp). +# The default value is: .html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FILE_EXTENSION = .xhtml + +# The HTML_HEADER tag can be used to specify a user-defined HTML header file for +# each generated HTML page. If the tag is left blank doxygen will generate a +# standard header. +# +# To get valid HTML the header file that includes any scripts and style sheets +# that doxygen needs, which is dependent on the configuration options used (e.g. +# the setting GENERATE_TREEVIEW). It is highly recommended to start with a +# default header using +# doxygen -w html new_header.html new_footer.html new_stylesheet.css +# YourConfigFile +# and then modify the file new_header.html. See also section "Doxygen usage" +# for information on how to generate the default header that doxygen normally +# uses. +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. For a description +# of the possible markers and block names see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_HEADER = ./docs/header.xhtml + +# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each +# generated HTML page. If the tag is left blank doxygen will generate a standard +# footer. See HTML_HEADER for more information on how to generate a default +# footer and what special commands can be used inside the footer. See also +# section "Doxygen usage" for information on how to generate the default footer +# that doxygen normally uses. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style +# sheet that is used by each HTML page. It can be used to fine-tune the look of +# the HTML output. If left blank doxygen will generate a default style sheet. +# See also section "Doxygen usage" for information on how to generate the style +# sheet that doxygen normally uses. +# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as +# it is more robust and this tag (HTML_STYLESHEET) will in the future become +# obsolete. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_STYLESHEET = + +# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined +# cascading style sheets that are included after the standard style sheets +# created by doxygen. Using this option one can overrule certain style aspects. +# This is preferred over using HTML_STYLESHEET since it does not replace the +# standard style sheet and is therefore more robust against future updates. +# Doxygen will copy the style sheet files to the output directory. +# Note: The order of the extra style sheet files is of importance (e.g. the last +# style sheet in the list overrules the setting of the previous ones in the +# list). For an example see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_STYLESHEET = ./docs/stylesheet.css + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. 
Use the +# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that the +# files will be copied as-is; there are no commands or markers available. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen +# will adjust the colors in the style sheet and background images according to +# this color. Hue is specified as an angle on a colorwheel, see +# http://en.wikipedia.org/wiki/Hue for more information. For instance the value +# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 +# purple, and 360 is red again. +# Minimum value: 0, maximum value: 359, default value: 220. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors +# in the HTML output. For a value of 0 the output will use grayscales only. A +# value of 255 will produce the most vivid colors. +# Minimum value: 0, maximum value: 255, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the +# luminance component of the colors in the HTML output. Values below 100 +# gradually make the output lighter, whereas values above 100 make the output +# darker. The value divided by 100 is the actual gamma applied, so 80 represents +# a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not +# change the gamma. +# Minimum value: 40, maximum value: 240, default value: 80. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting this +# to YES can help to show when doxygen was last run and thus if the +# documentation is up to date. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_TIMESTAMP = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_DYNAMIC_SECTIONS = NO + +# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries +# shown in the various tree structured indices initially; the user can expand +# and collapse entries dynamically later on. Doxygen will expand the tree to +# such a level that at most the specified number of entries are visible (unless +# a fully collapsed tree already exceeds this amount). So setting the number of +# entries 1 will produce a full collapsed tree by default. 0 is a special value +# representing an infinite number of entries and will result in a full expanded +# tree by default. +# Minimum value: 0, maximum value: 9999, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_INDEX_NUM_ENTRIES = 100 + +# If the GENERATE_DOCSET tag is set to YES, additional index files will be +# generated that can be used as input for Apple's Xcode 3 integrated development +# environment (see: http://developer.apple.com/tools/xcode/), introduced with +# OSX 10.5 (Leopard). 
To create a documentation set, doxygen will generate a +# Makefile in the HTML output directory. Running make will produce the docset in +# that directory and running make install will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at +# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_DOCSET = NO + +# This tag determines the name of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# The default value is: Doxygen generated docs. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# This tag specifies a string that should uniquely identify the documentation +# set bundle. This should be a reverse domain-name style string, e.g. +# com.mycompany.MyDocSet. Doxygen will append .docset to the name. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. +# The default value is: org.doxygen.Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. +# The default value is: Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three +# additional HTML index files: index.hhp, index.hhc, and index.hhk. The +# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop +# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on +# Windows. +# +# The HTML Help Workshop contains a compiler that can convert all HTML output +# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML +# files are now used as the Windows 98 help format, and will replace the old +# Windows help format (.hlp) on all Windows platforms in the future. Compressed +# HTML files also contain an index, a table of contents, and you can search for +# words in the documentation. The HTML workshop also contains a viewer for +# compressed HTML files. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_HTMLHELP = NO + +# The CHM_FILE tag can be used to specify the file name of the resulting .chm +# file. You can add a path in front of the file if the result should not be +# written to the html output directory. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_FILE = + +# The HHC_LOCATION tag can be used to specify the location (absolute path +# including file name) of the HTML help compiler (hhc.exe). If non-empty, +# doxygen will try to run the HTML help compiler on the generated index.hhp. +# The file has to be specified with full path. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
+ +HHC_LOCATION = + +# The GENERATE_CHI flag controls if a separate .chi index file is generated +# (YES) or that it should be included in the master .chm file (NO). +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +GENERATE_CHI = NO + +# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) +# and project file content. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_INDEX_ENCODING = + +# The BINARY_TOC flag controls whether a binary table of contents is generated +# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it +# enables the Previous and Next buttons. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members to +# the table of contents of the HTML help documentation and to the tree view. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that +# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help +# (.qch) of the generated HTML documentation. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify +# the file name of the resulting .qch file. The path specified is relative to +# the HTML output folder. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help +# Project output. For more information please see Qt Help Project / Namespace +# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt +# Help Project output. For more information please see Qt Help Project / Virtual +# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- +# folders). +# The default value is: doc. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_VIRTUAL_FOLDER = doc + +# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom +# filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's filter section matches. Qt Help Project / Filter Attributes (see: +# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). +# This tag requires that the tag GENERATE_QHP is set to YES. 
+ +QHP_SECT_FILTER_ATTRS = + +# The QHG_LOCATION tag can be used to specify the location of Qt's +# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the +# generated .qhp file. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be +# generated, together with the HTML files, they form an Eclipse help plugin. To +# install this plugin and make it available under the help contents menu in +# Eclipse, the contents of the directory containing the HTML and XML files needs +# to be copied into the plugins directory of eclipse. The name of the directory +# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. +# After copying Eclipse needs to be restarted before the help appears. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the Eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have this +# name. Each documentation set should have its own identifier. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# If you want full control over the layout of the generated HTML pages it might +# be necessary to disable the index and replace it with your own. The +# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top +# of each HTML page. A value of NO enables the index and the value YES disables +# it. Since the tabs in the index contain the same information as the navigation +# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +DISABLE_INDEX = NO + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. If the tag +# value is set to YES, a side panel will be generated containing a tree-like +# index structure (just like the one that is generated for HTML Help). For this +# to work a browser that supports JavaScript, DHTML, CSS and frames is required +# (i.e. any modern browser). Windows users are probably better off using the +# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can +# further fine-tune the look of the index. As an example, the default style +# sheet generated by doxygen has an example that shows how to put an image at +# the root of the tree instead of the PROJECT_NAME. Since the tree basically has +# the same information as the tab index, you could consider setting +# DISABLE_INDEX to YES when enabling this option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_TREEVIEW = YES + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that +# doxygen will group on one line in the generated HTML documentation. +# +# Note that a value of 0 will completely suppress the enum values from appearing +# in the overview section. +# Minimum value: 0, maximum value: 20, default value: 4. +# This tag requires that the tag GENERATE_HTML is set to YES. + +ENUM_VALUES_PER_LINE = 4 + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used +# to set the initial width (in pixels) of the frame in which the tree is shown. 
+# Minimum value: 0, maximum value: 1500, default value: 250. +# This tag requires that the tag GENERATE_HTML is set to YES. + +TREEVIEW_WIDTH = 250 + +# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to +# external symbols imported via tag files in a separate window. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of LaTeX formulas included as images in +# the HTML documentation. When you change the font size after a successful +# doxygen run you need to manually remove any form_*.png images from the HTML +# output directory to force them to be regenerated. +# Minimum value: 8, maximum value: 50, default value: 10. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are not +# supported properly for IE 6.0, but are supported on all modern browsers. +# +# Note that when changing this option you need to delete any form_*.png files in +# the HTML output directory before the changes have effect. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_TRANSPARENT = YES + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see +# http://www.mathjax.org) which uses client side Javascript for the rendering +# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX +# installed or if you want to formulas look prettier in the HTML output. When +# enabled you may also need to install MathJax separately and configure the path +# to it using the MATHJAX_RELPATH option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +USE_MATHJAX = YES + +# When MathJax is enabled you can set the default output format to be used for +# the MathJax output. See the MathJax site (see: +# http://docs.mathjax.org/en/latest/output.html) for more details. +# Possible values are: HTML-CSS (which is slower, but has the best +# compatibility), NativeMML (i.e. MathML) and SVG. +# The default value is: HTML-CSS. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_FORMAT = HTML-CSS + +# When MathJax is enabled you need to specify the location relative to the HTML +# output directory using the MATHJAX_RELPATH option. The destination directory +# should contain the MathJax.js script. For instance, if the mathjax directory +# is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax +# Content Delivery Network so you can quickly see the result without installing +# MathJax. However, it is strongly recommended to install a local copy of +# MathJax from http://www.mathjax.org before deployment. +# The default value is: http://cdn.mathjax.org/mathjax/latest. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest + +# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax +# extension names that should be enabled during MathJax rendering. For example +# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# This tag requires that the tag USE_MATHJAX is set to YES. 
+
+MATHJAX_EXTENSIONS =
+
+# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
+# of code that will be used on startup of the MathJax code. See the MathJax site
+# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
+# example see the documentation.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_CODEFILE =
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box for
+# the HTML output. The underlying search engine uses javascript and DHTML and
+# should work on any modern browser. Note that when using HTML help
+# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET)
+# there is already a search function so this one should typically be disabled.
+# For large projects the javascript based search engine can be slow, then
+# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to
+# search using the keyboard; to jump to the search box use <access key> + S
+# (what the <access key> is depends on the OS and browser, but it is typically
+# <CTRL>, <ALT>/<option>, or both).

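As a hypothetical illustration (not part of this patch), the formula and MathJax tags described above act on LaTeX embedded in Doxygen comments: a header documented like the snippet below would have its \f$...\f$ and \f[...\f] blocks rendered either as generated form_*.png images or, with USE_MATHJAX = YES, client-side by MathJax. The GaussianPdf name and the formula are invented for the example.

    #include <cmath>

    /// Computes the Gaussian probability density
    /// \f[ p(x) = \frac{1}{\sigma\sqrt{2\pi}} e^{-(x-\mu)^2 / (2\sigma^2)} \f]
    /// \param x     evaluation point
    /// \param mu    mean \f$\mu\f$
    /// \param sigma standard deviation \f$\sigma\f$ (must be positive)
    double GaussianPdf(double x, double mu, double sigma)
    {
        const double kPi = 3.14159265358979323846;   // avoid the non-standard M_PI macro
        const double z   = (x - mu) / sigma;
        return std::exp(-0.5 * z * z) / (sigma * std::sqrt(2.0 * kPi));
    }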
[Unrecoverable extraction residue: the tail of a binary image literal followed by the ArmNN Doxygen HTML header template, whose markup was stripped; only the $projectname, $projectnumber, $projectbrief, $title, $treeview, $search, $mathjax, $searchbox and $extrastylesheet placeholders and the "ArmNN" project name survive.]
diff --git a/arch/arm/ARMnn/docs/pyarmnn.png b/arch/arm/ARMnn/docs/pyarmnn.png
new file mode 100644
index 0000000000000000000000000000000000000000..7a900d8e3a1af3cd8c8b2e0c8c2d710080e4bf2a
GIT binary patch
literal 74951
[74951 bytes of encoded PNG data omitted; the binary literal is not reproducible as text]
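The next file added by the patch is the ArmNN backend-helper header, which declares the LayerSupportHandle frontend class and the GetILayerSupportByBackendId()/HasCapability() convenience functions. As a rough usage sketch, not part of the patch and assuming the ArmNN API exactly as declared below (and that the CpuRef reference backend is linked in and self-registered), a caller could check layer support like this:

    #include <armnn/BackendHelper.hpp>
    #include <armnn/Optional.hpp>
    #include <armnn/Tensor.hpp>
    #include <armnn/Types.hpp>
    #include <iostream>
    #include <string>

    int main()
    {
        // Look up the layer-support handle for the CPU reference backend.
        armnn::LayerSupportHandle handle =
            armnn::GetILayerSupportByBackendId(armnn::BackendId("CpuRef"));
        if (!handle.IsBackendRegistered())
        {
            std::cout << "CpuRef backend is not registered" << std::endl;
            return 1;
        }

        // Ask whether an element-wise addition of two 2x2 float32 tensors is supported.
        armnn::TensorInfo info({2, 2}, armnn::DataType::Float32);
        std::string reason;
        bool ok = handle.IsAdditionSupported(info, info, info,
                                             armnn::Optional<std::string&>(reason));
        std::cout << (ok ? std::string("Addition supported")
                         : "Not supported: " + reason) << std::endl;
        return 0;
    }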
ztpDMM8(SyN3H^{_e#njgs7AQj+lQ!&7l#f$+h3_JXqz_Z2 zpPw+pY$a7#UVUP<;^N{;D=M^1O-)JX#|CyBOk44hKfZLETwa;Tnx%Jt1{f9o86xaS pwQ%#`V-L68x^=76CnWurs$Z&ZYFq4~SUWngy{)57>NhL@;~#< +#include +#include +#include + +namespace armnn +{ + +// This handle calls its own IsXXXLayerSupported() functions which then call the polymorphic +// ILayerSupport::IsXXXLayerSupported() at the framework level so there is no risk of VTable misalignment. +// This is to make ILayerSupport in its abstract form a solely Backend interface alongside a +// separate ABI stable frontend class free of virtual functions via an added layer of indirection. +class LayerSupportHandle +{ +public: + explicit LayerSupportHandle(std::shared_ptr layerSupport) + : m_LayerSupport(std::move(layerSupport)), m_BackendId(Compute::Undefined) {}; + + explicit LayerSupportHandle(std::shared_ptr layerSupport, const BackendId& backendId) + : m_LayerSupport(std::move(layerSupport)), m_BackendId(backendId) {}; + + bool IsBackendRegistered() const; + + bool IsActivationSupported(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsAdditionSupported(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsArgMinMaxSupported(const TensorInfo& input, + const TensorInfo& output, + const ArgMinMaxDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsBatchNormalizationSupported(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsBatchToSpaceNdSupported(const TensorInfo& input, + const TensorInfo& output, + const BatchToSpaceNdDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsCastSupported(const TensorInfo& input, + const TensorInfo& output, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsChannelShuffleSupported(const TensorInfo& input, + const TensorInfo& output, + const ChannelShuffleDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsComparisonSupported(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + const ComparisonDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsConcatSupported(const std::vector inputs, + const TensorInfo& output, + const OriginsDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsConstantSupported(const TensorInfo& output, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsConvertBf16ToFp32Supported(const TensorInfo& input, + const TensorInfo& output, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsConvertFp32ToBf16Supported(const TensorInfo& input, + const TensorInfo& output, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsConvertFp16ToFp32Supported(const TensorInfo& input, + const TensorInfo& output, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsConvertFp32ToFp16Supported(const TensorInfo& input, + const TensorInfo& output, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsConvolution2dSupported(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const Optional& biases, + Optional 
reasonIfUnsupported = EmptyOptional()); + + bool IsConvolution3dSupported(const TensorInfo& input, + const TensorInfo& output, + const Convolution3dDescriptor& descriptor, + const TensorInfo& weights, + const Optional& biases, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsDebugSupported(const TensorInfo& input, + const TensorInfo& output, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsDepthToSpaceSupported(const TensorInfo& input, + const TensorInfo& output, + const DepthToSpaceDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsDepthwiseConvolutionSupported( + const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const Optional& biases, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsDequantizeSupported(const TensorInfo& input, + const TensorInfo& output, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsDetectionPostProcessSupported(const TensorInfo& boxEncodings, + const TensorInfo& scores, + const TensorInfo& anchors, + const TensorInfo& detectionBoxes, + const TensorInfo& detectionClasses, + const TensorInfo& detectionScores, + const TensorInfo& numDetections, + const DetectionPostProcessDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsDilatedDepthwiseConvolutionSupported( + const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const Optional& biases, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsDivisionSupported(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsElementwiseUnarySupported(const TensorInfo& input, + const TensorInfo& output, + const ElementwiseUnaryDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsFakeQuantizationSupported(const TensorInfo& input, + const FakeQuantizationDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsFillSupported(const TensorInfo& input, + const TensorInfo& output, + const FillDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsFloorSupported(const TensorInfo& input, + const TensorInfo& output, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsFullyConnectedSupported(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsGatherSupported(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + const GatherDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsInputSupported(const TensorInfo& input, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsInstanceNormalizationSupported( + const TensorInfo& input, + const TensorInfo& output, + const InstanceNormalizationDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsL2NormalizationSupported(const TensorInfo& input, + const TensorInfo& output, + const L2NormalizationDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()); + + bool IsLogicalBinarySupported(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + const LogicalBinaryDescriptor& descriptor, + Optional reasonIfUnsupported 
= EmptyOptional());
+
+    bool IsLogicalUnarySupported(const TensorInfo& input,
+                                 const TensorInfo& output,
+                                 const ElementwiseUnaryDescriptor& descriptor,
+                                 Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsLogSoftmaxSupported(const TensorInfo& input,
+                               const TensorInfo& output,
+                               const LogSoftmaxDescriptor& descriptor,
+                               Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsLstmSupported(const TensorInfo& input,
+                         const TensorInfo& outputStateIn,
+                         const TensorInfo& cellStateIn,
+                         const TensorInfo& scratchBuffer,
+                         const TensorInfo& outputStateOut,
+                         const TensorInfo& cellStateOut,
+                         const TensorInfo& output,
+                         const LstmDescriptor& descriptor,
+                         const LstmInputParamsInfo& paramsInfo,
+                         Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsMaximumSupported(const TensorInfo& input0,
+                            const TensorInfo& input1,
+                            const TensorInfo& output,
+                            Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsMeanSupported(const TensorInfo& input,
+                         const TensorInfo& output,
+                         const MeanDescriptor& descriptor,
+                         Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsMemCopySupported(const TensorInfo& input,
+                            const TensorInfo& output,
+                            Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsMemImportSupported(const TensorInfo& input,
+                              const TensorInfo& output,
+                              Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsMergeSupported(const TensorInfo& input0,
+                          const TensorInfo& input1,
+                          const TensorInfo& output,
+                          Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsMinimumSupported(const TensorInfo& input0,
+                            const TensorInfo& input1,
+                            const TensorInfo& output,
+                            Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsMultiplicationSupported(const TensorInfo& input0,
+                                   const TensorInfo& input1,
+                                   const TensorInfo& output,
+                                   Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsNormalizationSupported(const TensorInfo& input,
+                                  const TensorInfo& output,
+                                  const NormalizationDescriptor& descriptor,
+                                  Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsOutputSupported(const TensorInfo& output,
+                           Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsPadSupported(const TensorInfo& input,
+                        const TensorInfo& output,
+                        const PadDescriptor& descriptor,
+                        Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsPermuteSupported(const TensorInfo& input,
+                            const TensorInfo& output,
+                            const PermuteDescriptor& descriptor,
+                            Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsPooling2dSupported(const TensorInfo& input,
+                              const TensorInfo& output,
+                              const Pooling2dDescriptor& descriptor,
+                              Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsPooling3dSupported(const TensorInfo& input,
+                              const TensorInfo& output,
+                              const Pooling3dDescriptor& descriptor,
+                              Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsPreCompiledSupported(const TensorInfo& input,
+                                const PreCompiledDescriptor& descriptor,
+                                Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsPreluSupported(const TensorInfo& input,
+                          const TensorInfo& alpha,
+                          const TensorInfo& output,
+                          Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsQuantizeSupported(const TensorInfo& input,
+                             const TensorInfo& output,
+                             Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsQLstmSupported(const TensorInfo& input,
+                          const TensorInfo& previousOutputIn,
+                          const TensorInfo& previousCellStateIn,
+                          const TensorInfo& outputStateOut,
+                          const TensorInfo& cellStateOut,
+                          const TensorInfo& output,
+                          const QLstmDescriptor& descriptor,
+                          const LstmInputParamsInfo& paramsInfo,
+                          Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsQuantizedLstmSupported(const TensorInfo& input,
+                                  const TensorInfo& previousCellStateIn,
+                                  const TensorInfo& previousOutputIn,
+                                  const TensorInfo& cellStateOut,
+                                  const TensorInfo& output,
+                                  const QuantizedLstmInputParamsInfo& paramsInfo,
+                                  Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsRankSupported(const TensorInfo& input,
+                         const TensorInfo& output,
+                         Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsReduceSupported(const TensorInfo& input,
+                           const TensorInfo& output,
+                           const ReduceDescriptor& descriptor,
+                           Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsReshapeSupported(const TensorInfo& input,
+                            const TensorInfo& output,
+                            const ReshapeDescriptor& descriptor,
+                            Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsResizeSupported(const TensorInfo& input,
+                           const TensorInfo& output,
+                           const ResizeDescriptor& descriptor,
+                           Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsShapeSupported(const TensorInfo& input,
+                          const TensorInfo& output,
+                          Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsSliceSupported(const TensorInfo& input,
+                          const TensorInfo& output,
+                          const SliceDescriptor& descriptor,
+                          Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsSoftmaxSupported(const TensorInfo& input,
+                            const TensorInfo& output,
+                            const SoftmaxDescriptor& descriptor,
+                            Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsSpaceToBatchNdSupported(const TensorInfo& input,
+                                   const TensorInfo& output,
+                                   const SpaceToBatchNdDescriptor& descriptor,
+                                   Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsSpaceToDepthSupported(const TensorInfo& input,
+                                 const TensorInfo& output,
+                                 const SpaceToDepthDescriptor& descriptor,
+                                 Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsSplitterSupported(const TensorInfo& input,
+                             const std::vector<std::reference_wrapper<TensorInfo>>& outputs,
+                             const ViewsDescriptor& descriptor,
+                             Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsStackSupported(const std::vector<const TensorInfo*>& inputs,
+                          const TensorInfo& output,
+                          const StackDescriptor& descriptor,
+                          Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsStandInSupported(const std::vector<const TensorInfo*>& inputs,
+                            const std::vector<const TensorInfo*>& outputs,
+                            const StandInDescriptor& descriptor,
+                            Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+
+    bool IsStridedSliceSupported(const TensorInfo& input,
+                                 const TensorInfo& output,
+                                 const StridedSliceDescriptor& descriptor,
+                                 Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsSubtractionSupported(const TensorInfo& input0,
+                                const TensorInfo& input1,
+                                const TensorInfo& output,
+                                Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsSwitchSupported(const TensorInfo& input0,
+                           const TensorInfo& input1,
+                           const TensorInfo& output0,
+                           const TensorInfo& output1,
+                           Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsTransposeConvolution2dSupported(
+        const TensorInfo& input,
+        const TensorInfo& output,
+        const TransposeConvolution2dDescriptor& descriptor,
+        const TensorInfo& weights,
+        const Optional<TensorInfo>& biases,
+        Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsTransposeSupported(const TensorInfo& input,
+                              const TensorInfo& output,
+                              const TransposeDescriptor& descriptor,
+                              Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+    bool IsUnidirectionalSequenceLstmSupported(
+        const TensorInfo& input,
+        const TensorInfo& outputStateIn,
+        const TensorInfo& cellStateIn,
+        const TensorInfo& output,
+        const Optional<TensorInfo>& hiddenStateOutput,
+        const Optional<TensorInfo>& cellStateOutput,
+        const LstmDescriptor& descriptor,
+        const LstmInputParamsInfo& paramsInfo,
+        Optional<std::string&> reasonIfUnsupported = EmptyOptional());
+
+private:
+    std::shared_ptr<ILayerSupport> m_LayerSupport;
+    const BackendId m_BackendId;
+};
+
+/// Convenience function to retrieve the LayerSupportHandle for a backend
+LayerSupportHandle GetILayerSupportByBackendId(const armnn::BackendId& backend);
+
+/// Convenience function to check if a capability exists in a BackendCapabilities struct
+bool HasCapability(const std::string& name, const BackendCapabilities& capabilities);
+
+/// Convenience function to check if a capability exists in a backend
+bool HasCapability(const std::string& name, const armnn::BackendId& backend);
+
+/// Convenience function to check if a given capability matches a capability in a BackendCapabilities struct
+bool HasCapability(const BackendOptions::BackendOption& capability, const BackendCapabilities& capabilities);
+
+/// Convenience function to check if a given capability matches a capability in a backend
+bool HasCapability(const BackendOptions::BackendOption& backendOption, const armnn::BackendId& backend);
+
+/// Returns a BackendCapability if the backend lists the capability
+/// The BackendCapability must then be inspected to check whether or not that BackendCapability is supported
+/// Otherwise returns an EmptyOptional if the BackendCapability is unlisted
+Optional<const BackendOptions::BackendOption> GetCapability(const std::string& backendCapabilityName,
+                                                            const BackendCapabilities& capabilities);
+
+/// Returns a BackendCapability if the backend lists the capability
+/// The BackendCapability must then be inspected to check whether or not that BackendCapability is supported
+/// Otherwise returns an EmptyOptional if the BackendCapability is unlisted
+Optional<const BackendOptions::BackendOption> GetCapability(const std::string& backendCapabilityName,
+                                                            const armnn::BackendId& backend);
+
+/// Convenience function to check a capability on a backend
+ARMNN_DEPRECATED_MSG_REMOVAL_DATE("This function has been deprecated in favour of GetBackendCapability", "22.05")
+bool IsCapabilitySupported(const armnn::BackendId& backend, armnn::BackendCapability capability);
+
+/// Returns the number of cached files if backend supports caching
+unsigned int GetNumberOfCacheFiles(const armnn::BackendId& backend);
+
+}
diff --git a/arch/arm/ARMnn/include/armnn/BackendId.hpp b/arch/arm/ARMnn/include/armnn/BackendId.hpp
new file mode 100644
index 0000000000..5849dea8fb
--- /dev/null
+++ b/arch/arm/ARMnn/include/armnn/BackendId.hpp
@@ -0,0 +1,196 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include <functional>
+#include <ostream>
+#include <set>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+namespace armnn
+{
+
+///
+/// The Compute enum is now deprecated and is being
+/// replaced by BackendId
+///
+enum class Compute
+{
+    Undefined = 0,
+    /// CPU Execution: Reference C++ kernels
+    CpuRef = 1,
+    /// CPU Execution: NEON: ArmCompute
+    CpuAcc = 2,
+    /// GPU Execution: OpenCL: ArmCompute
+    GpuAcc = 3
+};
+
+/// Deprecated function that will be removed together with
+/// the Compute enum
+constexpr char const* GetComputeDeviceAsCString(Compute compute)
+{
+    switch (compute)
+    {
+        case armnn::Compute::CpuRef: return "CpuRef";
+        case armnn::Compute::CpuAcc: return "CpuAcc";
+        case armnn::Compute::GpuAcc: return "GpuAcc";
+        default: return "Unknown";
+    }
+}
+
+/// Deprecated function that will be removed together with
+/// the Compute enum
+inline std::ostream& operator<<(std::ostream& os, const std::vector<Compute>& compute)
+{
+    for (const Compute& comp : compute)
+    {
+        os << GetComputeDeviceAsCString(comp) << " ";
+    }
+    return os;
+}
+
+/// Deprecated function that will be removed together with
+/// the Compute enum
+inline std::ostream& operator<<(std::ostream& os, const std::set<Compute>& compute)
+{
+    for (const Compute& comp : compute)
+    {
+        os << GetComputeDeviceAsCString(comp) << " ";
+    }
+    return os;
+}
+
+/// Deprecated function that will be removed together with
+/// the Compute enum
+inline std::ostream& operator<<(std::ostream& os, const Compute& compute)
+{
+    os << GetComputeDeviceAsCString(compute);
+    return os;
+}
+
+class BackendId final
+{
+public:
+    BackendId() : m_Id(GetComputeDeviceAsCString(Compute::Undefined)) {}
+    BackendId(const std::string& id) : m_Id{id} {}
+    BackendId(const char* id) : m_Id{id} {}
+
+
+    BackendId(const BackendId& other) = default;
+    BackendId(BackendId&& other) = default;
+    BackendId& operator=(const BackendId& other) = default;
+    BackendId& operator=(BackendId&& other) = default;
+    ~BackendId(){}
+
+    /// Deprecated function that will be removed together with
+    /// the Compute enum
+    BackendId(Compute compute) : m_Id{GetComputeDeviceAsCString(compute)} {}
+
+    operator std::string() const { return m_Id; }
+    BackendId& operator=(const std::string& other)
+    {
+        m_Id = other;
+        return *this;
+    }
+
+    /// Deprecated function that will be removed together with
+    /// the Compute enum
+    BackendId& operator=(Compute compute)
+    {
+        BackendId temp{compute};
+        std::swap(temp.m_Id, m_Id);
+        return *this;
+    }
+
+    bool operator==(const BackendId& other) const
+    {
+        return m_Id == other.m_Id;
+    }
+
+    /// comparison against objects from which the
+    /// BackendId can be constructed
+    template <typename O>
+    bool operator==(const O& other) const
+    {
+        BackendId temp{other};
+        return *this == temp;
+    }
+
+    template <typename O>
+    bool operator!=(const O& other) const
+    {
+        return !(*this == other);
+    }
+
+    bool operator<(const BackendId& other) const
+    {
+        return m_Id < other.m_Id;
+    }
+
+    bool IsCpuRef() const { return m_Id == GetComputeDeviceAsCString(Compute::CpuRef); }
+    bool IsCpuAcc() const { return m_Id == GetComputeDeviceAsCString(Compute::CpuAcc); }
+    bool IsGpuAcc() const { return m_Id == GetComputeDeviceAsCString(Compute::GpuAcc); }
+
+    const std::string& Get() const { return m_Id; }
+
+    bool IsEmpty() const { return m_Id.empty(); }
+    bool IsUndefined() const { return m_Id == GetComputeDeviceAsCString(Compute::Undefined); }
+
+private:
+    std::string m_Id;
+};
+
+} // namespace armnn
+
+namespace std
+{
+
+/// make BackendId compatible with std hashtables by reusing the hash
+/// function for strings.
+/// Note this must come *before* the first use of unordered_set.
+template <>
+struct hash<armnn::BackendId>
+{
+    std::size_t operator()(const armnn::BackendId& id) const noexcept
+    {
+        std::hash<std::string> hasher;
+        return hasher(id.Get());
+    }
+};
+
+} // namespace std
+
+namespace armnn
+{
+
+namespace profiling
+{
+    // Static constant describing ArmNN as a dummy backend
+    static const BackendId BACKEND_ID("ARMNN");
+} // profiling
+
+inline std::ostream& operator<<(std::ostream& os, const BackendId& id)
+{
+    os << id.Get();
+    return os;
+}
+
+template