From 5f5dc83a871c6ac7c34665654f0b323c094c9888 Mon Sep 17 00:00:00 2001 From: Boris Verkhovskiy Date: Tue, 9 Jan 2024 23:08:10 -0800 Subject: [PATCH 1/4] Capitalize "Arm" language name modern-ly --- package.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index 58547b6..25b4e63 100644 --- a/package.json +++ b/package.json @@ -27,9 +27,9 @@ { "id": "arm", "aliases": [ - "ARM", + "Arm", "arm", - "Arm" + "ARM" ], "extensions": [ ".s", From 41a287ac2697d38fefefadfa0c0b39f093d4daca Mon Sep 17 00:00:00 2001 From: Boris Verkhovskiy Date: Wed, 10 Jan 2024 03:42:36 -0800 Subject: [PATCH 2/4] Add more armv8-a instructions --- package.json | 2 +- syntaxes/arm.tmlanguage.json | 2 +- syntaxes/armv8-a.tmlanguage.json | 221 +++++++++++++++++++++++++++++++ 3 files changed, 223 insertions(+), 2 deletions(-) create mode 100644 syntaxes/armv8-a.tmlanguage.json diff --git a/package.json b/package.json index 25b4e63..e6d4fdd 100644 --- a/package.json +++ b/package.json @@ -44,7 +44,7 @@ { "language": "arm", "scopeName": "source.arm", - "path": "./syntaxes/arm.tmlanguage.json" + "path": "./syntaxes/armv8-a.tmlanguage.json" } ] } diff --git a/syntaxes/arm.tmlanguage.json b/syntaxes/arm.tmlanguage.json index a884eee..bf8a9ec 100644 --- a/syntaxes/arm.tmlanguage.json +++ b/syntaxes/arm.tmlanguage.json @@ -1,5 +1,5 @@ { - "$schema": "https://raw.githubusercontent.com/martinring/tmlanguage/master/tmlanguage.json", + "$schema": "https://raw.githubusercontent.com/martinring/tmlanguage/master/tmlanguage.json", "scopeName": "source.arm", "name": "Arm Assembly", "fileTypes": [ diff --git a/syntaxes/armv8-a.tmlanguage.json b/syntaxes/armv8-a.tmlanguage.json new file mode 100644 index 0000000..06a6fdf --- /dev/null +++ b/syntaxes/armv8-a.tmlanguage.json @@ -0,0 +1,221 @@ +{ + "$schema": "https://raw.githubusercontent.com/martinring/tmlanguage/master/tmlanguage.json", + "scopeName": "source.arm", + "name": "Arm Assembly", + "fileTypes": [ + "s", + 
"S", + "asm", + "sx" + ], + "patterns": [ + { + "match": "^\\s*[#\\.](define|include|(end|el|else)?if|if(def|ndef)?|else)(\\s+(defined\\(\\w+\\)|\\w+)(\\s+(&&|\\|\\|)\\s+defined\\(\\w+\\)|\\w]+)*)?\\b", + "name": "meta.preprocessor" + }, + { + "match": "^\\s*\\.?\\w+:\\s*(?=$|;)", + "name": "routine.arm" + }, + { + "match": "\\b(?i)(wf[ei]t?)(?-i)\\b", + "name": "keyword.control.cpu.arm" + }, + { + "match": "\\.(?i)(globl|global|extern|weak|macro|endm|purgem|section|text|data|bss|arm|align|balign|irp|rept|endr|(un)?req|error|short|(end)?func|hidden|type|cpu|fpu|arch|code|syntax|altmacro|object_arch|word|int|string|thumb(_set)?|set|pragma|undef|line|get)(?-i)\\b", + "name": "keyword.control.directive.arm" + }, + { + "match": "\\b\\=", + "name": "keyword.control.evaluation" + }, + { + "match": "\\b(?i)(abs|adclb|adclt|adcs|adc|addg|addha|addhn2|addhnb|addhnt|addhn|addpl|addp|addqv|addspl|addsvl|adds|addva|addvl|addv|add|adrp|adr|aesd|aese|aesimc|aesmc|andqv|ands|andv|and|asrd|asrr|asrv|asr|at|autda|autdb|autdza|autdzb|autia1716|autiasp|autiaz|autia|autib1716|autibsp|autibz|autib|autiza|autizb|axflag|bcax|bc|bdep|bext|bfadd|bfclamp|bfcvtn2|bfcvtnt|bfcvtn|bfcvt|bfc|bfdot|bfi|bfmaxnm|bfmax|bfminnm|bfmin|bfmlalb|bfmlalt|bfmlal|bfmla|bfmlslb|bfmlslt|bfmlsl|bfmls|bfmmla|bfmopa|bfmops|bfmul|bfm|bfsub|bfvdot|bfxil|bgrp|bics|bic|bif|bit|blraaz|blraa|blrabz|blrab|blr|bl|bmopa|bmops|braaz|braa|brabz|brab|brb|brkas|brka|brkbs|brkb|brkns|brkn|brkpas|brkpa|brkpbs|brkpb|brk|br|bsl1n|bsl2n|bsl|bti|b|cadd|casab|casah|casalb|casalh|casal|casa|casb|cash|caslb|caslh|casl|caspal|caspa|caspl|casp|cas|cbnz|cbz|ccmn|ccmp|cdot|cfinv|cfp|chkfeat|cinc|cinv|clasta|clastb|clrbhb|clrex|cls|clz|cmeq|cmge|cmgt|cmhi|cmhs|cmla|cmle|cmlt|cmn|cmpal|cmpcc|cmpcs|cmpeq|cmpge|cmpgt|cmphi|cmple|cmplo|cmpls|cmplt|cmpmi|cmpne|cmppl|cmpp|cmpvc|cmpvs|cmp|cmtst|cneg|cnot|cntb|cntd|cnth|cntp|cntw|cnt|compact|cosp|cpp|cpyen|cpyern|cpyertn|cpyertrn|cpyertwn|cpyert|cpyetn|cpyetrn|cpyetwn|cpyet|cpyewn|cpyewtn|cpyewtrn
|cpyewtwn|cpyewt|cpye|cpyfen|cpyfern|cpyfertn|cpyfertrn|cpyfertwn|cpyfert|cpyfetn|cpyfetrn|cpyfetwn|cpyfet|cpyfewn|cpyfewtn|cpyfewtrn|cpyfewtwn|cpyfewt|cpyfe|cpyfmn|cpyfmrn|cpyfmrtn|cpyfmrtrn|cpyfmrtwn|cpyfmrt|cpyfmtn|cpyfmtrn|cpyfmtwn|cpyfmt|cpyfmwn|cpyfmwtn|cpyfmwtrn|cpyfmwtwn|cpyfmwt|cpyfm|cpyfpn|cpyfprn|cpyfprtn|cpyfprtrn|cpyfprtwn|cpyfprt|cpyfptn|cpyfptrn|cpyfptwn|cpyfpt|cpyfpwn|cpyfpwtn|cpyfpwtrn|cpyfpwtwn|cpyfpwt|cpyfp|cpymn|cpymrn|cpymrtn|cpymrtrn|cpymrtwn|cpymrt|cpymtn|cpymtrn|cpymtwn|cpymt|cpymwn|cpymwtn|cpymwtrn|cpymwtwn|cpymwt|cpym|cpypn|cpyprn|cpyprtn|cpyprtrn|cpyprtwn|cpyprt|cpyptn|cpyptrn|cpyptwn|cpypt|cpypwn|cpypwtn|cpypwtrn|cpypwtwn|cpypwt|cpyp|cpy|crc32b|crc32cb|crc32ch|crc32cw|crc32cx|crc32h|crc32w|crc32x|csdb|csel|csetm|cset|csinc|csinv|csneg|ctermeq|ctermne|ctz|dcps1|dcps2|dcps3|dc|decb|decd|dech|decp|decw|dgh|dmb|dsb|dupm|dupq|dup|dvp|eon|eor3|eorbt|eorqv|eors|eortb|eorv|eor|eretaa|eretab|eret|esb|extq|extr|ext|fabd|fabs|facal|faccc|faccs|faceq|facge|facgt|fachi|facle|facls|faclt|facmi|facne|facpl|facvc|facvs|fadda|faddp|faddqv|faddv|fadd|fcadd|fccmpe|fccmp|fclamp|fcmal|fcmcc|fcmcs|fcmeq|fcmge|fcmgt|fcmhi|fcmla|fcmle|fcmls|fcmlt|fcmmi|fcmne|fcmpe|fcmpl|fcmp|fcmvc|fcmvs|fcpy|fcsel|fcvtas|fcvtau|fcvtl2|fcvtlt|fcvtl|fcvtms|fcvtmu|fcvtn2|fcvtns|fcvtnt|fcvtnu|fcvtn|fcvtps|fcvtpu|fcvtxn2|fcvtxnt|fcvtxn|fcvtx|fcvtzs|fcvtzu|fcvt|fdivr|fdiv|fdot|fdup|fexpa|fjcvtzs|flogb|fmadd|fmad|fmaxnmp|fmaxnmqv|fmaxnmv|fmaxnm|fmaxp|fmaxqv|fmaxv|fmax|fminnmp|fminnmqv|fminnmv|fminnm|fminp|fminqv|fminv|fmin|fmlal2|fmlalb|fmlalt|fmlal|fmla|fmlsl2|fmlslb|fmlslt|fmlsl|fmls|fmmla|fmopa|fmops|fmov|fmsb|fmsub|fmulx|fmul|fneg|fnmadd|fnmad|fnmla|fnmls|fnmsb|fnmsub|fnmul|frecpe|frecps|frecpx|frint32x|frint32z|frint64x|frint64z|frinta|frinti|frintm|frintn|frintp|frintx|frintz|frsqrte|frsqrts|fscale|fsqrt|fsubr|fsub|ftmad|ftsmul|ftssel|fvdot|gcsbdsync|gcspopcx|gcspopm|gcspopx|gcspushm|gcspushx|gcsss1|gcsss2|gcsstr|gcssttr|gmi|hint|histcnt|histseg|hlt|hvc|ic|incb|incd|inch|incp|inc
w|index|insr|ins|irg|isb|lasta|lastb|ld1b|ld1d|ld1h|ld1q|ld1rb|ld1rd|ld1rh|ld1rob|ld1rod|ld1roh|ld1row|ld1rqb|ld1rqd|ld1rqh|ld1rqw|ld1rsb|ld1rsh|ld1rsw|ld1rw|ld1r|ld1sb|ld1sh|ld1sw|ld1w|ld1|ld2b|ld2d|ld2h|ld2q|ld2r|ld2w|ld2|ld3b|ld3d|ld3h|ld3q|ld3r|ld3w|ld3|ld4b|ld4d|ld4h|ld4q|ld4r|ld4w|ld4|ld64b|ldaddab|ldaddah|ldaddalb|ldaddalh|ldaddal|ldadda|ldaddb|ldaddh|ldaddlb|ldaddlh|ldaddl|ldadd|ldap1|ldaprb|ldaprh|ldapr|ldapurb|ldapurh|ldapursb|ldapursh|ldapursw|ldapur|ldarb|ldarh|ldar|ldaxp|ldaxrb|ldaxrh|ldaxr|ldclrab|ldclrah|ldclralb|ldclralh|ldclral|ldclra|ldclrb|ldclrh|ldclrlb|ldclrlh|ldclrl|ldclrpal|ldclrpa|ldclrpl|ldclrp|ldclr|ldeorab|ldeorah|ldeoralb|ldeoralh|ldeoral|ldeora|ldeorb|ldeorh|ldeorlb|ldeorlh|ldeorl|ldeor|ldff1b|ldff1d|ldff1h|ldff1sb|ldff1sh|ldff1sw|ldff1w|ldgm|ldg|ldiapp|ldlarb|ldlarh|ldlar|ldnf1b|ldnf1d|ldnf1h|ldnf1sb|ldnf1sh|ldnf1sw|ldnf1w|ldnp|ldnt1b|ldnt1d|ldnt1h|ldnt1sb|ldnt1sh|ldnt1sw|ldnt1w|ldpsw|ldp|ldraa|ldrab|ldrb|ldrh|ldrsb|ldrsh|ldrsw|ldr|ldsetab|ldsetah|ldsetalb|ldsetalh|ldsetal|ldseta|ldsetb|ldseth|ldsetlb|ldsetlh|ldsetl|ldsetpal|ldsetpa|ldsetpl|ldsetp|ldset|ldsmaxab|ldsmaxah|ldsmaxalb|ldsmaxalh|ldsmaxal|ldsmaxa|ldsmaxb|ldsmaxh|ldsmaxlb|ldsmaxlh|ldsmaxl|ldsmax|ldsminab|ldsminah|ldsminalb|ldsminalh|ldsminal|ldsmina|ldsminb|ldsminh|ldsminlb|ldsminlh|ldsminl|ldsmin|ldtrb|ldtrh|ldtrsb|ldtrsh|ldtrsw|ldtr|ldumaxab|ldumaxah|ldumaxalb|ldumaxalh|ldumaxal|ldumaxa|ldumaxb|ldumaxh|ldumaxlb|ldumaxlh|ldumaxl|ldumax|lduminab|lduminah|lduminalb|lduminalh|lduminal|ldumina|lduminb|lduminh|lduminlb|lduminlh|lduminl|ldumin|ldurb|ldurh|ldursb|ldursh|ldursw|ldur|ldxp|ldxrb|ldxrh|ldxr|lslr|lslv|lsl|lsrr|lsrv|lsr|luti2|luti4|madd|mad|match|mla|mls|mneg|movaz|mova|movi|movk|movn|movprfx|movs|movt|movz|mov|mrrs|mrs|msb|msrr|msr|msub|mul|mvni|mvn|nands|nand|nbsl|negs|neg|ngcs|ngc|nmatch|nors|nor|nots|not|orns|orn|orqv|orrs|orr|orv|pacda|pacdb|pacdza|pacdzb|pacga|pacia1716|paciasp|paciaz|pacia|pacib1716|pacibsp|pacibz|pacib|paciza|pacizb|pext|pfalse|pfirst|pmov|pmull2|
pmullb|pmullt|pmull|pmul|pnext|prfb|prfd|prfh|prfm|prfum|prfw|psbcsync|psel|pssbb|ptest|ptrues|ptrue|punpkhi|punpklo|raddhn2|raddhnb|raddhnt|raddhn|rax1|rbit|rcwcasal|rcwcasa|rcwcasl|rcwcaspal|rcwcaspa|rcwcaspl|rcwcasp|rcwcas|rcwclral|rcwclra|rcwclrl|rcwclrpal|rcwclrpa|rcwclrpl|rcwclrp|rcwclr|rcwscasal|rcwscasa|rcwscasl|rcwscaspal|rcwscaspa|rcwscaspl|rcwscasp|rcwscas|rcwsclral|rcwsclra|rcwsclrl|rcwsclrpal|rcwsclrpa|rcwsclrpl|rcwsclrp|rcwsclr|rcwsetal|rcwseta|rcwsetl|rcwsetpal|rcwsetpa|rcwsetpl|rcwsetp|rcwset|rcwssetal|rcwsseta|rcwssetl|rcwssetpal|rcwssetpa|rcwssetpl|rcwssetp|rcwsset|rcwsswpal|rcwsswpa|rcwsswpl|rcwsswppal|rcwsswppa|rcwsswppl|rcwsswpp|rcwsswp|rcwswpal|rcwswpa|rcwswpl|rcwswppal|rcwswppa|rcwswppl|rcwswpp|rcwswp|rdffrs|rdffr|rdsvl|rdvl|retaa|retab|ret|rev16|rev32|rev64|revb|revd|revh|revw|rev|rmif|rorv|ror|rprfm|rshrn2|rshrnb|rshrnt|rshrn|rsubhn2|rsubhnb|rsubhnt|rsubhn|sabal2|sabalb|sabalt|sabal|saba|sabdl2|sabdlb|sabdlt|sabdl|sabd|sadalp|saddl2|saddlbt|saddlb|saddlp|saddlt|saddlv|saddl|saddv|saddw2|saddwb|saddwt|saddw|sbclb|sbclt|sbcs|sbc|sbfiz|sbfm|sbfx|sb|sclamp|scvtf|sdivr|sdiv|sdot|sel|seten|setetn|setet|sete|setf16|setf8|setffr|setgen|setgetn|setget|setge|setgmn|setgmtn|setgmt|setgm|setgpn|setgptn|setgpt|setgp|setmn|setmtn|setmt|setm|setpn|setptn|setpt|setp|sevl|sev|sha1c|sha1h|sha1m|sha1p|sha1su0|sha1su1|sha256h2|sha256h|sha256su0|sha256su1|sha512h2|sha512h|sha512su0|sha512su1|shadd|shll2|shll|shl|shrn2|shrnb|shrnt|shrn|shsubr|shsub|sli|sm3partw1|sm3partw2|sm3ss1|sm3tt1a|sm3tt1b|sm3tt2a|sm3tt2b|sm4ekey|sm4e|smaddl|smaxp|smaxqv|smaxv|smax|smc|sminp|sminqv|sminv|smin|smlal2|smlalb|smlall|smlalt|smlal|smlsl2|smlslb|smlsll|smlslt|smlsl|smmla|smnegl|smopa|smops|smov|smstart|smstop|smsubl|smulh|smull2|smullb|smullt|smull|splice|sqabs|sqadd|sqcadd|sqcvtn|sqcvtun|sqcvtu|sqcvt|sqdecb|sqdecd|sqdech|sqdecp|sqdecw|sqdmlal2|sqdmlalbt|sqdmlalb|sqdmlalt|sqdmlal|sqdmlsl2|sqdmlslbt|sqdmlslb|sqdmlslt|sqdmlsl|sqdmulh|sqdmull2|sqdmullb|sqdmullt|sqdmull|sqincb|sqincd|
sqinch|sqincp|sqincw|sqneg|sqrdcmlah|sqrdmlah|sqrdmlsh|sqrdmulh|sqrshlr|sqrshl|sqrshrn2|sqrshrnb|sqrshrnt|sqrshrn|sqrshrun2|sqrshrunb|sqrshrunt|sqrshrun|sqrshru|sqrshr|sqshlr|sqshlu|sqshl|sqshrn2|sqshrnb|sqshrnt|sqshrn|sqshrun2|sqshrunb|sqshrunt|sqshrun|sqsubr|sqsub|sqxtn2|sqxtnb|sqxtnt|sqxtn|sqxtun2|sqxtunb|sqxtunt|sqxtun|srhadd|sri|srshlr|srshl|srshr|srsra|ssbb|sshll2|sshllb|sshllt|sshll|sshl|sshr|ssra|ssubl2|ssublbt|ssublb|ssubltb|ssublt|ssubl|ssubw2|ssubwb|ssubwt|ssubw|st1b|st1d|st1h|st1q|st1w|st1|st2b|st2d|st2g|st2h|st2q|st2w|st2|st3b|st3d|st3h|st3q|st3w|st3|st4b|st4d|st4h|st4q|st4w|st4|st64bv0|st64bv|st64b|staddb|staddh|staddlb|staddlh|staddl|stadd|stclrb|stclrh|stclrlb|stclrlh|stclrl|stclr|steorb|steorh|steorlb|steorlh|steorl|steor|stgm|stgp|stg|stilp|stl1|stllrb|stllrh|stllr|stlrb|stlrh|stlr|stlurb|stlurh|stlur|stlxp|stlxrb|stlxrh|stlxr|stnp|stnt1b|stnt1d|stnt1h|stnt1w|stp|strb|strh|str|stsetb|stseth|stsetlb|stsetlh|stsetl|stset|stsmaxb|stsmaxh|stsmaxlb|stsmaxlh|stsmaxl|stsmax|stsminb|stsminh|stsminlb|stsminlh|stsminl|stsmin|sttrb|sttrh|sttr|stumaxb|stumaxh|stumaxlb|stumaxlh|stumaxl|stumax|stuminb|stuminh|stuminlb|stuminlh|stuminl|stumin|sturb|sturh|stur|stxp|stxrb|stxrh|stxr|stz2g|stzgm|stzg|subg|subhn2|subhnb|subhnt|subhn|subps|subp|subr|subs|sub|sudot|sumlall|sumopa|sumops|sunpkhi|sunpklo|sunpk|suqadd|suvdot|svc|svdot|swpab|swpah|swpalb|swpalh|swpal|swpa|swpb|swph|swplb|swplh|swpl|swppal|swppa|swppl|swpp|swp|sxtb|sxth|sxtl2|sxtl|sxtw|sysl|sysp|sys|tblq|tbl|tbnz|tbxq|tbx|tbz|tcancel|tcommit|tlbip|tlbi|trcit|trn1|trn2|tsbcsync|tstart|tst|ttest|uabal2|uabalb|uabalt|uabal|uaba|uabdl2|uabdlb|uabdlt|uabdl|uabd|uadalp|uaddl2|uaddlb|uaddlp|uaddlt|uaddlv|uaddl|uaddv|uaddw2|uaddwb|uaddwt|uaddw|ubfiz|ubfm|ubfx|uclamp|ucvtf|udf|udivr|udiv|udot|uhadd|uhsubr|uhsub|umaddl|umaxp|umaxqv|umaxv|umax|uminp|uminqv|uminv|umin|umlal2|umlalb|umlall|umlalt|umlal|umlsl2|umlslb|umlsll|umlslt|umlsl|ummla|umnegl|umopa|umops|umov|umsubl|umulh|umull2|umullb|umullt|umull|uqadd|uqcvtn|uq
cvt|uqdecb|uqdecd|uqdech|uqdecp|uqdecw|uqincb|uqincd|uqinch|uqincp|uqincw|uqrshlr|uqrshl|uqrshrn2|uqrshrnb|uqrshrnt|uqrshrn|uqrshr|uqshlr|uqshl|uqshrn2|uqshrnb|uqshrnt|uqshrn|uqsubr|uqsub|uqxtn2|uqxtnb|uqxtnt|uqxtn|urecpe|urhadd|urshlr|urshl|urshr|ursqrte|ursra|usdot|ushll2|ushllb|ushllt|ushll|ushl|ushr|usmlall|usmmla|usmopa|usmops|usqadd|usra|usubl2|usublb|usublt|usubl|usubw2|usubwb|usubwt|usubw|usvdot|uunpkhi|uunpklo|uunpk|uvdot|uxtb|uxth|uxtl2|uxtl|uxtw|uzp1|uzp2|uzpq1|uzpq2|uzp|whilege|whilegt|whilehi|whilehs|whilele|whilelo|whilels|whilelt|whilerw|whilewr|wrffr|xaflag|xar|xpacd|xpaci|xpaclri|xtn2|xtn|yield|zero|zip1|zip2|zipq1|zipq2|zip)(?-i)\\b", + "name": "support.function.mnemonic.multiple.arm" + }, + { + "match": "\\b(?i)nop(\\w+)?(?-i)\\b", + "name": "comment.nop.arm", + "captures": { + "1": { + "patterns": [ + { + "include": "#conditions", + "name": "comment.nop.arm" + }, + { + "match": "\\w+", + "name": "invalid.illegal.condition.arm" + } + ] + } + } + }, + { + "name": "storage.memaddress.arm", + "match": "\\[[ \\t]*(\\w+),[ \\t]*([a-zA-Z0-9#-_]+)[ \\t]*\\]!?", + "captures": { + "1": { + "patterns": [ + { + "include": "#registers" + } + ] + }, + "2": { + "patterns": [ + { + "include": "#numerics" + }, + { + "include": "#registers" + }, + { + "include": "#variables" + } + ] + } + } + }, + { + "name": "storage.stack.arm", + "begin": "\\{\\h*", + "end": "\\h*\\}\\^?", + "patterns": [ + { + "include": "#registers_list" + } + ] + }, + { + "include": "#registers" + }, + { + "include": "#variables" + }, + { + "include": "#numerics" + }, + { + "include": "#strings" + }, + { + "include": "#comments" + } + ], + "repository": { + "comments": { + "patterns": [ + { + "match": "([;@]|//|#).*$", + "name": "comment.arm" + }, + { + "begin": "\\/\\*", + "beginCaptures": { + "0": { + "name": "comment.begin.arm" + } + }, + "endCaptures": { + "0": { + "name": "comment.end.arm" + } + }, + "end": "\\*\\/", + "name": "comment.arm" + } + ] + }, + "conditions": { + "patterns": [ 
+ { + "match": "ne|eq|cs|hs|cc|lo|mi|pl|vs|vc|hi|ls|lt|le|gt|ge|al" + } + ] + }, + "numerics": { + "patterns": [ + { + "match": "#?-?(0x|&)[0-9a-fA-F_]+\\b", + "name": "constant.numeric.hex.arm" + }, + { + "match": "#?[0-9]+\\b", + "name": "constant.numeric.dec.arm" + }, + { + "match": "#?0b[01]+\\b", + "name": "constant.numeric.bin.arm" + } + ] + }, + "registers": { + "patterns": [ + { + "match": "\\b(?i)([rcp]([0-9]|1[0-5])|[xwbhsdq]([0-9]|1[0-9]|2[0-9]|3[0-1])|wzr|xzr|wsp|fpsr|fpcr|a[1-4]|v([0-9]|1[0-9]|2[0-9]|3[0-1])\\.(16b|8[bh]|4[sh]|2[sd])|sl|sb|fp|ip|sp|lr|(c|s)psr(_c)?|pc|fpsid|fpscr|fpexc|APSR_nzcv|sy)(?-i)(!|\\b)", + "name": "storage.register.arm" + } + ] + }, + "registers_list": { + "patterns": [ + { + "match": "(\\w+)(?:\\h*\\-\\h*(\\w+))?(?:,\\h*([a-zA-Z0-9,\\-\\h]+))?", + "captures": { + "1": { + "patterns": [ + { + "include": "#registers" + } + ] + }, + "2": { + "patterns": [ + { + "include": "#registers" + } + ] + }, + "3": { + "patterns": [ + { + "include": "#registers_list" + } + ] + } + } + } + ] + }, + "strings": { + "patterns": [ + { + "name": "string.quoted.arm", + "begin": "\"", + "end": "\"" + }, + { + "match": "\\'\\S\\'", + "name": "string.char.arm" + }, + { + "name": "invalid.illegal.string.arm", + "match": "\"[^\"]+$" + }, + { + "name": "invalid.illegal.char.arm", + "match": "\\'\\S{2,}\\'" + } + ] + }, + "variables": { + "patterns": [ + { + "match": "(\\b|#)-?[a-zA-Z_][0-9a-zA-Z_]*\\b", + "name": "variable.named.arm" + } + ] + } + } +} From 74ae5e2aca11f261c006c4689e8967ea7a7ea0a2 Mon Sep 17 00:00:00 2001 From: Boris Verkhovskiy Date: Wed, 10 Jan 2024 04:08:57 -0800 Subject: [PATCH 3/4] Add tooltip with instruction and register docs --- .gitignore | 5 +- .vscode/launch.json | 29 +- .vscode/settings.json | 11 + .vscode/tasks.json | 20 + package.json | 33 +- src/asm-docs-aarch64.ts | 14710 ++++++++++++++++++++++++++++++++++++++ src/extension.ts | 83 + tsconfig.json | 17 + 8 files changed, 14887 insertions(+), 21 deletions(-) create
mode 100644 .vscode/settings.json create mode 100644 .vscode/tasks.json create mode 100644 src/asm-docs-aarch64.ts create mode 100644 src/extension.ts create mode 100644 tsconfig.json diff --git a/.gitignore b/.gitignore index 90074a6..dac585e 100644 --- a/.gitignore +++ b/.gitignore @@ -28,4 +28,7 @@ node_modules *.vsix # Workspace file -vscode-arm.code-workspace \ No newline at end of file +vscode-arm.code-workspace + +package-lock.json +out diff --git a/.vscode/launch.json b/.vscode/launch.json index a2a793b..8880465 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -1,18 +1,21 @@ -// A launch configuration that launches the extension inside a new window +// A launch configuration that compiles the extension and then opens it inside a new window // Use IntelliSense to learn about possible attributes. // Hover to view descriptions of existing attributes. // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 { - "version": "0.2.0", - "configurations": [ - { - "name": "Extension", - "type": "extensionHost", - "request": "launch", - "runtimeExecutable": "${execPath}", - "args": [ - "--extensionDevelopmentPath=${workspaceFolder}" - ] - } - ] + "version": "0.2.0", + "configurations": [ + { + "name": "Run Extension", + "type": "extensionHost", + "request": "launch", + "args": [ + "--extensionDevelopmentPath=${workspaceFolder}" + ], + "outFiles": [ + "${workspaceFolder}/out/**/*.js" + ], + "preLaunchTask": "${defaultBuildTask}" + } + ] } diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..30bf8c2 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,11 @@ +// Place your settings in this file to overwrite default and user settings. 
+{ + "files.exclude": { + "out": false // set this to true to hide the "out" folder with the compiled JS files + }, + "search.exclude": { + "out": true // set this to false to include "out" folder in search results + }, + // Turn off tsc task auto detection since we have the necessary tasks as npm scripts + "typescript.tsc.autoDetect": "off" +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..3b17e53 --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,20 @@ +// See https://go.microsoft.com/fwlink/?LinkId=733558 +// for the documentation about the tasks.json format +{ + "version": "2.0.0", + "tasks": [ + { + "type": "npm", + "script": "watch", + "problemMatcher": "$tsc-watch", + "isBackground": true, + "presentation": { + "reveal": "never" + }, + "group": { + "kind": "build", + "isDefault": true + } + } + ] +} diff --git a/package.json b/package.json index e6d4fdd..34c87c2 100644 --- a/package.json +++ b/package.json @@ -1,19 +1,13 @@ { "name": "arm", "displayName": "Arm Assembly", - "description": "Arm assembly syntax support for Visual Studio Code", + "description": "Arm assembly syntax highlighting and documentation tooltips", "version": "1.7.4", "publisher": "dan-c-underwood", "icon": "images/icon.png", "author": { "name": "Dan Underwood" }, - "categories": [ - "Programming Languages" - ], - "engines": { - "vscode": "^0.10.1" - }, "galleryBanner": { "color": "#0084A9", "theme": "dark" @@ -22,6 +16,14 @@ "type": "git", "url": "https://github.com/dan-c-underwood/vscode-arm.git" }, + "engines": { + "vscode": "^1.85.0" + }, + "categories": [ + "Programming Languages" + ], + "activationEvents": [], + "main": "./out/extension.js", "contributes": { "languages": [ { @@ -47,5 +49,22 @@ "path": "./syntaxes/armv8-a.tmlanguage.json" } ] + }, + "scripts": { + "vscode:prepublish": "npm run compile", + "compile": "tsc -p ./", + "watch": "tsc -watch -p ./", + "lint": "eslint src --ext ts" + }, + 
"devDependencies": { + "@types/vscode": "^1.85.0", + "@types/mocha": "^10.0.6", + "@types/node": "18.x", + "@typescript-eslint/eslint-plugin": "^6.15.0", + "@typescript-eslint/parser": "^6.15.0", + "eslint": "^8.56.0", + "typescript": "^5.3.3", + "@vscode/test-cli": "^0.0.4", + "@vscode/test-electron": "^2.3.8" } } diff --git a/src/asm-docs-aarch64.ts b/src/asm-docs-aarch64.ts new file mode 100644 index 0000000..cd0fd62 --- /dev/null +++ b/src/asm-docs-aarch64.ts @@ -0,0 +1,14710 @@ +/* +BSD 2-Clause License + +Copyright (c) 2012-2022, Compiler Explorer Authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +type AssemblyInstructionInfo = { + tooltip: string, + html: string, + url: string +} + +export function getAsmOpcode(opcode: string | undefined): AssemblyInstructionInfo | undefined { + if (!opcode) return; + switch (opcode) { + case "ABS": + return { + "tooltip": "Absolute value computes the absolute value of the signed integer value in the source register, and writes the result to the destination register.", + "html": "

Absolute value computes the absolute value of the signed integer value in the source register, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ABS": + return { + "tooltip": "Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, puts the result into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ABS": + return { + "tooltip": "Compute the absolute value of the signed integer in each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Compute the absolute value of the signed integer in each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADC": + return { + "tooltip": "Add with Carry adds two register values and the Carry flag value, and writes the result to the destination register.", + "html": "

Add with Carry adds two register values and the Carry flag value, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADCLB": + return { + "tooltip": "Add the even-numbered elements of the first source vector and the 1-bit carry from the least-significant bit of the odd-numbered elements of the second source vector to the even-numbered elements of the destination and accumulator vector. The 1-bit carry output is placed in the corresponding odd-numbered element of the destination vector.", + "html": "

Add the even-numbered elements of the first source vector and the 1-bit carry from the least-significant bit of the odd-numbered elements of the second source vector to the even-numbered elements of the destination and accumulator vector. The 1-bit carry output is placed in the corresponding odd-numbered element of the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADCLT": + return { + "tooltip": "Add the odd-numbered elements of the first source vector and the 1-bit carry from the least-significant bit of the odd-numbered elements of the second source vector to the even-numbered elements of the destination and accumulator vector. The 1-bit carry output is placed in the corresponding odd-numbered element of the destination vector.", + "html": "

Add the odd-numbered elements of the first source vector and the 1-bit carry from the least-significant bit of the odd-numbered elements of the second source vector to the even-numbered elements of the destination and accumulator vector. The 1-bit carry output is placed in the corresponding odd-numbered element of the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADCS": + return { + "tooltip": "Add with Carry, setting flags, adds two register values and the Carry flag value, and writes the result to the destination register. It updates the condition flags based on the result.", + "html": "

Add with Carry, setting flags, adds two register values and the Carry flag value, and writes the result to the destination register. It updates the condition flags based on the result.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADD": + return { + "tooltip": "Add (extended register) adds a register value and a sign or zero-extended register value, followed by an optional left shift amount, and writes the result to the destination register. The argument that is extended from the register can be a byte, halfword, word, or doubleword.", + "html": "

Add (extended register) adds a register value and a sign or zero-extended register value, followed by an optional left shift amount, and writes the result to the destination register. The argument that is extended from the <Rm> register can be a byte, halfword, word, or doubleword.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADD": + return { + "tooltip": "Add (immediate) adds a register value and an optionally-shifted immediate value, and writes the result to the destination register.", + "html": "

Add (immediate) adds a register value and an optionally-shifted immediate value, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADD": + return { + "tooltip": "Add (shifted register) adds a register value and an optionally-shifted register value, and writes the result to the destination register.", + "html": "

Add (shifted register) adds a register value and an optionally-shifted register value, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADD": + return { + "tooltip": "Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Add (vector). This instruction adds corresponding elements in the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADD": + return { + "tooltip": "Add elements of the second source vector to the corresponding elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Add elements of the second source vector to the corresponding elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADD": + return { + "tooltip": "Add active elements of the second source vector to corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Add active elements of the second source vector to corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADD": + return { + "tooltip": "Add an unsigned immediate to each element of the source vector, and destructively place the results in the corresponding elements of the source vector. This instruction is unpredicated.", + "html": "

Add an unsigned immediate to each element of the source vector, and destructively place the results in the corresponding elements of the source vector. This instruction is unpredicated.

The immediate is an unsigned value in the range 0 to 255, and for element widths of 16 bits or higher it may also be a positive multiple of 256 in the range 256 to 65280.

The immediate is encoded in 8 bits with an optional left shift by 8. The preferred disassembly when the shift option is specified is \"#<uimm8>, LSL #8\". However an assembler and disassembler may also allow use of the shifted 16-bit value unless the immediate is 0 and the shift amount is 8, which must be unambiguously described as \"#0, LSL #8\".

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADD": + return { + "tooltip": "Add all elements of the second source vector to corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.", + "html": "

Add all elements of the second source vector to corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADD": + return { + "tooltip": "Destructively add all elements of the two or four source vectors to the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

Destructively add all elements of the two or four source vectors to the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 64-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADD": + return { + "tooltip": "Add all corresponding elements of the second source vector and the two or four first source vectors and place the results in the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

Add all corresponding elements of the second source vector and the two or four first source vectors and place the results in the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 64-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADD": + return { + "tooltip": "Add all corresponding elements of the two or four second source vectors and first source vectors and place the results in the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

Add all corresponding elements of the two or four second source vectors and first source vectors and place the results in the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 64-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADDG": + return { + "tooltip": "Add with Tag adds an immediate value scaled by the Tag granule to the address in the source register, modifies the Logical Address Tag of the address using an immediate value, and writes the result to the destination register. Tags specified in GCR_EL1.Exclude are excluded from the possible outputs when modifying the Logical Address Tag.", + "html": "

Add with Tag adds an immediate value scaled by the Tag granule to the address in the source register, modifies the Logical Address Tag of the address using an immediate value, and writes the result to the destination register. Tags specified in GCR_EL1.Exclude are excluded from the possible outputs when modifying the Logical Address Tag.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADDHA": + return { + "tooltip": "Add each element of the source vector to the corresponding active element of each horizontal slice of a ZA tile. The tile elements are predicated by a pair of governing predicates. An element of a horizontal slice is considered active if its corresponding element in the second governing predicate is TRUE and the element corresponding to its horizontal slice number in the first governing predicate is TRUE. Inactive elements in the destination tile remain unmodified.", + "html": "

Add each element of the source vector to the corresponding active element of each horizontal slice of a ZA tile. The tile elements are predicated by a pair of governing predicates. An element of a horizontal slice is considered active if its corresponding element in the second governing predicate is TRUE and the element corresponding to its horizontal slice number in the first governing predicate is TRUE. Inactive elements in the destination tile remain unmodified.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 64-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADDHN": + case "ADDHN2": + return { + "tooltip": "Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.", + "html": "

Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

The results are truncated. For rounded results, see RADDHN.

The ADDHN instruction writes the vector to the lower half of the destination register and clears the upper half, while the ADDHN2 instruction writes the vector to the upper half of the destination register without affecting the other bits of the register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADDHNB": + return { + "tooltip": "Add each vector element of the first source vector to the corresponding vector element of the second source vector, and place the most significant half of the result in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero. This instruction is unpredicated.", + "html": "

Add each vector element of the first source vector to the corresponding vector element of the second source vector, and place the most significant half of the result in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADDHNT": + return { + "tooltip": "Add each vector element of the first source vector to the corresponding vector element of the second source vector, and place the most significant half of the result in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. This instruction is unpredicated.", + "html": "

Add each vector element of the first source vector to the corresponding vector element of the second source vector, and place the most significant half of the result in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADDP": + return { + "tooltip": "Add Pair of elements (scalar). This instruction adds two vector elements in the source SIMD&FP register and writes the scalar result into the destination SIMD&FP register.", + "html": "

Add Pair of elements (scalar). This instruction adds two vector elements in the source SIMD&FP register and writes the scalar result into the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADDP": + return { + "tooltip": "Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADDP": + return { + "tooltip": "Add pairs of adjacent elements within each source vector, and interleave the results from corresponding lanes. The interleaved result values are destructively placed in the first source vector.", + "html": "

Add pairs of adjacent elements within each source vector, and interleave the results from corresponding lanes. The interleaved result values are destructively placed in the first source vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADDPL": + return { + "tooltip": "Add the current predicate register size in bytes multiplied by an immediate in the range -32 to 31 to the 64-bit source general-purpose register or current stack pointer and place the result in the 64-bit destination general-purpose register or current stack pointer.", + "html": "

Add the current predicate register size in bytes multiplied by an immediate in the range -32 to 31 to the 64-bit source general-purpose register or current stack pointer and place the result in the 64-bit destination general-purpose register or current stack pointer.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADDQV": + return { + "tooltip": "Unsigned addition of the same element numbers from each 128-bit source vector segment, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as zero.", + "html": "

Unsigned addition of the same element numbers from each 128-bit source vector segment, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADDS": + return { + "tooltip": "Add (extended register), setting flags, adds a register value and a sign or zero-extended register value, followed by an optional left shift amount, and writes the result to the destination register. The argument that is extended from the register can be a byte, halfword, word, or doubleword. It updates the condition flags based on the result.", + "html": "

Add (extended register), setting flags, adds a register value and a sign or zero-extended register value, followed by an optional left shift amount, and writes the result to the destination register. The argument that is extended from the <Rm> register can be a byte, halfword, word, or doubleword. It updates the condition flags based on the result.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADDS": + return { + "tooltip": "Add (immediate), setting flags, adds a register value and an optionally-shifted immediate value, and writes the result to the destination register. It updates the condition flags based on the result.", + "html": "

Add (immediate), setting flags, adds a register value and an optionally-shifted immediate value, and writes the result to the destination register. It updates the condition flags based on the result.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADDS": + return { + "tooltip": "Add (shifted register), setting flags, adds a register value and an optionally-shifted register value, and writes the result to the destination register. It updates the condition flags based on the result.", + "html": "

Add (shifted register), setting flags, adds a register value and an optionally-shifted register value, and writes the result to the destination register. It updates the condition flags based on the result.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADDSPL": + return { + "tooltip": "Add the Streaming SVE predicate register size in bytes multiplied by an immediate in the range -32 to 31 to the 64-bit source general-purpose register or current stack pointer and place the result in the 64-bit destination general-purpose register or current stack pointer.", + "html": "

Add the Streaming SVE predicate register size in bytes multiplied by an immediate in the range -32 to 31 to the 64-bit source general-purpose register or current stack pointer and place the result in the 64-bit destination general-purpose register or current stack pointer.

This instruction does not require the PE to be in Streaming SVE mode.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADDSVL": + return { + "tooltip": "Add the Streaming SVE vector register size in bytes multiplied by an immediate in the range -32 to 31 to the 64-bit source general-purpose register or current stack pointer, and place the result in the 64-bit destination general-purpose register or current stack pointer.", + "html": "

Add the Streaming SVE vector register size in bytes multiplied by an immediate in the range -32 to 31 to the 64-bit source general-purpose register or current stack pointer, and place the result in the 64-bit destination general-purpose register or current stack pointer.

This instruction does not require the PE to be in Streaming SVE mode.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADDV": + return { + "tooltip": "Add across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register.", + "html": "

Add across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADDVA": + return { + "tooltip": "Add each element of the source vector to the corresponding active element of each vertical slice of a ZA tile. The tile elements are predicated by a pair of governing predicates. An element of a vertical slice is considered active if its corresponding element in the first governing predicate is TRUE and the element corresponding to its vertical slice number in the second governing predicate is TRUE. Inactive elements in the destination tile remain unmodified.", + "html": "

Add each element of the source vector to the corresponding active element of each vertical slice of a ZA tile. The tile elements are predicated by a pair of governing predicates. An element of a vertical slice is considered active if its corresponding element in the first governing predicate is TRUE and the element corresponding to its vertical slice number in the second governing predicate is TRUE. Inactive elements in the destination tile remain unmodified.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 64-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADDVL": + return { + "tooltip": "Add the current vector register size in bytes multiplied by an immediate in the range -32 to 31 to the 64-bit source general-purpose register or current stack pointer, and place the result in the 64-bit destination general-purpose register or current stack pointer.", + "html": "

Add the current vector register size in bytes multiplied by an immediate in the range -32 to 31 to the 64-bit source general-purpose register or current stack pointer, and place the result in the 64-bit destination general-purpose register or current stack pointer.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADR": + return { + "tooltip": "Form PC-relative address adds an immediate value to the PC value to form a PC-relative address, and writes the result to the destination register.", + "html": "

Form PC-relative address adds an immediate value to the PC value to form a PC-relative address, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADR": + return { + "tooltip": "Optionally sign or zero-extend the least significant 32-bits of each element from a vector of offsets or indices in the second source vector, scale each index by 2, 4 or 8, add to a vector of base addresses from the first source vector, and place the resulting addresses in the destination vector. This instruction is unpredicated.", + "html": "

Optionally sign or zero-extend the least significant 32-bits of each element from a vector of offsets or indices in the second source vector, scale each index by 2, 4 or 8, add to a vector of base addresses from the first source vector, and place the resulting addresses in the destination vector. This instruction is unpredicated.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ADRP": + return { + "tooltip": "Form PC-relative address to 4KB page adds an immediate value that is shifted left by 12 bits, to the PC value to form a PC-relative address, with the bottom 12 bits masked out, and writes the result to the destination register.", + "html": "

Form PC-relative address to 4KB page adds an immediate value that is shifted left by 12 bits, to the PC value to form a PC-relative address, with the bottom 12 bits masked out, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "AESD": + return { + "tooltip": "AES single round decryption.", + "html": "

AES single round decryption.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "AESD": + return { + "tooltip": "The AESD instruction reads a 16-byte state array from each 128-bit segment of the first source vector, together with a round key from the corresponding 128-bit segment of the second source vector. Each state array undergoes a single round of the AddRoundKey(), InvSubBytes() and InvShiftRows() transformations in accordance with the AES standard. Each updated state array is destructively placed in the corresponding segment of the first source vector. This instruction is unpredicated.", + "html": "

The AESD instruction reads a 16-byte state array from each 128-bit segment of the first source vector, together with a round key from the corresponding 128-bit segment of the second source vector. Each state array undergoes a single round of the AddRoundKey(), InvSubBytes() and InvShiftRows() transformations in accordance with the AES standard. Each updated state array is destructively placed in the corresponding segment of the first source vector. This instruction is unpredicated.

ID_AA64ZFR0_EL1.AES indicates whether this instruction is implemented.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "AESE": + return { + "tooltip": "AES single round encryption.", + "html": "

AES single round encryption.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "AESE": + return { + "tooltip": "The AESE instruction reads a 16-byte state array from each 128-bit segment of the first source vector together with a round key from the corresponding 128-bit segment of the second source vector. Each state array undergoes a single round of the AddRoundKey(), SubBytes() and ShiftRows() transformations in accordance with the AES standard. Each updated state array is destructively placed in the corresponding segment of the first source vector. This instruction is unpredicated.", + "html": "

The AESE instruction reads a 16-byte state array from each 128-bit segment of the first source vector together with a round key from the corresponding 128-bit segment of the second source vector. Each state array undergoes a single round of the AddRoundKey(), SubBytes() and ShiftRows() transformations in accordance with the AES standard. Each updated state array is destructively placed in the corresponding segment of the first source vector. This instruction is unpredicated.

ID_AA64ZFR0_EL1.AES indicates whether this instruction is implemented.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "AESIMC": + return { + "tooltip": "AES inverse mix columns.", + "html": "

AES inverse mix columns.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "AESIMC": + return { + "tooltip": "The AESIMC instruction reads a 16-byte state array from each 128-bit segment of the source register, and performs a single round of the InvMixColumns() transformation on each state array in accordance with the AES standard. Each updated state array is destructively placed in the corresponding segment of the first source vector. This instruction is unpredicated.", + "html": "

The AESIMC instruction reads a 16-byte state array from each 128-bit segment of the source register, and performs a single round of the InvMixColumns() transformation on each state array in accordance with the AES standard. Each updated state array is destructively placed in the corresponding segment of the first source vector. This instruction is unpredicated.

ID_AA64ZFR0_EL1.AES indicates whether this instruction is implemented.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "AESMC": + return { + "tooltip": "AES mix columns.", + "html": "

AES mix columns.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "AESMC": + return { + "tooltip": "The AESMC instruction reads a 16-byte state array from each 128-bit segment of the source register, and performs a single round of the MixColumns() transformation on each state array in accordance with the AES standard. Each updated state array is destructively placed in the corresponding segment of the first source vector. This instruction is unpredicated.", + "html": "

The AESMC instruction reads a 16-byte state array from each 128-bit segment of the source register, and performs a single round of the MixColumns() transformation on each state array in accordance with the AES standard. Each updated state array is destructively placed in the corresponding segment of the first source vector. This instruction is unpredicated.

ID_AA64ZFR0_EL1.AES indicates whether this instruction is implemented.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "AND": + return { + "tooltip": "Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.", + "html": "

Bitwise AND (vector). This instruction performs a bitwise AND between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "AND": + return { + "tooltip": "Bitwise AND (immediate) performs a bitwise AND of a register value and an immediate value, and writes the result to the destination register.", + "html": "

Bitwise AND (immediate) performs a bitwise AND of a register value and an immediate value, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "AND": + return { + "tooltip": "Bitwise AND (shifted register) performs a bitwise AND of a register value and an optionally-shifted register value, and writes the result to the destination register.", + "html": "

Bitwise AND (shifted register) performs a bitwise AND of a register value and an optionally-shifted register value, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "AND": + return { + "tooltip": "Bitwise AND active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.", + "html": "

Bitwise AND active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "AND": + return { + "tooltip": "Bitwise AND active elements of the second source vector with corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Bitwise AND active elements of the second source vector with corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "AND": + return { + "tooltip": "Bitwise AND an immediate with each 64-bit element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate is a 64-bit value consisting of a single run of ones or zeros repeating every 2, 4, 8, 16, 32 or 64 bits. This instruction is unpredicated.", + "html": "

Bitwise AND an immediate with each 64-bit element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate is a 64-bit value consisting of a single run of ones or zeros repeating every 2, 4, 8, 16, 32 or 64 bits. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "AND": + return { + "tooltip": "Bitwise AND all elements of the second source vector with corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.", + "html": "

Bitwise AND all elements of the second source vector with corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ANDQV": + return { + "tooltip": "Bitwise AND of the same element numbers from each 128-bit source vector segment, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as all ones.", + "html": "

Bitwise AND of the same element numbers from each 128-bit source vector segment, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as all ones.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ANDS": + return { + "tooltip": "Bitwise AND (immediate), setting flags, performs a bitwise AND of a register value and an immediate value, and writes the result to the destination register. It updates the condition flags based on the result.", + "html": "

Bitwise AND (immediate), setting flags, performs a bitwise AND of a register value and an immediate value, and writes the result to the destination register. It updates the condition flags based on the result.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ANDS": + return { + "tooltip": "Bitwise AND (shifted register), setting flags, performs a bitwise AND of a register value and an optionally-shifted register value, and writes the result to the destination register. It updates the condition flags based on the result.", + "html": "

Bitwise AND (shifted register), setting flags, performs a bitwise AND of a register value and an optionally-shifted register value, and writes the result to the destination register. It updates the condition flags based on the result.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ANDS": + return { + "tooltip": "Bitwise AND active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

Bitwise AND active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ANDV": + return { + "tooltip": "Bitwise AND horizontally across all lanes of a vector, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as all ones.", + "html": "

Bitwise AND horizontally across all lanes of a vector, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as all ones.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ASR": + return { + "tooltip": "Arithmetic Shift Right (register) shifts a register value right by a variable number of bits, shifting in copies of its sign bit, and writes the result to the destination register. The remainder obtained by dividing the second source register by the data size defines the number of bits by which the first source register is right-shifted.", + "html": "

Arithmetic Shift Right (register) shifts a register value right by a variable number of bits, shifting in copies of its sign bit, and writes the result to the destination register. The remainder obtained by dividing the second source register by the data size defines the number of bits by which the first source register is right-shifted.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ASR": + return { + "tooltip": "Arithmetic Shift Right (immediate) shifts a register value right by an immediate number of bits, shifting in copies of the sign bit in the upper bits and zeros in the lower bits, and writes the result to the destination register.", + "html": "

Arithmetic Shift Right (immediate) shifts a register value right by an immediate number of bits, shifting in copies of the sign bit in the upper bits and zeros in the lower bits, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ASR": + return { + "tooltip": "Shift right by immediate, preserving the sign bit, each active element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift right by immediate, preserving the sign bit, each active element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ASR": + return { + "tooltip": "Shift right, preserving the sign bit, active elements of the first source vector by corresponding overlapping 64-bit elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. The shift amount is a vector of unsigned 64-bit doubleword elements in which all bits are significant, and not used modulo the destination element size. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift right, preserving the sign bit, active elements of the first source vector by corresponding overlapping 64-bit elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. The shift amount is a vector of unsigned 64-bit doubleword elements in which all bits are significant, and not used modulo the destination element size. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ASR": + return { + "tooltip": "Shift right, preserving the sign bit, active elements of the first source vector by corresponding elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. The shift amount operand is a vector of unsigned elements in which all bits are significant, and not used modulo the element size. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift right, preserving the sign bit, active elements of the first source vector by corresponding elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. The shift amount operand is a vector of unsigned elements in which all bits are significant, and not used modulo the element size. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ASR": + return { + "tooltip": "Shift right by immediate, preserving the sign bit, each element of the source vector, and place the results in the corresponding elements of the destination vector. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.", + "html": "

Shift right by immediate, preserving the sign bit, each element of the source vector, and place the results in the corresponding elements of the destination vector. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ASR": + return { + "tooltip": "Shift right, preserving the sign bit, all elements of the first source vector by corresponding overlapping 64-bit elements of the second source vector and place the first in the corresponding elements of the destination vector. The shift amount is a vector of unsigned 64-bit doubleword elements in which all bits are significant, and not used modulo the destination element size. This instruction is unpredicated.", + "html": "

Shift right, preserving the sign bit, all elements of the first source vector by corresponding overlapping 64-bit elements of the second source vector and place the results in the corresponding elements of the destination vector. The shift amount is a vector of unsigned 64-bit doubleword elements in which all bits are significant, and not used modulo the destination element size. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ASRD": + return { + "tooltip": "Shift right by immediate, preserving the sign bit, each active element of the source vector, and destructively place the results in the corresponding elements of the source vector. The result rounds toward zero as in a signed division. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift right by immediate, preserving the sign bit, each active element of the source vector, and destructively place the results in the corresponding elements of the source vector. The result rounds toward zero as in a signed division. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ASRR": + return { + "tooltip": "Reversed shift right, preserving the sign bit, active elements of the second source vector by corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. The shift amount operand is a vector of unsigned elements in which all bits are significant, and not used modulo the element size. Inactive elements in the destination vector register remain unmodified.", + "html": "

Reversed shift right, preserving the sign bit, active elements of the second source vector by corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. The shift amount operand is a vector of unsigned elements in which all bits are significant, and not used modulo the element size. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ASRV": + return { + "tooltip": "Arithmetic Shift Right Variable shifts a register value right by a variable number of bits, shifting in copies of its sign bit, and writes the result to the destination register. The remainder obtained by dividing the second source register by the data size defines the number of bits by which the first source register is right-shifted.", + "html": "

Arithmetic Shift Right Variable shifts a register value right by a variable number of bits, shifting in copies of its sign bit, and writes the result to the destination register. The remainder obtained by dividing the second source register by the data size defines the number of bits by which the first source register is right-shifted.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "AT": + return { + "tooltip": "Address Translate. For more information, see op0==0b01, cache maintenance, TLB maintenance, and address translation instructions.", + "html": "

Address Translate. For more information, see op0==0b01, cache maintenance, TLB maintenance, and address translation instructions.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "AUTDA": + case "AUTDZA": + return { + "tooltip": "Authenticate Data address, using key A. This instruction authenticates a data address, using a modifier and key A.", + "html": "

Authenticate Data address, using key A. This instruction authenticates a data address, using a modifier and key A.

The address is in the general-purpose register that is specified by <Xd>.

The modifier is:

If the authentication passes, the upper bits of the address are restored to enable subsequent use of the address. For information on behavior if the authentication fails, see Faulting on pointer authentication.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "AUTDB": + case "AUTDZB": + return { + "tooltip": "Authenticate Data address, using key B. This instruction authenticates a data address, using a modifier and key B.", + "html": "

Authenticate Data address, using key B. This instruction authenticates a data address, using a modifier and key B.

The address is in the general-purpose register that is specified by <Xd>.

The modifier is:

If the authentication passes, the upper bits of the address are restored to enable subsequent use of the address. For information on behavior if the authentication fails, see Faulting on pointer authentication.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "AUTIA": + case "AUTIA1716": + case "AUTIASP": + case "AUTIAZ": + case "AUTIZA": + return { + "tooltip": "Authenticate Instruction address, using key A. This instruction authenticates an instruction address, using a modifier and key A.", + "html": "

Authenticate Instruction address, using key A. This instruction authenticates an instruction address, using a modifier and key A.

The address is:

The modifier is:

If the authentication passes, the upper bits of the address are restored to enable subsequent use of the address. For information on behavior if the authentication fails, see Faulting on pointer authentication.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "AUTIB": + case "AUTIB1716": + case "AUTIBSP": + case "AUTIBZ": + case "AUTIZB": + return { + "tooltip": "Authenticate Instruction address, using key B. This instruction authenticates an instruction address, using a modifier and key B.", + "html": "

Authenticate Instruction address, using key B. This instruction authenticates an instruction address, using a modifier and key B.

The address is:

The modifier is:

If the authentication passes, the upper bits of the address are restored to enable subsequent use of the address. For information on behavior if the authentication fails, see Faulting on pointer authentication.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "AXFLAG": + return { + "tooltip": "Convert floating-point condition flags from Arm to external format. This instruction converts the state of the PSTATE.{N,Z,C,V} flags from a form representing the result of an Arm floating-point scalar compare instruction to an alternative representation required by some software.", + "html": "

Convert floating-point condition flags from Arm to external format. This instruction converts the state of the PSTATE.{N,Z,C,V} flags from a form representing the result of an Arm floating-point scalar compare instruction to an alternative representation required by some software.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "B.AL": + case "B.CC": + case "B.CS": + case "B.EQ": + case "B.GE": + case "B.GT": + case "B.HI": + case "B.LE": + case "B.LS": + case "B.LT": + case "B.MI": + case "B.NE": + case "B.PL": + case "B.VC": + case "B.VS": + case "B.cond": + return { + "tooltip": "Branch conditionally to a label at a PC-relative offset, with a hint that this is not a subroutine call or return.", + "html": "

Branch conditionally to a label at a PC-relative offset, with a hint that this is not a subroutine call or return.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "B": + return { + "tooltip": "Branch causes an unconditional branch to a label at a PC-relative offset, with a hint that this is not a subroutine call or return.", + "html": "

Branch causes an unconditional branch to a label at a PC-relative offset, with a hint that this is not a subroutine call or return.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BC.AL": + case "BC.CC": + case "BC.CS": + case "BC.EQ": + case "BC.GE": + case "BC.GT": + case "BC.HI": + case "BC.LE": + case "BC.LS": + case "BC.LT": + case "BC.MI": + case "BC.NE": + case "BC.PL": + case "BC.VC": + case "BC.VS": + case "BC.cond": + return { + "tooltip": "Branch Consistent conditionally to a label at a PC-relative offset, with a hint that this branch will behave very consistently and is very unlikely to change direction.", + "html": "

Branch Consistent conditionally to a label at a PC-relative offset, with a hint that this branch will behave very consistently and is very unlikely to change direction.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BCAX": + return { + "tooltip": "Bit Clear and exclusive-OR performs a bitwise AND of the 128-bit vector in a source SIMD&FP register and the complement of the vector in another source SIMD&FP register, then performs a bitwise exclusive-OR of the resulting vector and the vector in a third source SIMD&FP register, and writes the result to the destination SIMD&FP register.", + "html": "

Bit Clear and exclusive-OR performs a bitwise AND of the 128-bit vector in a source SIMD&FP register and the complement of the vector in another source SIMD&FP register, then performs a bitwise exclusive-OR of the resulting vector and the vector in a third source SIMD&FP register, and writes the result to the destination SIMD&FP register.

This instruction is implemented only when FEAT_SHA3 is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BCAX": + return { + "tooltip": "Bitwise AND elements of the second source vector with the corresponding inverted elements of the third source vector, then exclusive OR the results with corresponding elements of the first source vector. The final results are destructively placed in the corresponding elements of the destination and first source vector. This instruction is unpredicated.", + "html": "

Bitwise AND elements of the second source vector with the corresponding inverted elements of the third source vector, then exclusive OR the results with corresponding elements of the first source vector. The final results are destructively placed in the corresponding elements of the destination and first source vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BDEP": + return { + "tooltip": "This instruction scatters the lowest-numbered contiguous bits within each element of the first source vector to the bit positions indicated by non-zero bits in the corresponding mask element of the second source vector, preserving their order, and set the bits corresponding to a zero mask bit to zero. This instruction is unpredicated.", + "html": "

This instruction scatters the lowest-numbered contiguous bits within each element of the first source vector to the bit positions indicated by non-zero bits in the corresponding mask element of the second source vector, preserving their order, and set the bits corresponding to a zero mask bit to zero. This instruction is unpredicated.

ID_AA64ZFR0_EL1.BitPerm indicates whether this instruction is implemented.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BEXT": + return { + "tooltip": "This instruction gathers bits in each element of the first source vector from the bit positions indicated by non-zero bits in the corresponding mask element of the second source vector to the lowest-numbered contiguous bits of the corresponding destination element, preserving their order, and sets the remaining higher-numbered bits to zero. This instruction is unpredicated.", + "html": "

This instruction gathers bits in each element of the first source vector from the bit positions indicated by non-zero bits in the corresponding mask element of the second source vector to the lowest-numbered contiguous bits of the corresponding destination element, preserving their order, and sets the remaining higher-numbered bits to zero. This instruction is unpredicated.

ID_AA64ZFR0_EL1.BitPerm indicates whether this instruction is implemented.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFADD": + return { + "tooltip": "Add active BFloat16 elements of the second source vector to corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Add active BFloat16 elements of the second source vector to corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

This instruction follows SVE2.1 non-widening BFloat16 numerical behaviors.

ID_AA64ZFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFADD": + return { + "tooltip": "Add all BFloat16 elements of the second source vector to corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector.", + "html": "

Add all BFloat16 elements of the second source vector to corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector.

This instruction follows SVE2.1 non-widening BFloat16 numerical behaviors.

This instruction is unpredicated.

ID_AA64ZFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFADD": + return { + "tooltip": "Destructively add all elements of the two or four source vectors to the corresponding BFloat16 elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

Destructively add all elements of the two or four source vectors to the corresponding BFloat16 elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME2.1 ZA-targeting non-widening BFloat16 numerical behaviors.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFC": + return { + "tooltip": "Bitfield Clear sets a bitfield of bits at bit position of the destination register to zero, leaving the other destination bits unchanged.", + "html": "

Bitfield Clear sets a bitfield of <width> bits at bit position <lsb> of the destination register to zero, leaving the other destination bits unchanged.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFCLAMP": + return { + "tooltip": "Clamp each BFloat16 element in the two or four destination vectors to between the BFloat16 minimum value in the corresponding element of the first source vector and the BFloat16 maximum value in the corresponding element of the second source vector and destructively place the clamped results in the corresponding elements of the two or four destination vectors.", + "html": "

Clamp each BFloat16 element in the two or four destination vectors to between the BFloat16 minimum value in the corresponding element of the first source vector and the BFloat16 maximum value in the corresponding element of the second source vector and destructively place the clamped results in the corresponding elements of the two or four destination vectors.

Regardless of the value of FPCR.AH, the behavior is as follows for each minimum number and maximum number operation:

This instruction follows SME2.1 non-widening BFloat16 numerical behaviors corresponding to instructions that place their results in two or four SVE Z vectors.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFCLAMP": + return { + "tooltip": "Clamp each BFloat16 element in the destination vector to between the BFloat16 minimum value in the corresponding element of the first source vector and the BFloat16 maximum value in the corresponding element of the second source vector and destructively place the clamped results in the corresponding elements of the destination vector.", + "html": "

Clamp each BFloat16 element in the destination vector to between the BFloat16 minimum value in the corresponding element of the first source vector and the BFloat16 maximum value in the corresponding element of the second source vector and destructively place the clamped results in the corresponding elements of the destination vector.

Regardless of the value of FPCR.AH, the behavior is as follows for each minimum number and maximum number operation:

This instruction follows SVE2.1 non-widening BFloat16 numerical behaviors.

This instruction is unpredicated.

ID_AA64ZFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFCVT": + return { + "tooltip": "Floating-point convert from single-precision to BFloat16 format (scalar) converts the single-precision floating-point value in the 32-bit SIMD&FP source register to BFloat16 format and writes the result in the 16-bit SIMD&FP destination register.", + "html": "

Floating-point convert from single-precision to BFloat16 format (scalar) converts the single-precision floating-point value in the 32-bit SIMD&FP source register to BFloat16 format and writes the result in the 16-bit SIMD&FP destination register.

ID_AA64ISAR1_EL1.BF16 indicates whether this instruction is supported.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFCVT": + return { + "tooltip": "Convert to BFloat16 from single-precision, each element of the two source vectors, and place the results in the half-width destination elements.", + "html": "

Convert to BFloat16 from single-precision, each element of the two source vectors, and place the results in the half-width destination elements.

This instruction follows SME2 floating-point numerical behaviors corresponding to instructions that place their results in one or more SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFCVT": + return { + "tooltip": "Convert to BFloat16 from single-precision in each active floating-point element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Convert to BFloat16 from single-precision in each active floating-point element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

Since the result type is smaller than the input type, the results are zero-extended to fill each destination element.

ID_AA64ZFR0_EL1.BF16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFCVTN": + case "BFCVTN2": + return { + "tooltip": "Floating-point convert from single-precision to BFloat16 format (vector) reads each single-precision element in the SIMD&FP source vector, converts each value to BFloat16 format, and writes the results in the lower or upper half of the SIMD&FP destination vector. The result elements are half the width of the source elements.", + "html": "

Floating-point convert from single-precision to BFloat16 format (vector) reads each single-precision element in the SIMD&FP source vector, converts each value to BFloat16 format, and writes the results in the lower or upper half of the SIMD&FP destination vector. The result elements are half the width of the source elements.

The BFCVTN instruction writes the half-width results to the lower half of the destination vector and clears the upper half to zero, while the BFCVTN2 instruction writes the results to the upper half of the destination vector without affecting the other bits in the register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFCVTN": + return { + "tooltip": "Convert to BFloat16 from single-precision, each element of the two source vectors, and place the two-way interleaved results in the half-width destination elements.", + "html": "

Convert to BFloat16 from single-precision, each element of the two source vectors, and place the two-way interleaved results in the half-width destination elements.

This instruction follows SME2 floating-point numerical behaviors corresponding to instructions that place their results in one or more SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFCVTNT": + return { + "tooltip": "Convert to BFloat16 from single-precision in each active floating-point element of the source vector, and place the results in the odd-numbered 16-bit elements of the destination vector, leaving the even-numbered elements unchanged. Inactive elements in the destination vector register remain unmodified.", + "html": "

Convert to BFloat16 from single-precision in each active floating-point element of the source vector, and place the results in the odd-numbered 16-bit elements of the destination vector, leaving the even-numbered elements unchanged. Inactive elements in the destination vector register remain unmodified.

ID_AA64ZFR0_EL1.BF16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFDOT": + return { + "tooltip": "BFloat16 floating-point dot product (vector, by element). This instruction delimits the source vectors into pairs of BFloat16 elements. The BFloat16 pair within the second source vector is specified using an immediate index. The index range is from 0 to 3 inclusive.", + "html": "

BFloat16 floating-point dot product (vector, by element). This instruction delimits the source vectors into pairs of BFloat16 elements. The BFloat16 pair within the second source vector is specified using an immediate index. The index range is from 0 to 3 inclusive.

If FEAT_EBF16 is not implemented or FPCR.EBF is 0, this instruction:

If FEAT_EBF16 is implemented and FPCR.EBF is 1, then this instruction:

Irrespective of FEAT_EBF16 and FPCR.EBF, this instruction:

ID_AA64ISAR1_EL1.BF16 indicates whether this instruction is supported.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFDOT": + return { + "tooltip": "BFloat16 floating-point dot product (vector). This instruction delimits the source vectors into pairs of BFloat16 elements.", + "html": "

BFloat16 floating-point dot product (vector). This instruction delimits the source vectors into pairs of BFloat16 elements.

If FEAT_EBF16 is not implemented or FPCR.EBF is 0, this instruction:

If FEAT_EBF16 is implemented and FPCR.EBF is 1, then this instruction:

Irrespective of FEAT_EBF16 and FPCR.EBF, this instruction:

ID_AA64ISAR1_EL1.BF16 indicates whether this instruction is supported.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFDOT": + return { + "tooltip": "This instruction delimits the source vectors into pairs of BFloat16 elements.", + "html": "

This instruction delimits the source vectors into pairs of BFloat16 elements.

If FEAT_EBF16 is not implemented or FPCR.EBF is 0, this instruction:

If FEAT_EBF16 is implemented and FPCR.EBF is 1, then this instruction:

Irrespective of FEAT_EBF16 and FPCR.EBF, this instruction:

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFDOT": + return { + "tooltip": "This instruction delimits the source vectors into pairs of BFloat16 elements. The BFloat16 pairs within the second source vector are specified using an immediate index which selects the same BFloat16 pair position within each 128-bit vector segment. The index range is from 0 to 3.", + "html": "

This instruction delimits the source vectors into pairs of BFloat16 elements. The BFloat16 pairs within the second source vector are specified using an immediate index which selects the same BFloat16 pair position within each 128-bit vector segment. The index range is from 0 to 3.

If FEAT_EBF16 is not implemented or FPCR.EBF is 0, this instruction:

If FEAT_EBF16 is implemented and FPCR.EBF is 1, then this instruction:

Irrespective of FEAT_EBF16 and FPCR.EBF, this instruction:

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFDOT": + return { + "tooltip": "The instruction computes the dot product of a pair of BF16 values held in the corresponding 32-bit elements of the two or four first source vectors and the indexed 32-bit element of the second source vector. The single-precision dot product results are destructively added to the corresponding single-precision elements of the ZA single-vector groups.", + "html": "

The instruction computes the dot product of a pair of BF16 values held in the corresponding 32-bit elements of the two or four first source vectors and the indexed 32-bit element of the second source vector. The single-precision dot product results are destructively added to the corresponding single-precision elements of the ZA single-vector groups.

The BF16 pairs within the second source vector are specified using an immediate index which selects the same BF16 pair position within each 128-bit vector segment. The element index range is from 0 to 3. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME2 ZA-targeting BFloat16 numerical behaviors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFDOT": + return { + "tooltip": "The instruction computes the dot product of a pair of BF16 values held in the corresponding 32-bit elements of the two or four first source vectors and the second source vector. The single-precision dot product results are destructively added to the corresponding single-precision elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

The instruction computes the dot product of a pair of BF16 values held in the corresponding 32-bit elements of the two or four first source vectors and the second source vector. The single-precision dot product results are destructively added to the corresponding single-precision elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME2 ZA-targeting BFloat16 numerical behaviors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFDOT": + return { + "tooltip": "The instruction computes the dot product of a pair of BF16 values held in the corresponding 32-bit elements of the two or four first and second source vectors. The single-precision dot product results are destructively added to the corresponding single-precision elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

The instruction computes the dot product of a pair of BF16 values held in the corresponding 32-bit elements of the two or four first and second source vectors. The single-precision dot product results are destructively added to the corresponding single-precision elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME2 ZA-targeting BFloat16 numerical behaviors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFI": + return { + "tooltip": "Bitfield Insert copies a bitfield of bits from the least significant bits of the source register to bit position of the destination register, leaving the other destination bits unchanged.", + "html": "

Bitfield Insert copies a bitfield of <width> bits from the least significant bits of the source register to bit position <lsb> of the destination register, leaving the other destination bits unchanged.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFM": + return { + "tooltip": "Bitfield Move is usually accessed via one of its aliases, which are always preferred for disassembly.", + "html": "

Bitfield Move is usually accessed via one of its aliases, which are always preferred for disassembly.

If <imms> is greater than or equal to <immr>, this copies a bitfield of (<imms>-<immr>+1) bits starting from bit position <immr> in the source register to the least significant bits of the destination register.

If <imms> is less than <immr>, this copies a bitfield of (<imms>+1) bits from the least significant bits of the source register to bit position (regsize-<immr>) of the destination register, where regsize is the destination register size of 32 or 64 bits.

In both cases the other bits of the destination register remain unchanged.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMAX": + return { + "tooltip": "Determine the maximum of BFloat16 elements of the second source vector and the corresponding BFloat16 elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the maximum of BFloat16 elements of the second source vector and the corresponding BFloat16 elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

This instruction follows SME2.1 non-widening BFloat16 numerical behaviors corresponding to instructions that place their results in two or four SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMAX": + return { + "tooltip": "Determine the maximum of BFloat16 elements of the two or four second source vectors and the corresponding BFloat16 elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the maximum of BFloat16 elements of the two or four second source vectors and the corresponding BFloat16 elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

This instruction follows SME2.1 non-widening BFloat16 numerical behaviors corresponding to instructions that place their results in two or four SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMAX": + return { + "tooltip": "Determine the maximum of active BFloat16 elements of the second source vector and corresponding BFloat16 elements of the first source vector and destructively place the results in the corresponding elements of the first source vector.", + "html": "

Determine the maximum of active BFloat16 elements of the second source vector and corresponding BFloat16 elements of the first source vector and destructively place the results in the corresponding elements of the first source vector.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

Inactive elements in the destination vector register remain unmodified.

This instruction follows SVE2.1 non-widening BFloat16 numerical behaviors.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMAXNM": + return { + "tooltip": "Determine the maximum number value of BFloat16 elements of the second source vector and the corresponding BFloat16 elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the maximum number value of BFloat16 elements of the second source vector and the corresponding BFloat16 elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

Regardless of the value of FPCR.AH, the behavior is as follows:

This instruction follows SME2.1 non-widening BFloat16 numerical behaviors corresponding to instructions that place their results in two or four SVE Z vectors.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMAXNM": + return { + "tooltip": "Determine the maximum number value of BFloat16 elements of the two or four second source vectors and the corresponding BFloat16 elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the maximum number value of BFloat16 elements of the two or four second source vectors and the corresponding BFloat16 elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

Regardless of the value of FPCR.AH, the behavior is as follows:

This instruction follows SME2.1 non-widening BFloat16 numerical behaviors corresponding to instructions that place their results in two or four SVE Z vectors.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMAXNM": + return { + "tooltip": "Determine the maximum number value of active BFloat16 elements of the second source vector and corresponding BFloat16 elements of the first source vector and destructively place the results in the corresponding elements of the first source vector.", + "html": "

Determine the maximum number value of active BFloat16 elements of the second source vector and corresponding BFloat16 elements of the first source vector and destructively place the results in the corresponding elements of the first source vector.

Regardless of the value of FPCR.AH, the behavior is as follows:

Inactive elements in the destination vector register remain unmodified.

This instruction follows SVE2.1 non-widening BFloat16 numerical behaviors.

ID_AA64ZFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMIN": + return { + "tooltip": "Determine the minimum of BFloat16 elements of the second source vector and the corresponding BFloat16 elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the minimum of BFloat16 elements of the second source vector and the corresponding BFloat16 elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

This instruction follows SME2.1 non-widening BFloat16 numerical behaviors corresponding to instructions that place their results in two or four SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMIN": + return { + "tooltip": "Determine the minimum of BFloat16 elements of the two or four second source vectors and the corresponding BFloat16 elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the minimum of BFloat16 elements of the two or four second source vectors and the corresponding BFloat16 elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

This instruction follows SME2.1 non-widening BFloat16 numerical behaviors corresponding to instructions that place their results in two or four SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMIN": + return { + "tooltip": "Determine the minimum of active BFloat16 elements of the second source vector and corresponding BFloat16 elements of the first source vector and destructively place the results in the corresponding elements of the first source vector.", + "html": "

Determine the minimum of active BFloat16 elements of the second source vector and corresponding BFloat16 elements of the first source vector and destructively place the results in the corresponding elements of the first source vector.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

Inactive elements in the destination vector register remain unmodified.

This instruction follows SVE2.1 non-widening BFloat16 numerical behaviors.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMINNM": + return { + "tooltip": "Determine the minimum number value of BFloat16 elements of the second source vector and the corresponding BFloat16 elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the minimum number value of BFloat16 elements of the second source vector and the corresponding BFloat16 elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

Regardless of the value of FPCR.AH, the behavior is as follows:

This instruction follows SME2.1 non-widening BFloat16 numerical behaviors corresponding to instructions that place their results in two or four SVE Z vectors.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMINNM": + return { + "tooltip": "Determine the minimum number value of BFloat16 elements of the two or four second source vectors and the corresponding BFloat16 elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the minimum number value of BFloat16 elements of the two or four second source vectors and the corresponding BFloat16 elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

Regardless of the value of FPCR.AH, the behavior is as follows:

This instruction follows SME2.1 non-widening BFloat16 numerical behaviors corresponding to instructions that place their results in two or four SVE Z vectors.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMINNM": + return { + "tooltip": "Determine the minimum number value of active BFloat16 elements of the second source vector and corresponding BFloat16 elements of the first source vector and destructively place the results in the corresponding elements of the first source vector.", + "html": "

Determine the minimum number value of active BFloat16 elements of the second source vector and corresponding BFloat16 elements of the first source vector and destructively place the results in the corresponding elements of the first source vector.

Regardless of the value of FPCR.AH, the behavior is as follows:

Inactive elements in the destination vector register remain unmodified.

This instruction follows SVE2.1 non-widening BFloat16 numerical behaviors.

ID_AA64ZFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLA": + return { + "tooltip": "Multiply the corresponding active BFloat16 elements of the first and second source vectors and add to elements of the third source (addend) vector without intermediate rounding. Destructively place the results in the destination and third source (addend) vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Multiply the corresponding active BFloat16 elements of the first and second source vectors and add to elements of the third source (addend) vector without intermediate rounding. Destructively place the results in the destination and third source (addend) vector. Inactive elements in the destination vector register remain unmodified.

This instruction follows SVE2.1 non-widening BFloat16 numerical behaviors.

ID_AA64ZFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLA": + return { + "tooltip": "Multiply all BFloat16 elements within each 128-bit segment of the first source vector by the specified element in the corresponding second source vector segment. The products are then destructively added without intermediate rounding to the corresponding elements of the addend and destination vector.", + "html": "

Multiply all BFloat16 elements within each 128-bit segment of the first source vector by the specified element in the corresponding second source vector segment. The products are then destructively added without intermediate rounding to the corresponding elements of the addend and destination vector.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to 7.

This instruction follows SVE2.1 non-widening BFloat16 numerical behaviors.

This instruction is unpredicated.

ID_AA64ZFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLA": + return { + "tooltip": "Multiply the indexed element of the second source vector by the corresponding BFloat16 floating-point elements of the two or four first source vectors and destructively add without intermediate rounding to the corresponding elements of the ZA single-vector groups.", + "html": "

Multiply the indexed element of the second source vector by the corresponding BFloat16 floating-point elements of the two or four first source vectors and destructively add without intermediate rounding to the corresponding elements of the ZA single-vector groups.

The elements within the second source vector are specified using an immediate element index which selects the same element position within each 128-bit vector segment. The index range is from 0 to 7, encoded in 3 bits. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME2.1 ZA-targeting non-widening BFloat16 numerical behaviors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLA": + return { + "tooltip": "Multiply the corresponding BFloat16 floating-point elements of the two or four first source vectors with corresponding elements of the second source vector and destructively add without intermediate rounding to the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

Multiply the corresponding BFloat16 floating-point elements of the two or four first source vectors with corresponding elements of the second source vector and destructively add without intermediate rounding to the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME2.1 ZA-targeting non-widening BFloat16 numerical behaviors.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLA": + return { + "tooltip": "Multiply the corresponding BFloat16 floating-point elements of the two or four first and second source vectors and destructively add without intermediate rounding to the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

Multiply the corresponding BFloat16 floating-point elements of the two or four first and second source vectors and destructively add without intermediate rounding to the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME2.1 ZA-targeting non-widening BFloat16 numerical behaviors.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLALB": + case "BFMLALT": + return { + "tooltip": "BFloat16 floating-point widening multiply-add long (by element) widens the even-numbered (bottom) or odd-numbered (top) 16-bit elements in the first source vector, and the indexed element in the second source vector from BFloat16 to single-precision format. The instruction then multiplies and adds these values without intermediate rounding to single-precision elements of the destination vector that overlap with the corresponding BFloat16 elements in the first source vector.", + "html": "

BFloat16 floating-point widening multiply-add long (by element) widens the even-numbered (bottom) or odd-numbered (top) 16-bit elements in the first source vector, and the indexed element in the second source vector from BFloat16 to single-precision format. The instruction then multiplies and adds these values without intermediate rounding to single-precision elements of the destination vector that overlap with the corresponding BFloat16 elements in the first source vector.

ID_AA64ISAR1_EL1.BF16 indicates whether this instruction is supported.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLALB": + case "BFMLALT": + return { + "tooltip": "BFloat16 floating-point widening multiply-add long (vector) widens the even-numbered (bottom) or odd-numbered (top) 16-bit elements in the first and second source vectors from BFloat16 to single-precision format. The instruction then multiplies and adds these values without intermediate rounding to the single-precision elements of the destination vector that overlap with the corresponding BFloat16 elements in the source vectors.", + "html": "

BFloat16 floating-point widening multiply-add long (vector) widens the even-numbered (bottom) or odd-numbered (top) 16-bit elements in the first and second source vectors from BFloat16 to single-precision format. The instruction then multiplies and adds these values without intermediate rounding to the single-precision elements of the destination vector that overlap with the corresponding BFloat16 elements in the source vectors.

ID_AA64ISAR1_EL1.BF16 indicates whether this instruction is supported.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLAL": + return { + "tooltip": "This BFloat16 floating-point multiply-add long instruction widens all 16-bit BFloat16 elements in the one, two, or four first source vectors and the indexed element of the second source vector to single-precision format, then multiplies the corresponding elements and destructively adds these values without intermediate rounding to the overlapping 32-bit single-precision elements of the ZA double-vector groups.", + "html": "

This BFloat16 floating-point multiply-add long instruction widens all 16-bit BFloat16 elements in the one, two, or four first source vectors and the indexed element of the second source vector to single-precision format, then multiplies the corresponding elements and destructively adds these values without intermediate rounding to the overlapping 32-bit single-precision elements of the ZA double-vector groups.

The BF16 elements within the second source vector are specified using a 3-bit immediate index which selects the same element position within each 128-bit vector segment.

The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLAL": + return { + "tooltip": "This BFloat16 floating-point multiply-add long instruction widens all 16-bit BFloat16 elements in the one, two, or four first source vectors and the second source vector to single-precision format, then multiplies the corresponding elements and destructively adds these values without intermediate rounding to the overlapping 32-bit single-precision elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.", + "html": "

This BFloat16 floating-point multiply-add long instruction widens all 16-bit BFloat16 elements in the one, two, or four first source vectors and the second source vector to single-precision format, then multiplies the corresponding elements and destructively adds these values without intermediate rounding to the overlapping 32-bit single-precision elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLAL": + return { + "tooltip": "This BFloat16 floating-point multiply-add long instruction widens all 16-bit BFloat16 elements in the two or four first and second source vectors to single-precision format, then multiplies the corresponding elements and destructively adds these values without intermediate rounding to the overlapping 32-bit single-precision elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

This BFloat16 floating-point multiply-add long instruction widens all 16-bit BFloat16 elements in the two or four first and second source vectors to single-precision format, then multiplies the corresponding elements and destructively adds these values without intermediate rounding to the overlapping 32-bit single-precision elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLALB": + return { + "tooltip": "This BFloat16 floating-point multiply-add long instruction widens the even-numbered BFloat16 elements in the first source vector and the corresponding elements in the second source vector to single-precision format and then destructively multiplies and adds these values without intermediate rounding to the single-precision elements of the destination vector that overlap with the corresponding BFloat16 elements in the source vectors. This instruction is unpredicated.", + "html": "

This BFloat16 floating-point multiply-add long instruction widens the even-numbered BFloat16 elements in the first source vector and the corresponding elements in the second source vector to single-precision format and then destructively multiplies and adds these values without intermediate rounding to the single-precision elements of the destination vector that overlap with the corresponding BFloat16 elements in the source vectors. This instruction is unpredicated.

ID_AA64ZFR0_EL1.BF16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLALB": + return { + "tooltip": "This BFloat16 floating-point multiply-add long instruction widens the even-numbered BFloat16 elements in the first source vector and the indexed element from the corresponding 128-bit segment in the second source vector to single-precision format and then destructively multiplies and adds these values without intermediate rounding to the single-precision elements of the destination vector that overlap with the corresponding BFloat16 elements in the first source vector. This instruction is unpredicated.", + "html": "

This BFloat16 floating-point multiply-add long instruction widens the even-numbered BFloat16 elements in the first source vector and the indexed element from the corresponding 128-bit segment in the second source vector to single-precision format and then destructively multiplies and adds these values without intermediate rounding to the single-precision elements of the destination vector that overlap with the corresponding BFloat16 elements in the first source vector. This instruction is unpredicated.

ID_AA64ZFR0_EL1.BF16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLALT": + return { + "tooltip": "This BFloat16 floating-point multiply-add long instruction widens the odd-numbered BFloat16 elements in the first source vector and the corresponding elements in the second source vector to single-precision format and then destructively multiplies and adds these values without intermediate rounding to the single-precision elements of the destination vector that overlap with the corresponding BFloat16 elements in the source vectors. This instruction is unpredicated.", + "html": "

This BFloat16 floating-point multiply-add long instruction widens the odd-numbered BFloat16 elements in the first source vector and the corresponding elements in the second source vector to single-precision format and then destructively multiplies and adds these values without intermediate rounding to the single-precision elements of the destination vector that overlap with the corresponding BFloat16 elements in the source vectors. This instruction is unpredicated.

ID_AA64ZFR0_EL1.BF16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLALT": + return { + "tooltip": "This BFloat16 floating-point multiply-add long instruction widens the odd-numbered BFloat16 elements in the first source vector and the indexed element from the corresponding 128-bit segment in the second source vector to single-precision format and then destructively multiplies and adds these values without intermediate rounding to the single-precision elements of the destination vector that overlap with the corresponding BFloat16 elements in the first source vector. This instruction is unpredicated.", + "html": "

This BFloat16 floating-point multiply-add long instruction widens the odd-numbered BFloat16 elements in the first source vector and the indexed element from the corresponding 128-bit segment in the second source vector to single-precision format and then destructively multiplies and adds these values without intermediate rounding to the single-precision elements of the destination vector that overlap with the corresponding BFloat16 elements in the first source vector. This instruction is unpredicated.

ID_AA64ZFR0_EL1.BF16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLS": + return { + "tooltip": "Multiply the corresponding active BFloat16 elements of the first and second source vectors and subtract from elements of the third source (addend) vector without intermediate rounding. Destructively place the results in the destination and third source (addend) vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Multiply the corresponding active BFloat16 elements of the first and second source vectors and subtract from elements of the third source (addend) vector without intermediate rounding. Destructively place the results in the destination and third source (addend) vector. Inactive elements in the destination vector register remain unmodified.

This instruction follows SVE2.1 non-widening BFloat16 numerical behaviors.

ID_AA64ZFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLS": + return { + "tooltip": "Multiply all BFloat16 elements within each 128-bit segment of the first source vector by the specified element in the corresponding second source vector segment. The products are then destructively subtracted without intermediate rounding from the corresponding elements of the addend and destination vector.", + "html": "

Multiply all BFloat16 elements within each 128-bit segment of the first source vector by the specified element in the corresponding second source vector segment. The products are then destructively subtracted without intermediate rounding from the corresponding elements of the addend and destination vector.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to 7.

This instruction follows SVE2.1 non-widening BFloat16 numerical behaviors.

This instruction is unpredicated.

ID_AA64ZFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLS": + return { + "tooltip": "Multiply the indexed element of the second source vector by the corresponding BFloat16 floating-point elements of the two or four first source vectors and destructively subtract without intermediate rounding from the corresponding elements of the ZA single-vector groups.", + "html": "

Multiply the indexed element of the second source vector by the corresponding BFloat16 floating-point elements of the two or four first source vectors and destructively subtract without intermediate rounding from the corresponding elements of the ZA single-vector groups.

The elements within the second source vector are specified using an immediate element index which selects the same element position within each 128-bit vector segment. The index range is from 0 to 7, encoded in 3 bits. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME2.1 ZA-targeting non-widening BFloat16 numerical behaviors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLS": + return { + "tooltip": "Multiply the corresponding BFloat16 floating-point elements of the two or four first source vector with corresponding elements of the second source vector and destructively subtract without intermediate rounding from the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

Multiply the corresponding BFloat16 floating-point elements of the two or four first source vector with corresponding elements of the second source vector and destructively subtract without intermediate rounding from the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME2.1 ZA-targeting non-widening BFloat16 numerical behaviors.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLS": + return { + "tooltip": "Multiply the corresponding BFloat16 floating-point elements of the two or four first and second source vectors and destructively subtract without intermediate rounding from the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

Multiply the corresponding BFloat16 floating-point elements of the two or four first and second source vectors and destructively subtract without intermediate rounding from the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME2.1 ZA-targeting non-widening BFloat16 numerical behaviors.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLSL": + return { + "tooltip": "This BFloat16 floating-point multiply-subtract long instruction widens all 16-bit BFloat16 elements in the one, two, or four first source vectors and the indexed element of the second source vector to single-precision format, then multiplies the corresponding elements and destructively subtracts these values without intermediate rounding from the overlapping 32-bit single-precision elements of the ZA double-vector groups.", + "html": "

This BFloat16 floating-point multiply-subtract long instruction widens all 16-bit BFloat16 elements in the one, two, or four first source vectors and the indexed element of the second source vector to single-precision format, then multiplies the corresponding elements and destructively subtracts these values without intermediate rounding from the overlapping 32-bit single-precision elements of the ZA double-vector groups.

The BF16 elements within the second source vector are specified using a 3-bit immediate index which selects the same element position within each 128-bit vector segment.

The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLSL": + return { + "tooltip": "This BFloat16 floating-point multiply-subtract long instruction widens all 16-bit BFloat16 elements in the one, two, or four first source vectors and the second source vector to single-precision format, then multiplies the corresponding elements and destructively subtracts these values without intermediate rounding from the overlapping 32-bit single-precision elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.", + "html": "

This BFloat16 floating-point multiply-subtract long instruction widens all 16-bit BFloat16 elements in the one, two, or four first source vectors and the second source vector to single-precision format, then multiplies the corresponding elements and destructively subtracts these values without intermediate rounding from the overlapping 32-bit single-precision elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLSL": + return { + "tooltip": "This BFloat16 floating-point multiply-subtract long instruction widens all 16-bit BFloat16 elements in the two or four first and second source vectors to single-precision format, then multiplies the corresponding elements and destructively subtracts these values without intermediate rounding from the overlapping 32-bit single-precision elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

This BFloat16 floating-point multiply-subtract long instruction widens all 16-bit BFloat16 elements in the two or four first and second source vectors to single-precision format, then multiplies the corresponding elements and destructively subtracts these values without intermediate rounding from the overlapping 32-bit single-precision elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLSLB": + return { + "tooltip": "This BFloat16 floating-point multiply-subtract long instruction widens the even-numbered BFloat16 elements in the first source vector and the corresponding elements in the second source vector to single-precision format and then destructively multiplies and subtracts these values without intermediate rounding from the single-precision elements of the destination vector that overlap with the corresponding BFloat16 elements in the source vectors. This instruction is unpredicated.", + "html": "

This BFloat16 floating-point multiply-subtract long instruction widens the even-numbered BFloat16 elements in the first source vector and the corresponding elements in the second source vector to single-precision format and then destructively multiplies and subtracts these values without intermediate rounding from the single-precision elements of the destination vector that overlap with the corresponding BFloat16 elements in the source vectors. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLSLB": + return { + "tooltip": "This BFloat16 floating-point multiply-subtract long instruction widens the even-numbered BFloat16 elements in the first source vector and the indexed element from the corresponding 128-bit segment in the second source vector to single-precision format and then destructively multiplies and subtracts these values without intermediate rounding from the single-precision elements of the destination vector that overlap with the corresponding BFloat16 elements in the first source vector. This instruction is unpredicated.", + "html": "

This BFloat16 floating-point multiply-subtract long instruction widens the even-numbered BFloat16 elements in the first source vector and the indexed element from the corresponding 128-bit segment in the second source vector to single-precision format and then destructively multiplies and subtracts these values without intermediate rounding from the single-precision elements of the destination vector that overlap with the corresponding BFloat16 elements in the first source vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLSLT": + return { + "tooltip": "This BFloat16 floating-point multiply-subtract long instruction widens the odd-numbered BFloat16 elements in the first source vector and the corresponding elements in the second source vector to single-precision format and then destructively multiplies and subtracts these values without intermediate rounding from the single-precision elements of the destination vector that overlap with the corresponding BFloat16 elements in the source vectors. This instruction is unpredicated.", + "html": "

This BFloat16 floating-point multiply-subtract long instruction widens the odd-numbered BFloat16 elements in the first source vector and the corresponding elements in the second source vector to single-precision format and then destructively multiplies and subtracts these values without intermediate rounding from the single-precision elements of the destination vector that overlap with the corresponding BFloat16 elements in the source vectors. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMLSLT": + return { + "tooltip": "This BFloat16 floating-point multiply-subtract long instruction widens the odd-numbered BFloat16 elements in the first source vector and the indexed element from the corresponding 128-bit segment in the second source vector to single-precision format and then destructively multiplies and subtracts these values without intermediate rounding from the single-precision elements of the destination vector that overlap with the corresponding BFloat16 elements in the first source vector. This instruction is unpredicated.", + "html": "

This BFloat16 floating-point multiply-subtract long instruction widens the odd-numbered BFloat16 elements in the first source vector and the indexed element from the corresponding 128-bit segment in the second source vector to single-precision format and then destructively multiplies and subtracts these values without intermediate rounding from the single-precision elements of the destination vector that overlap with the corresponding BFloat16 elements in the first source vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMMLA": + return { + "tooltip": "BFloat16 floating-point matrix multiply-accumulate into 2x2 matrix.", + "html": "

BFloat16 floating-point matrix multiply-accumulate into 2x2 matrix.

If FEAT_EBF16 is not implemented or FPCR.EBF is 0, this instruction:

If FEAT_EBF16 is implemented and FPCR.EBF is 1, then this instruction:

Irrespective of FEAT_EBF16 and FPCR.EBF, this instruction:

ID_AA64ISAR1_EL1.BF16 indicates whether this instruction is supported.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMMLA": + return { + "tooltip": "If FEAT_EBF16 is not implemented or FPCR.EBF is 0, this instruction", + "html": "

If FEAT_EBF16 is not implemented or FPCR.EBF is 0, this instruction:

If FEAT_EBF16 is implemented and FPCR.EBF is 1, then this instruction:

Irrespective of FEAT_EBF16 and FPCR.EBF, this instruction:

This instruction is unpredicated and vector length agnostic.

ID_AA64ZFR0_EL1.BF16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMOPA": + return { + "tooltip": "The BFloat16 floating-point sum of outer products and accumulate instruction works with a 32-bit element ZA tile.", + "html": "

The BFloat16 floating-point sum of outer products and accumulate instruction works with a 32-bit element ZA tile.

This instruction multiplies the SVLS\u00d72 sub-matrix of BFloat16 values held in the first source vector by the 2\u00d7SVLS sub-matrix of BFloat16 values in the second source vector.

Each source vector is independently predicated by a corresponding governing predicate. When a 16-bit source element is Inactive it is treated as having the value +0.0, but if both pairs of source vector elements that correspond to a 32-bit destination element contain Inactive elements, then the destination element remains unmodified.

The resulting SVLS\u00d7SVLS single-precision floating-point sum of outer products is then destructively added to the single-precision floating-point destination tile. This is equivalent to performing a 2-way dot product and accumulate to each of the destination tile elements.

Each 32-bit container of the first source vector holds 2 consecutive column elements of each row of a SVLS\u00d72 sub-matrix. Similarly, each 32-bit container of the second source vector holds 2 consecutive row elements of each column of a 2\u00d7SVLS sub-matrix.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMOPA": + return { + "tooltip": "This instruction works with a 16-bit element ZA tile.", + "html": "

This instruction works with a 16-bit element ZA tile.

These instructions generate an outer product of the first source vector and the second source vector. The first source is SVLH\u00d71 vector and the second source is 1\u00d7SVLH vector.

Each source vector is independently predicated by a corresponding governing predicate. When either source vector element is Inactive the corresponding destination tile element remains unmodified.

The resulting outer product, SVLH\u00d7SVLH, is then destructively added to the destination tile. This is equivalent to performing a single multiply-accumulate to each of the destination tile elements.

This instruction follows SME2.1 ZA-targeting non-widening BFloat16 numerical behaviors.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMOPS": + return { + "tooltip": "The BFloat16 floating-point sum of outer products and subtract instruction works with a 32-bit element ZA tile.", + "html": "

The BFloat16 floating-point sum of outer products and subtract instruction works with a 32-bit element ZA tile.

This instruction multiplies the SVLS\u00d72 sub-matrix of BFloat16 values held in the first source vector by the 2\u00d7SVLS sub-matrix of BFloat16 values in the second source vector.

Each source vector is independently predicated by a corresponding governing predicate. When a 16-bit source element is Inactive it is treated as having the value +0.0, but if both pairs of source vector elements that correspond to a 32-bit destination element contain Inactive elements, then the destination element remains unmodified.

The resulting SVLS\u00d7SVLS single-precision floating-point sum of outer products is then destructively subtracted from the single-precision floating-point destination tile. This is equivalent to performing a 2-way dot product and subtract from each of the destination tile elements.

Each 32-bit container of the first source vector holds 2 consecutive column elements of each row of a SVLS\u00d72 sub-matrix. Similarly, each 32-bit container of the second source vector holds 2 consecutive row elements of each column of a 2\u00d7SVLS sub-matrix.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMOPS": + return { + "tooltip": "This instruction works with a 16-bit element ZA tile.", + "html": "

This instruction works with a 16-bit element ZA tile.

These instructions generate an outer product of the first source vector and the second source vector. The first source is SVLH\u00d71 vector and the second source is 1\u00d7SVLH vector.

Each source vector is independently predicated by a corresponding governing predicate. When either source vector element is Inactive the corresponding destination tile element remains unmodified.

The resulting outer product, SVLH\u00d7SVLH, is then destructively subtracted from the destination tile. This is equivalent to performing a single multiply-subtract from each of the destination tile elements.

This instruction follows SME2.1 ZA-targeting non-widening BFloat16 numerical behaviors.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMUL": + return { + "tooltip": "Multiply active BFloat16 elements of the second source vector to corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Multiply active BFloat16 elements of the second source vector to corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

This instruction follows SVE2.1 non-widening BFloat16 numerical behaviors.

ID_AA64ZFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMUL": + return { + "tooltip": "Multiply all BFloat16 elements of the second source vector to corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector.", + "html": "

Multiply all BFloat16 elements of the second source vector to corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector.

This instruction follows SVE2.1 non-widening BFloat16 numerical behaviors.

This instruction is unpredicated.

ID_AA64ZFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFMUL": + return { + "tooltip": "Multiply all BFloat16 elements within each 128-bit segment of the first source vector by the specified element in the corresponding second source vector segment and place the results in the corresponding elements of the destination vector.", + "html": "

Multiply all BFloat16 elements within each 128-bit segment of the first source vector by the specified element in the corresponding second source vector segment and place the results in the corresponding elements of the destination vector.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to 7.

This instruction follows SVE2.1 non-widening BFloat16 numerical behaviors.

This instruction is unpredicated.

ID_AA64ZFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFSUB": + return { + "tooltip": "Subtract active BFloat16 elements of the second source vector from corresponding BFloat16 elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Subtract active BFloat16 elements of the second source vector from corresponding BFloat16 elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

This instruction follows SVE2.1 non-widening BFloat16 numerical behaviors.

ID_AA64ZFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFSUB": + return { + "tooltip": "Subtract all BFloat16 elements of the second source vector from corresponding BFloat16 elements of the first source vector and place the results in the corresponding elements of the destination vector.", + "html": "

Subtract all BFloat16 elements of the second source vector from corresponding BFloat16 elements of the first source vector and place the results in the corresponding elements of the destination vector.

This instruction follows SVE2.1 non-widening BFloat16 numerical behaviors.

This instruction is unpredicated.

ID_AA64ZFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFSUB": + return { + "tooltip": "Destructively subtract all elements of the two or four source vectors from the corresponding BFloat16 elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

Destructively subtract all elements of the two or four source vectors from the corresponding BFloat16 elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME2.1 ZA-targeting non-widening BFloat16 numerical behaviors.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.B16B16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFVDOT": + return { + "tooltip": "The instruction computes the sum-of-products of each vertical pair of BFloat16 values in the corresponding elements of the two first source vectors with the pair of BFloat16 values in the indexed 32-bit group of the corresponding 128-bit segment of the second source vector. The single-precision sum-of-products results are destructively added to the corresponding single-precision elements of the two ZA single-vector groups.", + "html": "

The instruction computes the sum-of-products of each vertical pair of BFloat16 values in the corresponding elements of the two first source vectors with the pair of BFloat16 values in the indexed 32-bit group of the corresponding 128-bit segment of the second source vector. The single-precision sum-of-products results are destructively added to the corresponding single-precision elements of the two ZA single-vector groups.

The BF16 pairs within the second source vector are specified using an immediate index which selects the same BF16 pair position within each 128-bit vector segment. The element index range is from 0 to 3.

The vector numbers forming the single-vector group within each half of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half the number of ZA array vectors.

The vector group symbol VGx2 indicates that the ZA operand consists of two ZA single-vector groups. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME2 ZA-targeting BFloat16 numerical behaviors.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BFXIL": + return { + "tooltip": "Bitfield Extract and Insert Low copies a bitfield of <width> bits starting from bit position <lsb> in the source register to the least significant bits of the destination register, leaving the other destination bits unchanged.", + "html": "

Bitfield Extract and Insert Low copies a bitfield of <width> bits starting from bit position <lsb> in the source register to the least significant bits of the destination register, leaving the other destination bits unchanged.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BGRP": + return { + "tooltip": "This instruction separates bits in each element of the first source vector by gathering from the bit positions indicated by non-zero bits in the corresponding mask element of the second source vector to the lowest-numbered contiguous bits of the corresponding destination element, and from positions indicated by zero bits to the highest-numbered bits of the destination element, preserving the bit order within each group. This instruction is unpredicated.", + "html": "

This instruction separates bits in each element of the first source vector by gathering from the bit positions indicated by non-zero bits in the corresponding mask element of the second source vector to the lowest-numbered contiguous bits of the corresponding destination element, and from positions indicated by zero bits to the highest-numbered bits of the destination element, preserving the bit order within each group. This instruction is unpredicated.

ID_AA64ZFR0_EL1.BitPerm indicates whether this instruction is implemented.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BIC": + return { + "tooltip": "Bitwise bit Clear (vector, immediate). This instruction reads each vector element from the destination SIMD&FP register, performs a bitwise AND between each result and the complement of an immediate constant, places the result into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Bitwise bit Clear (vector, immediate). This instruction reads each vector element from the destination SIMD&FP register, performs a bitwise AND between each result and the complement of an immediate constant, places the result into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BIC": + return { + "tooltip": "Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.", + "html": "

Bitwise bit Clear (vector, register). This instruction performs a bitwise AND between the first source SIMD&FP register and the complement of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BIC": + return { + "tooltip": "Bitwise clear bits using immediate with each 64-bit element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate is a 64-bit value consisting of a single run of ones or zeros repeating every 2, 4, 8, 16, 32 or 64 bits. This instruction is unpredicated.", + "html": "

Bitwise clear bits using immediate with each 64-bit element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate is a 64-bit value consisting of a single run of ones or zeros repeating every 2, 4, 8, 16, 32 or 64 bits. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BIC": + return { + "tooltip": "Bitwise Bit Clear (shifted register) performs a bitwise AND of a register value and the complement of an optionally-shifted register value, and writes the result to the destination register.", + "html": "

Bitwise Bit Clear (shifted register) performs a bitwise AND of a register value and the complement of an optionally-shifted register value, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BIC": + return { + "tooltip": "Bitwise AND inverted active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.", + "html": "

Bitwise AND inverted active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BIC": + return { + "tooltip": "Bitwise AND inverted active elements of the second source vector with corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Bitwise AND inverted active elements of the second source vector with corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BIC": + return { + "tooltip": "Bitwise AND inverted all elements of the second source vector with corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.", + "html": "

Bitwise AND inverted all elements of the second source vector with corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BICS": + return { + "tooltip": "Bitwise Bit Clear (shifted register), setting flags, performs a bitwise AND of a register value and the complement of an optionally-shifted register value, and writes the result to the destination register. It updates the condition flags based on the result.", + "html": "

Bitwise Bit Clear (shifted register), setting flags, performs a bitwise AND of a register value and the complement of an optionally-shifted register value, and writes the result to the destination register. It updates the condition flags based on the result.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BICS": + return { + "tooltip": "Bitwise AND inverted active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

Bitwise AND inverted active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BIF": + return { + "tooltip": "Bitwise Insert if False. This instruction inserts each bit from the first source SIMD&FP register into the destination SIMD&FP register if the corresponding bit of the second source SIMD&FP register is 0, otherwise leaves the bit in the destination register unchanged.", + "html": "

Bitwise Insert if False. This instruction inserts each bit from the first source SIMD&FP register into the destination SIMD&FP register if the corresponding bit of the second source SIMD&FP register is 0, otherwise leaves the bit in the destination register unchanged.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BIT": + return { + "tooltip": "Bitwise Insert if True. This instruction inserts each bit from the first source SIMD&FP register into the SIMD&FP destination register if the corresponding bit of the second source SIMD&FP register is 1, otherwise leaves the bit in the destination register unchanged.", + "html": "

Bitwise Insert if True. This instruction inserts each bit from the first source SIMD&FP register into the SIMD&FP destination register if the corresponding bit of the second source SIMD&FP register is 1, otherwise leaves the bit in the destination register unchanged.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BL": + return { + "tooltip": "Branch with Link branches to a PC-relative offset, setting the register X30 to PC+4. It provides a hint that this is a subroutine call.", + "html": "

Branch with Link branches to a PC-relative offset, setting the register X30 to PC+4. It provides a hint that this is a subroutine call.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BLR": + return { + "tooltip": "Branch with Link to Register calls a subroutine at an address in a register, setting register X30 to PC+4.", + "html": "

Branch with Link to Register calls a subroutine at an address in a register, setting register X30 to PC+4.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BLRAA": + case "BLRAAZ": + case "BLRAB": + case "BLRABZ": + return { + "tooltip": "Branch with Link to Register, with pointer authentication. This instruction authenticates the address in the general-purpose register that is specified by <Xn>, using a modifier and the specified key, and calls a subroutine at the authenticated address, setting register X30 to PC+4.", + "html": "

Branch with Link to Register, with pointer authentication. This instruction authenticates the address in the general-purpose register that is specified by <Xn>, using a modifier and the specified key, and calls a subroutine at the authenticated address, setting register X30 to PC+4.

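The modifier is the value in the general-purpose register or stack pointer that is specified by <Xm|SP> for BLRAA and BLRAB, or the value zero for BLRAAZ and BLRABZ.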

Key A is used for BLRAA and BLRAAZ. Key B is used for BLRAB and BLRABZ.

If the authentication passes, the PE continues execution at the target of the branch. For information on behavior if the authentication fails, see Faulting on pointer authentication.

The authenticated address is not written back to the general-purpose register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BMOPA": + return { + "tooltip": "This instruction works with 32-bit element ZA tile. This instruction generates an outer product of the first source SVLS\u00d71 vector and the second source 1\u00d7SVLS vector. Each outer product element is obtained as population count of the bitwise XNOR result of the corresponding 32-bit elements of the first source vector and the second source vector. Each source vector is independently predicated by a corresponding governing predicate. When either source vector element is inactive the corresponding destination tile element remains unmodified. The resulting SVLS\u00d7SVLS product is then destructively added to the destination tile.", + "html": "

This instruction works with 32-bit element ZA tile. This instruction generates an outer product of the first source SVLS\u00d71 vector and the second source 1\u00d7SVLS vector. Each outer product element is obtained as population count of the bitwise XNOR result of the corresponding 32-bit elements of the first source vector and the second source vector. Each source vector is independently predicated by a corresponding governing predicate. When either source vector element is inactive the corresponding destination tile element remains unmodified. The resulting SVLS\u00d7SVLS product is then destructively added to the destination tile.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BMOPS": + return { + "tooltip": "This instruction works with 32-bit element ZA tile. This instruction generates an outer product of the first source SVLS\u00d71 vector and the second source 1\u00d7SVLS vector. Each outer product element is obtained as population count of the bitwise XNOR result of the corresponding 32-bit elements of the first source vector and the second source vector. Each source vector is independently predicated by a corresponding governing predicate. When either source vector element is inactive the corresponding destination tile element remains unmodified. The resulting SVLS\u00d7SVLS product is then destructively subtracted from the destination tile.", + "html": "

This instruction works with 32-bit element ZA tile. This instruction generates an outer product of the first source SVLS\u00d71 vector and the second source 1\u00d7SVLS vector. Each outer product element is obtained as population count of the bitwise XNOR result of the corresponding 32-bit elements of the first source vector and the second source vector. Each source vector is independently predicated by a corresponding governing predicate. When either source vector element is inactive the corresponding destination tile element remains unmodified. The resulting SVLS\u00d7SVLS product is then destructively subtracted from the destination tile.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BR": + return { + "tooltip": "Branch to Register branches unconditionally to an address in a register, with a hint that this is not a subroutine return.", + "html": "

Branch to Register branches unconditionally to an address in a register, with a hint that this is not a subroutine return.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BRAA": + case "BRAAZ": + case "BRAB": + case "BRABZ": + return { + "tooltip": "Branch to Register, with pointer authentication. This instruction authenticates the address in the general-purpose register that is specified by <Xn>, using a modifier and the specified key, and branches to the authenticated address.", + "html": "

Branch to Register, with pointer authentication. This instruction authenticates the address in the general-purpose register that is specified by <Xn>, using a modifier and the specified key, and branches to the authenticated address.

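The modifier is the value in the general-purpose register or stack pointer that is specified by <Xm|SP> for BRAA and BRAB, or the value zero for BRAAZ and BRABZ.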

Key A is used for BRAA and BRAAZ. Key B is used for BRAB and BRABZ.

If the authentication passes, the PE continues execution at the target of the branch. For information on behavior if the authentication fails, see Faulting on pointer authentication.

The authenticated address is not written back to the general-purpose register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BRB": + return { + "tooltip": "Branch Record Buffer. For more information, see op0==0b01, cache maintenance, TLB maintenance, and address translation instructions.", + "html": "

Branch Record Buffer. For more information, see op0==0b01, cache maintenance, TLB maintenance, and address translation instructions.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BRK": + return { + "tooltip": "Breakpoint instruction. A BRK instruction generates a Breakpoint Instruction exception. The PE records the exception in ESR_ELx, using the EC value 0x3c, and captures the value of the immediate argument in ESR_ELx.ISS.", + "html": "

Breakpoint instruction. A BRK instruction generates a Breakpoint Instruction exception. The PE records the exception in ESR_ELx, using the EC value 0x3c, and captures the value of the immediate argument in ESR_ELx.ISS.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BRKA": + return { + "tooltip": "Sets destination predicate elements up to and including the first active and true source element to true, then sets subsequent elements to false. Inactive elements in the destination predicate register remain unmodified or are set to zero, depending on whether merging or zeroing predication is selected. Does not set the condition flags.", + "html": "

Sets destination predicate elements up to and including the first active and true source element to true, then sets subsequent elements to false. Inactive elements in the destination predicate register remain unmodified or are set to zero, depending on whether merging or zeroing predication is selected. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BRKAS": + return { + "tooltip": "Sets destination predicate elements up to and including the first active and true source element to true, then sets subsequent elements to false. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

Sets destination predicate elements up to and including the first active and true source element to true, then sets subsequent elements to false. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BRKB": + return { + "tooltip": "Sets destination predicate elements up to but not including the first active and true source element to true, then sets subsequent elements to false. Inactive elements in the destination predicate register remain unmodified or are set to zero, depending on whether merging or zeroing predication is selected. Does not set the condition flags.", + "html": "

Sets destination predicate elements up to but not including the first active and true source element to true, then sets subsequent elements to false. Inactive elements in the destination predicate register remain unmodified or are set to zero, depending on whether merging or zeroing predication is selected. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BRKBS": + return { + "tooltip": "Sets destination predicate elements up to but not including the first active and true source element to true, then sets subsequent elements to false. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

Sets destination predicate elements up to but not including the first active and true source element to true, then sets subsequent elements to false. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BRKN": + return { + "tooltip": "If the last active element of the first source predicate is false then set the destination predicate to all-false. Otherwise leaves the destination and second source predicate unchanged. Does not set the condition flags.", + "html": "

If the last active element of the first source predicate is false then set the destination predicate to all-false. Otherwise leaves the destination and second source predicate unchanged. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BRKNS": + return { + "tooltip": "If the last active element of the first source predicate is false then set the destination predicate to all-false. Otherwise leaves the destination and second source predicate unchanged. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

If the last active element of the first source predicate is false then set the destination predicate to all-false. Otherwise leaves the destination and second source predicate unchanged. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BRKPA": + return { + "tooltip": "If the last active element of the first source predicate is false then set the destination predicate to all-false. Otherwise sets destination predicate elements up to and including the first active and true source element to true, then sets subsequent elements to false. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.", + "html": "

If the last active element of the first source predicate is false then set the destination predicate to all-false. Otherwise sets destination predicate elements up to and including the first active and true source element to true, then sets subsequent elements to false. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BRKPAS": + return { + "tooltip": "If the last active element of the first source predicate is false then set the destination predicate to all-false. Otherwise sets destination predicate elements up to and including the first active and true source element to true, then sets subsequent elements to false. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

If the last active element of the first source predicate is false then set the destination predicate to all-false. Otherwise sets destination predicate elements up to and including the first active and true source element to true, then sets subsequent elements to false. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BRKPB": + return { + "tooltip": "If the last active element of the first source predicate is false then set the destination predicate to all-false. Otherwise sets destination predicate elements up to but not including the first active and true source element to true, then sets subsequent elements to false. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.", + "html": "

If the last active element of the first source predicate is false then set the destination predicate to all-false. Otherwise sets destination predicate elements up to but not including the first active and true source element to true, then sets subsequent elements to false. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BRKPBS": + return { + "tooltip": "If the last active element of the first source predicate is false then set the destination predicate to all-false. Otherwise sets destination predicate elements up to but not including the first active and true source element to true, then sets subsequent elements to false. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

If the last active element of the first source predicate is false then set the destination predicate to all-false. Otherwise sets destination predicate elements up to but not including the first active and true source element to true, then sets subsequent elements to false. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BSL1N": + return { + "tooltip": "Selects bits from the inverted first source vector where the corresponding bit in the third source vector is '1', and from the second source vector where the corresponding bit in the third source vector is '0'. The result is placed destructively in the destination and first source vector. This instruction is unpredicated.", + "html": "

Selects bits from the inverted first source vector where the corresponding bit in the third source vector is '1', and from the second source vector where the corresponding bit in the third source vector is '0'. The result is placed destructively in the destination and first source vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BSL2N": + return { + "tooltip": "Selects bits from the first source vector where the corresponding bit in the third source vector is '1', and from the inverted second source vector where the corresponding bit in the third source vector is '0'. The result is placed destructively in the destination and first source vector. This instruction is unpredicated.", + "html": "

Selects bits from the first source vector where the corresponding bit in the third source vector is '1', and from the inverted second source vector where the corresponding bit in the third source vector is '0'. The result is placed destructively in the destination and first source vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BSL": + return { + "tooltip": "Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.", + "html": "

Bitwise Select. This instruction sets each bit in the destination SIMD&FP register to the corresponding bit from the first source SIMD&FP register when the original destination bit was 1, otherwise from the second source SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BSL": + return { + "tooltip": "Selects bits from the first source vector where the corresponding bit in the third source vector is '1', and from the second source vector where the corresponding bit in the third source vector is '0'. The result is placed destructively in the destination and first source vector. This instruction is unpredicated.", + "html": "

Selects bits from the first source vector where the corresponding bit in the third source vector is '1', and from the second source vector where the corresponding bit in the third source vector is '0'. The result is placed destructively in the destination and first source vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "BTI": + return { + "tooltip": "Branch Target Identification. A BTI instruction is used to guard against the execution of instructions which are not the intended target of a branch.", + "html": "

Branch Target Identification. A BTI instruction is used to guard against the execution of instructions which are not the intended target of a branch.

Outside of a guarded memory region, a BTI instruction executes as a NOP. Within a guarded memory region while PSTATE.BTYPE != 0b00, a BTI instruction compatible with the current value of PSTATE.BTYPE will not generate a Branch Target Exception and will allow execution of subsequent instructions within the memory region.

The operand <targets> passed to a BTI instruction determines the values of PSTATE.BTYPE which the BTI instruction is compatible with.

Within a guarded memory region, when PSTATE.BTYPE != 0b00, all instructions will generate a Branch Target Exception, other than BRK, BTI, HLT, PACIASP, and PACIBSP, which might not. See the individual instructions for more information.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CADD": + return { + "tooltip": "Add the real and imaginary components of the integral complex numbers from the first source vector to the complex numbers from the second source vector which have first been rotated by 90 or 270 degrees in the direction from the positive real axis towards the positive imaginary axis, when considered in polar representation, equivalent to multiplying the complex numbers in the second source vector by \u00b1j beforehand. Destructively place the results in the corresponding elements of the first source vector. This instruction is unpredicated.", + "html": "

Add the real and imaginary components of the integral complex numbers from the first source vector to the complex numbers from the second source vector which have first been rotated by 90 or 270 degrees in the direction from the positive real axis towards the positive imaginary axis, when considered in polar representation, equivalent to multiplying the complex numbers in the second source vector by \u00b1j beforehand. Destructively place the results in the corresponding elements of the first source vector. This instruction is unpredicated.

Each complex number is represented in a vector register as an even/odd pair of elements with the real part in the even-numbered element and the imaginary part in the odd-numbered element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CAS": + case "CASA": + case "CASAL": + case "CASL": + return { + "tooltip": "Compare and Swap word or doubleword in memory reads a 32-bit word or 64-bit doubleword from memory, and compares it against the value held in a first register. If the comparison is equal, the value in a second register is written to memory. If the write is performed, the read and write occur atomically such that no other modification of the memory location can take place between the read and write.", + "html": "

Compare and Swap word or doubleword in memory reads a 32-bit word or 64-bit doubleword from memory, and compares it against the value held in a first register. If the comparison is equal, the value in a second register is written to memory. If the write is performed, the read and write occur atomically such that no other modification of the memory location can take place between the read and write.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

The architecture permits that the data read clears any exclusive monitors associated with that location, even if the compare subsequently fails.

If the instruction generates a synchronous Data Abort, the register which is compared and loaded, that is <Ws>, or <Xs>, is restored to the value held in the register before the instruction was executed.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CASAB": + case "CASALB": + case "CASB": + case "CASLB": + return { + "tooltip": "Compare and Swap byte in memory reads an 8-bit byte from memory, and compares it against the value held in a first register. If the comparison is equal, the value in a second register is written to memory. If the write is performed, the read and write occur atomically such that no other modification of the memory location can take place between the read and write.", + "html": "

Compare and Swap byte in memory reads an 8-bit byte from memory, and compares it against the value held in a first register. If the comparison is equal, the value in a second register is written to memory. If the write is performed, the read and write occur atomically such that no other modification of the memory location can take place between the read and write.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

The architecture permits that the data read clears any exclusive monitors associated with that location, even if the compare subsequently fails.

If the instruction generates a synchronous Data Abort, the register which is compared and loaded, that is <Ws>, is restored to the values held in the register before the instruction was executed.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CASAH": + case "CASALH": + case "CASH": + case "CASLH": + return { + "tooltip": "Compare and Swap halfword in memory reads a 16-bit halfword from memory, and compares it against the value held in a first register. If the comparison is equal, the value in a second register is written to memory. If the write is performed, the read and write occur atomically such that no other modification of the memory location can take place between the read and write.", + "html": "

Compare and Swap halfword in memory reads a 16-bit halfword from memory, and compares it against the value held in a first register. If the comparison is equal, the value in a second register is written to memory. If the write is performed, the read and write occur atomically such that no other modification of the memory location can take place between the read and write.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

The architecture permits that the data read clears any exclusive monitors associated with that location, even if the compare subsequently fails.

If the instruction generates a synchronous Data Abort, the register which is compared and loaded, that is <Ws>, is restored to the values held in the register before the instruction was executed.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CASP": + case "CASPA": + case "CASPAL": + case "CASPL": + return { + "tooltip": "Compare and Swap Pair of words or doublewords in memory reads a pair of 32-bit words or 64-bit doublewords from memory, and compares them against the values held in the first pair of registers. If the comparison is equal, the values in the second pair of registers are written to memory. If the writes are performed, the reads and writes occur atomically such that no other modification of the memory location can take place between the reads and writes.", + "html": "

Compare and Swap Pair of words or doublewords in memory reads a pair of 32-bit words or 64-bit doublewords from memory, and compares them against the values held in the first pair of registers. If the comparison is equal, the values in the second pair of registers are written to memory. If the writes are performed, the reads and writes occur atomically such that no other modification of the memory location can take place between the reads and writes.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

The architecture permits that the data read clears any exclusive monitors associated with that location, even if the compare subsequently fails.

If the instruction generates a synchronous Data Abort, the registers which are compared and loaded, that is <Ws> and <W(s+1)>, or <Xs> and <X(s+1)>, are restored to the values held in the registers before the instruction was executed.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CBNZ": + return { + "tooltip": "Compare and Branch on Nonzero compares the value in a register with zero, and conditionally branches to a label at a PC-relative offset if the comparison is not equal. It provides a hint that this is not a subroutine call or return. This instruction does not affect the condition flags.", + "html": "

Compare and Branch on Nonzero compares the value in a register with zero, and conditionally branches to a label at a PC-relative offset if the comparison is not equal. It provides a hint that this is not a subroutine call or return. This instruction does not affect the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CBZ": + return { + "tooltip": "Compare and Branch on Zero compares the value in a register with zero, and conditionally branches to a label at a PC-relative offset if the comparison is equal. It provides a hint that this is not a subroutine call or return. This instruction does not affect condition flags.", + "html": "

Compare and Branch on Zero compares the value in a register with zero, and conditionally branches to a label at a PC-relative offset if the comparison is equal. It provides a hint that this is not a subroutine call or return. This instruction does not affect condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CCMN": + return { + "tooltip": "Conditional Compare Negative (immediate) sets the value of the condition flags to the result of the comparison of a register value and a negated immediate value if the condition is TRUE, and an immediate value otherwise.", + "html": "

Conditional Compare Negative (immediate) sets the value of the condition flags to the result of the comparison of a register value and a negated immediate value if the condition is TRUE, and an immediate value otherwise.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CCMN": + return { + "tooltip": "Conditional Compare Negative (register) sets the value of the condition flags to the result of the comparison of a register value and the inverse of another register value if the condition is TRUE, and an immediate value otherwise.", + "html": "

Conditional Compare Negative (register) sets the value of the condition flags to the result of the comparison of a register value and the inverse of another register value if the condition is TRUE, and an immediate value otherwise.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CCMP": + return { + "tooltip": "Conditional Compare (immediate) sets the value of the condition flags to the result of the comparison of a register value and an immediate value if the condition is TRUE, and an immediate value otherwise.", + "html": "

Conditional Compare (immediate) sets the value of the condition flags to the result of the comparison of a register value and an immediate value if the condition is TRUE, and an immediate value otherwise.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CCMP": + return { + "tooltip": "Conditional Compare (register) sets the value of the condition flags to the result of the comparison of two registers if the condition is TRUE, and an immediate value otherwise.", + "html": "

Conditional Compare (register) sets the value of the condition flags to the result of the comparison of two registers if the condition is TRUE, and an immediate value otherwise.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CDOT": + return { + "tooltip": "The complex integer dot product instructions delimit the source vectors into pairs of 8-bit or 16-bit signed integer complex numbers. Within each pair, the complex numbers in the first source vector are multiplied by the corresponding complex numbers in the second source vector and the resulting wide real or wide imaginary part of the product is accumulated into a 32-bit or 64-bit destination vector element which overlaps all four of the elements that comprise a pair of complex number values in the first source vector.", + "html": "

The complex integer dot product instructions delimit the source vectors into pairs of 8-bit or 16-bit signed integer complex numbers. Within each pair, the complex numbers in the first source vector are multiplied by the corresponding complex numbers in the second source vector and the resulting wide real or wide imaginary part of the product is accumulated into a 32-bit or 64-bit destination vector element which overlaps all four of the elements that comprise a pair of complex number values in the first source vector.

As a result each instruction implicitly deinterleaves the real and imaginary components of their complex number inputs, so that the destination vector accumulates 4\u00d7wide real sums or 4\u00d7wide imaginary sums.

The complex numbers in the second source vector are rotated by 0, 90, 180 or 270 degrees in the direction from the positive real axis towards the positive imaginary axis, when considered in polar representation, by performing the following transformations prior to the dot product operations:

Each complex number is represented in a vector register as an even/odd pair of elements with the real part in the even-numbered element and the imaginary part in the odd-numbered element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CDOT": + return { + "tooltip": "The complex integer dot product instructions delimit the source vectors into pairs of 8-bit or 16-bit signed integer complex numbers. Within each pair, the complex numbers in the first source vector are multiplied by the corresponding complex numbers in the second source vector and the resulting wide real or wide imaginary part of the product is accumulated into a 32-bit or 64-bit destination vector element which overlaps all four of the elements that comprise a pair of complex number values in the first source vector.", + "html": "

The complex integer dot product instructions delimit the source vectors into pairs of 8-bit or 16-bit signed integer complex numbers. Within each pair, the complex numbers in the first source vector are multiplied by the corresponding complex numbers in the second source vector and the resulting wide real or wide imaginary part of the product is accumulated into a 32-bit or 64-bit destination vector element which overlaps all four of the elements that comprise a pair of complex number values in the first source vector.

As a result each instruction implicitly deinterleaves the real and imaginary components of their complex number inputs, so that the destination vector accumulates 4\u00d7wide real sums or 4\u00d7wide imaginary sums.

The complex numbers in the second source vector are rotated by 0, 90, 180 or 270 degrees in the direction from the positive real axis towards the positive imaginary axis, when considered in polar representation, by performing the following transformations prior to the dot product operations:

The indexed form of these instructions select a single pair of complex numbers within each 128-bit segment of the second source vector as the multiplier for all pairs of complex numbers within the corresponding 128-bit segment of the first source vector. The complex number pairs within the second source vector are specified using an immediate index which selects the same complex number pair position within each 128-bit vector segment. The index range is from 0 to one less than the number of complex number pairs per 128-bit segment, encoded in 1 or 2 bits depending on the size of the complex number pair.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CFINV": + return { + "tooltip": "Invert Carry Flag. This instruction inverts the value of the PSTATE.C flag.", + "html": "

Invert Carry Flag. This instruction inverts the value of the PSTATE.C flag.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CFP": + return { + "tooltip": "Control Flow Prediction Restriction by Context prevents control flow predictions that predict execution addresses based on information gathered from earlier execution within a particular execution context. Control flow predictions determined by the actions of code in the target execution context or contexts appearing in program order before the instruction cannot be used to exploitatively control speculative execution occurring after the instruction is complete and synchronized.", + "html": "

Control Flow Prediction Restriction by Context prevents control flow predictions that predict execution addresses based on information gathered from earlier execution within a particular execution context. Control flow predictions determined by the actions of code in the target execution context or contexts appearing in program order before the instruction cannot be used to exploitatively control speculative execution occurring after the instruction is complete and synchronized.

For more information, see CFP RCTX, Control Flow Prediction Restriction by Context.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CHKFEAT": + return { + "tooltip": "Check feature status. This instruction indicates the status of features.", + "html": "

Check feature status. This instruction indicates the status of features.

If FEAT_CHK is not implemented, this instruction executes as a NOP.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CINC": + return { + "tooltip": "Conditional Increment returns, in the destination register, the value of the source register incremented by 1 if the condition is TRUE, and otherwise returns the value of the source register.", + "html": "

Conditional Increment returns, in the destination register, the value of the source register incremented by 1 if the condition is TRUE, and otherwise returns the value of the source register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CINV": + return { + "tooltip": "Conditional Invert returns, in the destination register, the bitwise inversion of the value of the source register if the condition is TRUE, and otherwise returns the value of the source register.", + "html": "

Conditional Invert returns, in the destination register, the bitwise inversion of the value of the source register if the condition is TRUE, and otherwise returns the value of the source register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CLASTA": + return { + "tooltip": "From the source vector register extract the element after the last active element, or if the last active element is the final element extract element zero, and then zero-extend that element to destructively place in the destination and first source general-purpose register. If there are no active elements then destructively zero-extend the least significant element-size bits of the destination and first source general-purpose register.", + "html": "

From the source vector register extract the element after the last active element, or if the last active element is the final element extract element zero, and then zero-extend that element to destructively place in the destination and first source general-purpose register. If there are no active elements then destructively zero-extend the least significant element-size bits of the destination and first source general-purpose register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CLASTA": + return { + "tooltip": "From the source vector register extract the element after the last active element, or if the last active element is the final element extract element zero, and then zero-extend that element to destructively place in the destination and first source SIMD & floating-point scalar register. If there are no active elements then destructively zero-extend the least significant element-size bits of the destination and first source SIMD & floating-point scalar register.", + "html": "

From the source vector register extract the element after the last active element, or if the last active element is the final element extract element zero, and then zero-extend that element to destructively place in the destination and first source SIMD & floating-point scalar register. If there are no active elements then destructively zero-extend the least significant element-size bits of the destination and first source SIMD & floating-point scalar register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CLASTA": + return { + "tooltip": "From the second source vector register extract the element after the last active element, or if the last active element is the final element extract element zero, and then replicate that element to destructively fill the destination and first source vector.", + "html": "

From the second source vector register extract the element after the last active element, or if the last active element is the final element extract element zero, and then replicate that element to destructively fill the destination and first source vector.

If there are no active elements then leave the destination and source vector unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CLASTB": + return { + "tooltip": "From the source vector register extract the last active element, and then zero-extend that element to destructively place in the destination and first source general-purpose register. If there are no active elements then destructively zero-extend the least significant element-size bits of the destination and first source general-purpose register.", + "html": "

From the source vector register extract the last active element, and then zero-extend that element to destructively place in the destination and first source general-purpose register. If there are no active elements then destructively zero-extend the least significant element-size bits of the destination and first source general-purpose register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CLASTB": + return { + "tooltip": "From the source vector register extract the last active element, and then zero-extend that element to destructively place in the destination and first source SIMD & floating-point scalar register. If there are no active elements then destructively zero-extend the least significant element-size bits of the destination and first source SIMD & floating-point scalar register.", + "html": "

From the source vector register extract the last active element, and then zero-extend that element to destructively place in the destination and first source SIMD & floating-point scalar register. If there are no active elements then destructively zero-extend the least significant element-size bits of the destination and first source SIMD & floating-point scalar register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CLASTB": + return { + "tooltip": "From the second source vector register extract the last active element, and then replicate that element to destructively fill the destination and first source vector.", + "html": "

From the second source vector register extract the last active element, and then replicate that element to destructively fill the destination and first source vector.

If there are no active elements then leave the destination and source vector unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CLRBHB": + return { + "tooltip": "Clear Branch History clears the branch history for the current context to the extent that branch history information created before the CLRBHB instruction cannot be used by code before the CLRBHB instruction to exploitatively control the execution of any indirect branches in code in the current context that appear in program order after the instruction.", + "html": "

Clear Branch History clears the branch history for the current context to the extent that branch history information created before the CLRBHB instruction cannot be used by code before the CLRBHB instruction to exploitatively control the execution of any indirect branches in code in the current context that appear in program order after the instruction.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CLREX": + return { + "tooltip": "Clear Exclusive clears the local monitor of the executing PE.", + "html": "

Clear Exclusive clears the local monitor of the executing PE.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CLS": + return { + "tooltip": "Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The count does not include the most significant bit itself.", + "html": "

Count Leading Sign bits (vector). This instruction counts the number of consecutive bits following the most significant bit that are the same as the most significant bit in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The count does not include the most significant bit itself.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CLS": + return { + "tooltip": "Count Leading Sign bits counts the number of leading bits of the source register that have the same value as the most significant bit of the register, and writes the result to the destination register. This count does not include the most significant bit of the source register.", + "html": "

Count Leading Sign bits counts the number of leading bits of the source register that have the same value as the most significant bit of the register, and writes the result to the destination register. This count does not include the most significant bit of the source register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CLS": + return { + "tooltip": "Count the number of consecutive sign bits, starting from the most significant bit in each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Count the number of consecutive sign bits, starting from the most significant bit in each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CLZ": + return { + "tooltip": "Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Count Leading Zero bits (vector). This instruction counts the number of consecutive zeros, starting from the most significant bit, in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CLZ": + return { + "tooltip": "Count Leading Zeros counts the number of consecutive binary zero bits, starting from the most significant bit in the source register, and places the count in the destination register.", + "html": "

Count Leading Zeros counts the number of consecutive binary zero bits, starting from the most significant bit in the source register, and places the count in the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CLZ": + return { + "tooltip": "Count the number of consecutive binary zero bits, starting from the most significant bit in each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Count the number of consecutive binary zero bits, starting from the most significant bit in each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMEQ": + return { + "tooltip": "Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.", + "html": "

Compare bitwise Equal (vector). This instruction compares each vector element from the first source SIMD&FP register with the corresponding vector element from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMEQ": + return { + "tooltip": "Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.", + "html": "

Compare bitwise Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMGE": + return { + "tooltip": "Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.", + "html": "

Compare signed Greater than or Equal (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than or equal to the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMGE": + return { + "tooltip": "Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.", + "html": "

Compare signed Greater than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMGT": + return { + "tooltip": "Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.", + "html": "

Compare signed Greater than (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first signed integer value is greater than the second signed integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMGT": + return { + "tooltip": "Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.", + "html": "

Compare signed Greater than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMHI": + return { + "tooltip": "Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.", + "html": "

Compare unsigned Higher (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMHS": + return { + "tooltip": "Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.", + "html": "

Compare unsigned Higher or Same (vector). This instruction compares each vector element in the first source SIMD&FP register with the corresponding vector element in the second source SIMD&FP register and if the first unsigned integer value is greater than or equal to the second unsigned integer value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMLA": + return { + "tooltip": "Multiply the duplicated real components for rotations 0 and 180, or imaginary components for rotations 90 and 270, of the integral numbers in the first source vector by the corresponding complex number in the second source vector rotated by 0, 90, 180 or 270 degrees in the direction from the positive real axis towards the positive imaginary axis, when considered in polar representation.", + "html": "

Multiply the duplicated real components for rotations 0 and 180, or imaginary components for rotations 90 and 270, of the integral numbers in the first source vector by the corresponding complex number in the second source vector rotated by 0, 90, 180 or 270 degrees in the direction from the positive real axis towards the positive imaginary axis, when considered in polar representation.

Then add the products to the corresponding components of the complex numbers in the addend vector. Destructively place the results in the corresponding elements of the addend vector. This instruction is unpredicated.

These transformations permit the creation of a variety of multiply-add and multiply-subtract operations on complex numbers by combining two of these instructions with the same vector operands but with rotations that are 90 degrees apart.

Each complex number is represented in a vector register as an even/odd pair of elements with the real part in the even-numbered element and the imaginary part in the odd-numbered element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMLA": + return { + "tooltip": "Multiply the duplicated real components for rotations 0 and 180, or imaginary components for rotations 90 and 270, of the integral numbers in each 128-bit segment of the first source vector by the specified complex number in the corresponding the second source vector segment rotated by 0, 90, 180 or 270 degrees in the direction from the positive real axis towards the positive imaginary axis, when considered in polar representation.", + "html": "

Multiply the duplicated real components for rotations 0 and 180, or imaginary components for rotations 90 and 270, of the integral numbers in each 128-bit segment of the first source vector by the specified complex number in the corresponding the second source vector segment rotated by 0, 90, 180 or 270 degrees in the direction from the positive real axis towards the positive imaginary axis, when considered in polar representation.

Then add the products to the corresponding components of the complex numbers in the addend vector. Destructively place the results in the corresponding elements of the addend vector. This instruction is unpredicated.

These transformations permit the creation of a variety of multiply-add and multiply-subtract operations on complex numbers by combining two of these instructions with the same vector operands but with rotations that are 90 degrees apart.

Each complex number is represented in a vector register as an even/odd pair of elements with the real part in the even-numbered element and the imaginary part in the odd-numbered element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMLE": + return { + "tooltip": "Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.", + "html": "

Compare signed Less than or Equal to zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMLT": + return { + "tooltip": "Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.", + "html": "

Compare signed Less than zero (vector). This instruction reads each vector element in the source SIMD&FP register and if the signed integer value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMN": + return { + "tooltip": "Compare Negative (extended register) adds a register value and a sign or zero-extended register value, followed by an optional left shift amount. The argument that is extended from the register can be a byte, halfword, word, or doubleword. It updates the condition flags based on the result, and discards the result.", + "html": "

Compare Negative (extended register) adds a register value and a sign or zero-extended register value, followed by an optional left shift amount. The argument that is extended from the <Rm> register can be a byte, halfword, word, or doubleword. It updates the condition flags based on the result, and discards the result.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMN": + return { + "tooltip": "Compare Negative (immediate) adds a register value and an optionally-shifted immediate value. It updates the condition flags based on the result, and discards the result.", + "html": "

Compare Negative (immediate) adds a register value and an optionally-shifted immediate value. It updates the condition flags based on the result, and discards the result.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMN": + return { + "tooltip": "Compare Negative (shifted register) adds a register value and an optionally-shifted register value. It updates the condition flags based on the result, and discards the result.", + "html": "

Compare Negative (shifted register) adds a register value and an optionally-shifted register value. It updates the condition flags based on the result, and discards the result.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMP": + return { + "tooltip": "Compare (extended register) subtracts a sign or zero-extended register value, followed by an optional left shift amount, from a register value. The argument that is extended from the register can be a byte, halfword, word, or doubleword. It updates the condition flags based on the result, and discards the result.", + "html": "

Compare (extended register) subtracts a sign or zero-extended register value, followed by an optional left shift amount, from a register value. The argument that is extended from the <Rm> register can be a byte, halfword, word, or doubleword. It updates the condition flags based on the result, and discards the result.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMP": + return { + "tooltip": "Compare (immediate) subtracts an optionally-shifted immediate value from a register value. It updates the condition flags based on the result, and discards the result.", + "html": "

Compare (immediate) subtracts an optionally-shifted immediate value from a register value. It updates the condition flags based on the result, and discards the result.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMP": + return { + "tooltip": "Compare (shifted register) subtracts an optionally-shifted register value from a register value. It updates the condition flags based on the result, and discards the result.", + "html": "

Compare (shifted register) subtracts an optionally-shifted register value from a register value. It updates the condition flags based on the result, and discards the result.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMP": + case "CMPAL": + case "CMPCC": + case "CMPCS": + case "CMPEQ": + case "CMPGE": + case "CMPGT": + case "CMPHI": + case "CMPLE": + case "CMPLS": + case "CMPLT": + case "CMPMI": + case "CMPNE": + case "CMPPL": + case "CMPVC": + case "CMPVS": + return { + "tooltip": "Compare active integer elements in the source vector with an immediate, and place the boolean results of the specified comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

Compare active integer elements in the source vector with an immediate, and place the boolean results of the specified comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMP": + case "CMPAL": + case "CMPCC": + case "CMPCS": + case "CMPEQ": + case "CMPGE": + case "CMPGT": + case "CMPHI": + case "CMPLE": + case "CMPLS": + case "CMPLT": + case "CMPMI": + case "CMPNE": + case "CMPPL": + case "CMPVC": + case "CMPVS": + return { + "tooltip": "Compare active integer elements in the first source vector with overlapping 64-bit doubleword elements in the second source vector, and place the boolean results of the specified comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

Compare active integer elements in the first source vector with overlapping 64-bit doubleword elements in the second source vector, and place the boolean results of the specified comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMP": + case "CMPAL": + case "CMPCC": + case "CMPCS": + case "CMPEQ": + case "CMPGE": + case "CMPGT": + case "CMPHI": + case "CMPLE": + case "CMPLS": + case "CMPLT": + case "CMPMI": + case "CMPNE": + case "CMPPL": + case "CMPVC": + case "CMPVS": + return { + "tooltip": "Compare active integer elements in the first source vector with corresponding elements in the second source vector, and place the boolean results of the specified comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

Compare active integer elements in the first source vector with corresponding elements in the second source vector, and place the boolean results of the specified comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMPLE": + return { + "tooltip": "Compare active signed integer elements in the first source vector being less than or equal to corresponding signed elements in the second source vector, and place the boolean results of the comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

Compare active signed integer elements in the first source vector being less than or equal to corresponding signed elements in the second source vector, and place the boolean results of the comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMPLO": + return { + "tooltip": "Compare active unsigned integer elements in the first source vector being lower than corresponding unsigned elements in the second source vector, and place the boolean results of the comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

Compare active unsigned integer elements in the first source vector being lower than corresponding unsigned elements in the second source vector, and place the boolean results of the comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMPLS": + return { + "tooltip": "Compare active unsigned integer elements in the first source vector being lower than or same as corresponding unsigned elements in the second source vector, and place the boolean results of the comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

Compare active unsigned integer elements in the first source vector being lower than or same as corresponding unsigned elements in the second source vector, and place the boolean results of the comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMPLT": + return { + "tooltip": "Compare active signed integer elements in the first source vector being less than corresponding signed elements in the second source vector, and place the boolean results of the comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

Compare active signed integer elements in the first source vector being less than corresponding signed elements in the second source vector, and place the boolean results of the comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMPP": + return { + "tooltip": "Compare with Tag subtracts the 56-bit address held in the second source register from the 56-bit address held in the first source register, updates the condition flags based on the result of the subtraction, and discards the result.", + "html": "

Compare with Tag subtracts the 56-bit address held in the second source register from the 56-bit address held in the first source register, updates the condition flags based on the result of the subtraction, and discards the result.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CMTST": + return { + "tooltip": "Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.", + "html": "

Compare bitwise Test bits nonzero (vector). This instruction reads each vector element in the first source SIMD&FP register, performs an AND with the corresponding vector element in the second source SIMD&FP register, and if the result is not zero, sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CNEG": + return { + "tooltip": "Conditional Negate returns, in the destination register, the negated value of the source register if the condition is TRUE, and otherwise returns the value of the source register.", + "html": "

Conditional Negate returns, in the destination register, the negated value of the source register if the condition is TRUE, and otherwise returns the value of the source register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CNOT": + return { + "tooltip": "Logically invert the boolean value in each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Logically invert the boolean value in each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

Boolean TRUE is any non-zero value in a source, and one in a result element. Boolean FALSE is always zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CNT": + return { + "tooltip": "Count bits counts the number of binary one bits in the value of the source register, and writes the result to the destination register.", + "html": "

Count bits counts the number of binary one bits in the value of the source register, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CNT": + return { + "tooltip": "Population Count per byte. This instruction counts the number of bits that have a value of one in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Population Count per byte. This instruction counts the number of bits that have a value of one in each vector element in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CNT": + return { + "tooltip": "Count non-zero bits in each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Count non-zero bits in each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CNTB": + case "CNTD": + case "CNTH": + case "CNTW": + return { + "tooltip": "Determines the number of active elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then places the result in the scalar destination.", + "html": "

Determines the number of active elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then places the result in the scalar destination.

The named predicate constraint limits the number of active elements in a single predicate to:

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CNTP": + return { + "tooltip": "Counts the number of active and true elements in the source predicate and places the scalar result in the destination general-purpose register. Inactive predicate elements are not counted.", + "html": "

Counts the number of active and true elements in the source predicate and places the scalar result in the destination general-purpose register. Inactive predicate elements are not counted.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CNTP": + return { + "tooltip": "Counts the number of true elements in the source predicate and places the scalar result in the destination general-purpose register.", + "html": "

Counts the number of true elements in the source predicate and places the scalar result in the destination general-purpose register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "COMPACT": + return { + "tooltip": "Read the active elements from the source vector and pack them into the lowest-numbered elements of the destination vector. Then set any remaining elements of the destination vector to zero.", + "html": "

Read the active elements from the source vector and pack them into the lowest-numbered elements of the destination vector. Then set any remaining elements of the destination vector to zero.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "COSP": + return { + "tooltip": "Clear Other Speculative Predictions by Context prevents predictions, other than Cache prefetch, Control flow, and Data Value predictions, that predict execution addresses based on information gathered from earlier execution within a particular execution context. Predictions, other than Cache prefetch, Control flow, and Data Value predictions, determined by the actions of code in the target execution context or contexts appearing in program order before the instruction cannot exploitatively control any speculative access occurring after the instruction is complete and synchronized.", + "html": "

Clear Other Speculative Predictions by Context prevents predictions, other than Cache prefetch, Control flow, and Data Value predictions, that predict execution addresses based on information gathered from earlier execution within a particular execution context. Predictions, other than Cache prefetch, Control flow, and Data Value predictions, determined by the actions of code in the target execution context or contexts appearing in program order before the instruction cannot exploitatively control any speculative access occurring after the instruction is complete and synchronized.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPP": + return { + "tooltip": "Cache Prefetch Prediction Restriction by Context prevents cache allocation predictions that predict execution addresses based on information gathered from earlier execution within a particular execution context. The actions of code in the target execution context or contexts appearing in program order before the instruction cannot exploitatively control cache prefetch predictions occurring after the instruction is complete and synchronized.", + "html": "

Cache Prefetch Prediction Restriction by Context prevents cache allocation predictions that predict execution addresses based on information gathered from earlier execution within a particular execution context. The actions of code in the target execution context or contexts appearing in program order before the instruction cannot exploitatively control cache prefetch predictions occurring after the instruction is complete and synchronized.

For more information, see CPP RCTX, Cache Prefetch Prediction Restriction by Context.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPY": + return { + "tooltip": "Copy a signed integer immediate to each active element in the destination vector. Inactive elements in the destination vector register are set to zero.", + "html": "

Copy a signed integer immediate to each active element in the destination vector. Inactive elements in the destination vector register are set to zero.

The immediate operand is a signed value in the range -128 to +127, and for element widths of 16 bits or higher it may also be a signed multiple of 256 in the range -32768 to +32512 (excluding 0).

The immediate is encoded in 8 bits with an optional left shift by 8. The preferred disassembly when the shift option is specified is \"#<simm8>, LSL #8\". However an assembler and disassembler may also allow use of the shifted 16-bit value unless the immediate is 0 and the shift amount is 8, which must be unambiguously described as \"#0, LSL #8\".

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPY": + return { + "tooltip": "Copy a signed integer immediate to each active element in the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Copy a signed integer immediate to each active element in the destination vector. Inactive elements in the destination vector register remain unmodified.

The immediate operand is a signed value in the range -128 to +127, and for element widths of 16 bits or higher it may also be a signed multiple of 256 in the range -32768 to +32512 (excluding 0).

The immediate is encoded in 8 bits with an optional left shift by 8. The preferred disassembly when the shift option is specified is \"#<simm8>, LSL #8\". However an assembler and disassembler may also allow use of the shifted 16-bit value unless the immediate is 0 and the shift amount is 8, which must be unambiguously described as \"#0, LSL #8\".

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPY": + return { + "tooltip": "Copy the general-purpose scalar source register to each active element in the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Copy the general-purpose scalar source register to each active element in the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPY": + return { + "tooltip": "Copy the SIMD & floating-point scalar source register to each active element in the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Copy the SIMD & floating-point scalar source register to each active element in the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYFE": + case "CPYFM": + case "CPYFP": + return { + "tooltip": "Memory Copy Forward-only. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFP, then CPYFM, and then CPYFE.", + "html": "

Memory Copy Forward-only. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFP, then CPYFM, and then CPYFE.

CPYFP performs some preconditioning of the arguments suitable for using the CPYFM instruction, and performs an implementation defined amount of the memory copy. CPYFM performs an implementation defined amount of the memory copy. CPYFE performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

The memory copy performed by these instructions is in the forward direction only, so the instructions are suitable for a memory copy only where there is no overlap between the source and destination locations, or where the source address is greater than the destination address.

The architecture supports two algorithms for the memory copy: option A and option B. Which algorithm is used is implementation defined.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYFEN": + case "CPYFMN": + case "CPYFPN": + return { + "tooltip": "Memory Copy Forward-only, reads and writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPN, then CPYFMN, and then CPYFEN.", + "html": "

Memory Copy Forward-only, reads and writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPN, then CPYFMN, and then CPYFEN.

CPYFPN performs some preconditioning of the arguments suitable for using the CPYFMN instruction, and performs an implementation defined amount of the memory copy. CPYFMN performs an implementation defined amount of the memory copy. CPYFEN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

The memory copy performed by these instructions is in the forward direction only, so the instructions are suitable for a memory copy only where there is no overlap between the source and destination locations, or where the source address is greater than the destination address.

The architecture supports two algorithms for the memory copy: option A and option B. Which algorithm is used is implementation defined.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYFERN": + case "CPYFMRN": + case "CPYFPRN": + return { + "tooltip": "Memory Copy Forward-only, reads non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPRN, then CPYFMRN, and then CPYFERN.", + "html": "

Memory Copy Forward-only, reads non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPRN, then CPYFMRN, and then CPYFERN.

CPYFPRN performs some preconditioning of the arguments suitable for using the CPYFMRN instruction, and performs an implementation defined amount of the memory copy. CPYFMRN performs an implementation defined amount of the memory copy. CPYFERN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

The memory copy performed by these instructions is in the forward direction only, so the instructions are suitable for a memory copy only where there is no overlap between the source and destination locations, or where the source address is greater than the destination address.

The architecture supports two algorithms for the memory copy: option A and option B. Which algorithm is used is implementation defined.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYFERT": + case "CPYFMRT": + case "CPYFPRT": + return { + "tooltip": "Memory Copy Forward-only, reads unprivileged. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPRT, then CPYFMRT, and then CPYFERT.", + "html": "

Memory Copy Forward-only, reads unprivileged. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPRT, then CPYFMRT, and then CPYFERT.

CPYFPRT performs some preconditioning of the arguments suitable for using the CPYFMRT instruction, and performs an implementation defined amount of the memory copy. CPYFMRT performs an implementation defined amount of the memory copy. CPYFERT performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

The memory copy performed by these instructions is in the forward direction only, so the instructions are suitable for a memory copy only where there is no overlap between the source and destination locations, or where the source address is greater than the destination address.

The architecture supports two algorithms for the memory copy: option A and option B. Which algorithm is used is implementation defined.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYFERTN": + case "CPYFMRTN": + case "CPYFPRTN": + return { + "tooltip": "Memory Copy Forward-only, reads unprivileged, reads and writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPRTN, then CPYFMRTN, and then CPYFERTN.", + "html": "

Memory Copy Forward-only, reads unprivileged, reads and writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPRTN, then CPYFMRTN, and then CPYFERTN.

CPYFPRTN performs some preconditioning of the arguments suitable for using the CPYFMRTN instruction, and performs an implementation defined amount of the memory copy. CPYFMRTN performs an implementation defined amount of the memory copy. CPYFERTN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

The memory copy performed by these instructions is in the forward direction only, so the instructions are suitable for a memory copy only where there is no overlap between the source and destination locations, or where the source address is greater than the destination address.

The architecture supports two algorithms for the memory copy: option A and option B. Which algorithm is used is implementation defined.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYFERTRN": + case "CPYFMRTRN": + case "CPYFPRTRN": + return { + "tooltip": "Memory Copy Forward-only, reads unprivileged and non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPRTRN, then CPYFMRTRN, and then CPYFERTRN.", + "html": "

Memory Copy Forward-only, reads unprivileged and non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPRTRN, then CPYFMRTRN, and then CPYFERTRN.

CPYFPRTRN performs some preconditioning of the arguments suitable for using the CPYFMRTRN instruction, and performs an implementation defined amount of the memory copy. CPYFMRTRN performs an implementation defined amount of the memory copy. CPYFERTRN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

The memory copy performed by these instructions is in the forward direction only, so the instructions are suitable for a memory copy only where there is no overlap between the source and destination locations, or where the source address is greater than the destination address.

The architecture supports two algorithms for the memory copy: option A and option B. Which algorithm is used is implementation defined.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYFERTWN": + case "CPYFMRTWN": + case "CPYFPRTWN": + return { + "tooltip": "Memory Copy Forward-only, reads unprivileged, writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPRTWN, then CPYFMRTWN, and then CPYFERTWN.", + "html": "

Memory Copy Forward-only, reads unprivileged, writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPRTWN, then CPYFMRTWN, and then CPYFERTWN.

CPYFPRTWN performs some preconditioning of the arguments suitable for using the CPYFMRTWN instruction, and performs an implementation defined amount of the memory copy. CPYFMRTWN performs an implementation defined amount of the memory copy. CPYFERTWN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

The memory copy performed by these instructions is in the forward direction only, so the instructions are suitable for a memory copy only where there is no overlap between the source and destination locations, or where the source address is greater than the destination address.

The architecture supports two algorithms for the memory copy: option A and option B. Which algorithm is used is implementation defined.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYFET": + case "CPYFMT": + case "CPYFPT": + return { + "tooltip": "Memory Copy Forward-only, reads and writes unprivileged. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPT, then CPYFMT, and then CPYFET.", + "html": "

Memory Copy Forward-only, reads and writes unprivileged. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPT, then CPYFMT, and then CPYFET.

CPYFPT performs some preconditioning of the arguments suitable for using the CPYFMT instruction, and performs an implementation defined amount of the memory copy. CPYFMT performs an implementation defined amount of the memory copy. CPYFET performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

The memory copy performed by these instructions is in the forward direction only, so the instructions are suitable for a memory copy only where there is no overlap between the source and destination locations, or where the source address is greater than the destination address.

The architecture supports two algorithms for the memory copy: option A and option B. Which algorithm is used is implementation defined.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYFETN": + case "CPYFMTN": + case "CPYFPTN": + return { + "tooltip": "Memory Copy Forward-only, reads and writes unprivileged and non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPTN, then CPYFMTN, and then CPYFETN.", + "html": "

Memory Copy Forward-only, reads and writes unprivileged and non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPTN, then CPYFMTN, and then CPYFETN.

CPYFPTN performs some preconditioning of the arguments suitable for using the CPYFMTN instruction, and performs an implementation defined amount of the memory copy. CPYFMTN performs an implementation defined amount of the memory copy. CPYFETN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

The memory copy performed by these instructions is in the forward direction only, so the instructions are suitable for a memory copy only where there is no overlap between the source and destination locations, or where the source address is greater than the destination address.

The architecture supports two algorithms for the memory copy: option A and option B. Which algorithm is used is implementation defined.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYFETRN": + case "CPYFMTRN": + case "CPYFPTRN": + return { + "tooltip": "Memory Copy Forward-only, reads and writes unprivileged, reads non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPTRN, then CPYFMTRN, and then CPYFETRN.", + "html": "

Memory Copy Forward-only, reads and writes unprivileged, reads non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPTRN, then CPYFMTRN, and then CPYFETRN.

CPYFPTRN performs some preconditioning of the arguments suitable for using the CPYFMTRN instruction, and performs an implementation defined amount of the memory copy. CPYFMTRN performs an implementation defined amount of the memory copy. CPYFETRN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

The memory copy performed by these instructions is in the forward direction only, so the instructions are suitable for a memory copy only where there is no overlap between the source and destination locations, or where the source address is greater than the destination address.

The architecture supports two algorithms for the memory copy: option A and option B. Which algorithm is used is implementation defined.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYFETWN": + case "CPYFMTWN": + case "CPYFPTWN": + return { + "tooltip": "Memory Copy Forward-only, reads and writes unprivileged, writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPTWN, then CPYFMTWN, and then CPYFETWN.", + "html": "

Memory Copy Forward-only, reads and writes unprivileged, writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPTWN, then CPYFMTWN, and then CPYFETWN.

CPYFPTWN performs some preconditioning of the arguments suitable for using the CPYFMTWN instruction, and performs an implementation defined amount of the memory copy. CPYFMTWN performs an implementation defined amount of the memory copy. CPYFETWN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

The memory copy performed by these instructions is in the forward direction only, so the instructions are suitable for a memory copy only where there is no overlap between the source and destination locations, or where the source address is greater than the destination address.

The architecture supports two algorithms for the memory copy: option A and option B. Which algorithm is used is implementation defined.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYFEWN": + case "CPYFMWN": + case "CPYFPWN": + return { + "tooltip": "Memory Copy Forward-only, writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPWN, then CPYFMWN, and then CPYFEWN.", + "html": "

Memory Copy Forward-only, writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPWN, then CPYFMWN, and then CPYFEWN.

CPYFPWN performs some preconditioning of the arguments suitable for using the CPYFMWN instruction, and performs an implementation defined amount of the memory copy. CPYFMWN performs an implementation defined amount of the memory copy. CPYFEWN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

The memory copy performed by these instructions is in the forward direction only, so the instructions are suitable for a memory copy only where there is no overlap between the source and destination locations, or where the source address is greater than the destination address.

The architecture supports two algorithms for the memory copy: option A and option B. Which algorithm is used is implementation defined.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYFEWT": + case "CPYFMWT": + case "CPYFPWT": + return { + "tooltip": "Memory Copy Forward-only, writes unprivileged. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPWT, then CPYFMWT, and then CPYFEWT.", + "html": "

Memory Copy Forward-only, writes unprivileged. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPWT, then CPYFMWT, and then CPYFEWT.

CPYFPWT performs some preconditioning of the arguments suitable for using the CPYFMWT instruction, and performs an implementation defined amount of the memory copy. CPYFMWT performs an implementation defined amount of the memory copy. CPYFEWT performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

The memory copy performed by these instructions is in the forward direction only, so the instructions are suitable for a memory copy only where there is no overlap between the source and destination locations, or where the source address is greater than the destination address.

The architecture supports two algorithms for the memory copy: option A and option B. Which algorithm is used is implementation defined.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYFEWTN": + case "CPYFMWTN": + case "CPYFPWTN": + return { + "tooltip": "Memory Copy Forward-only, writes unprivileged, reads and writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPWTN, then CPYFMWTN, and then CPYFEWTN.", + "html": "

Memory Copy Forward-only, writes unprivileged, reads and writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPWTN, then CPYFMWTN, and then CPYFEWTN.

CPYFPWTN performs some preconditioning of the arguments suitable for using the CPYFMWTN instruction, and performs an implementation defined amount of the memory copy. CPYFMWTN performs an implementation defined amount of the memory copy. CPYFEWTN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

The memory copy performed by these instructions is in the forward direction only, so the instructions are suitable for a memory copy only where there is no overlap between the source and destination locations, or where the source address is greater than the destination address.

The architecture supports two algorithms for the memory copy: option A and option B. Which algorithm is used is implementation defined.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYFEWTRN": + case "CPYFMWTRN": + case "CPYFPWTRN": + return { + "tooltip": "Memory Copy Forward-only, writes unprivileged, reads non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPWTRN, then CPYFMWTRN, and then CPYFEWTRN.", + "html": "

Memory Copy Forward-only, writes unprivileged, reads non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPWTRN, then CPYFMWTRN, and then CPYFEWTRN.

CPYFPWTRN performs some preconditioning of the arguments suitable for using the CPYFMWTRN instruction, and performs an implementation defined amount of the memory copy. CPYFMWTRN performs an implementation defined amount of the memory copy. CPYFEWTRN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

The memory copy performed by these instructions is in the forward direction only, so the instructions are suitable for a memory copy only where there is no overlap between the source and destination locations, or where the source address is greater than the destination address.

The architecture supports two algorithms for the memory copy: option A and option B. Which algorithm is used is implementation defined.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYFEWTWN": + case "CPYFMWTWN": + case "CPYFPWTWN": + return { + "tooltip": "Memory Copy Forward-only, writes unprivileged and non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPWTWN, then CPYFMWTWN, and then CPYFEWTWN.", + "html": "

Memory Copy Forward-only, writes unprivileged and non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYFPWTWN, then CPYFMWTWN, and then CPYFEWTWN.

CPYFPWTWN performs some preconditioning of the arguments suitable for using the CPYFMWTWN instruction, and performs an implementation defined amount of the memory copy. CPYFMWTWN performs an implementation defined amount of the memory copy. CPYFEWTWN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

The memory copy performed by these instructions is in the forward direction only, so the instructions are suitable for a memory copy only where there is no overlap between the source and destination locations, or where the source address is greater than the destination address.

The architecture supports two algorithms for the memory copy: option A and option B. Which algorithm is used is implementation defined.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYE": + case "CPYM": + case "CPYP": + return { + "tooltip": "Memory Copy. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYP, then CPYM, and then CPYE.", + "html": "

Memory Copy. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYP, then CPYM, and then CPYE.

CPYP performs some preconditioning of the arguments suitable for using the CPYM instruction, and performs an implementation defined amount of the memory copy. CPYM performs an implementation defined amount of the memory copy. CPYE performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

For CPYP, the following saturation logic is applied:

If Xn<63:55> != 000000000, the copy size Xn is saturated to 0x007FFFFFFFFFFFFF.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYEN": + case "CPYMN": + case "CPYPN": + return { + "tooltip": "Memory Copy, reads and writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPN, then CPYMN, and then CPYEN.", + "html": "

Memory Copy, reads and writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPN, then CPYMN, and then CPYEN.

CPYPN performs some preconditioning of the arguments suitable for using the CPYMN instruction, and performs an implementation defined amount of the memory copy. CPYMN performs an implementation defined amount of the memory copy. CPYEN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

For CPYPN, the following saturation logic is applied:

If Xn<63:55> != 000000000, the copy size Xn is saturated to 0x007FFFFFFFFFFFFF.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYERN": + case "CPYMRN": + case "CPYPRN": + return { + "tooltip": "Memory Copy, reads non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPRN, then CPYMRN, and then CPYERN.", + "html": "

Memory Copy, reads non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPRN, then CPYMRN, and then CPYERN.

CPYPRN performs some preconditioning of the arguments suitable for using the CPYMRN instruction, and performs an implementation defined amount of the memory copy. CPYMRN performs an implementation defined amount of the memory copy. CPYERN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

For CPYPRN, the following saturation logic is applied:

If Xn<63:55> != 000000000, the copy size Xn is saturated to 0x007FFFFFFFFFFFFF.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYERT": + case "CPYMRT": + case "CPYPRT": + return { + "tooltip": "Memory Copy, reads unprivileged. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPRT, then CPYMRT, and then CPYERT.", + "html": "

Memory Copy, reads unprivileged. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPRT, then CPYMRT, and then CPYERT.

CPYPRT performs some preconditioning of the arguments suitable for using the CPYMRT instruction, and performs an implementation defined amount of the memory copy. CPYMRT performs an implementation defined amount of the memory copy. CPYERT performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

For CPYPRT, the following saturation logic is applied:

If Xn<63:55> != 000000000, the copy size Xn is saturated to 0x007FFFFFFFFFFFFF.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYERTN": + case "CPYMRTN": + case "CPYPRTN": + return { + "tooltip": "Memory Copy, reads unprivileged, reads and writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPRTN, then CPYMRTN, and then CPYERTN.", + "html": "

Memory Copy, reads unprivileged, reads and writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPRTN, then CPYMRTN, and then CPYERTN.

CPYPRTN performs some preconditioning of the arguments suitable for using the CPYMRTN instruction, and performs an implementation defined amount of the memory copy. CPYMRTN performs an implementation defined amount of the memory copy. CPYERTN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

For CPYPRTN, the following saturation logic is applied:

If Xn<63:55> != 000000000, the copy size Xn is saturated to 0x007FFFFFFFFFFFFF.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYERTRN": + case "CPYMRTRN": + case "CPYPRTRN": + return { + "tooltip": "Memory Copy, reads unprivileged and non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPRTRN, then CPYMRTRN, and then CPYERTRN.", + "html": "

Memory Copy, reads unprivileged and non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPRTRN, then CPYMRTRN, and then CPYERTRN.

CPYPRTRN performs some preconditioning of the arguments suitable for using the CPYMRTRN instruction, and performs an implementation defined amount of the memory copy. CPYMRTRN performs an implementation defined amount of the memory copy. CPYERTRN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

For CPYPRTRN, the following saturation logic is applied:

If Xn<63:55> != 000000000, the copy size Xn is saturated to 0x007FFFFFFFFFFFFF.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYERTWN": + case "CPYMRTWN": + case "CPYPRTWN": + return { + "tooltip": "Memory Copy, reads unprivileged, writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPRTWN, then CPYMRTWN, and then CPYERTWN.", + "html": "

Memory Copy, reads unprivileged, writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPRTWN, then CPYMRTWN, and then CPYERTWN.

CPYPRTWN performs some preconditioning of the arguments suitable for using the CPYMRTWN instruction, and performs an implementation defined amount of the memory copy. CPYMRTWN performs an implementation defined amount of the memory copy. CPYERTWN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

For CPYPRTWN, the following saturation logic is applied:

If Xn<63:55> != 000000000, the copy size Xn is saturated to 0x007FFFFFFFFFFFFF.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYET": + case "CPYMT": + case "CPYPT": + return { + "tooltip": "Memory Copy, reads and writes unprivileged. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPT, then CPYMT, and then CPYET.", + "html": "

Memory Copy, reads and writes unprivileged. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPT, then CPYMT, and then CPYET.

CPYPT performs some preconditioning of the arguments suitable for using the CPYMT instruction, and performs an implementation defined amount of the memory copy. CPYMT performs an implementation defined amount of the memory copy. CPYET performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

For CPYPT, the following saturation logic is applied:

If Xn<63:55> != 000000000, the copy size Xn is saturated to 0x007FFFFFFFFFFFFF.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYETN": + case "CPYMTN": + case "CPYPTN": + return { + "tooltip": "Memory Copy, reads and writes unprivileged and non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPTN, then CPYMTN, and then CPYETN.", + "html": "

Memory Copy, reads and writes unprivileged and non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPTN, then CPYMTN, and then CPYETN.

CPYPTN performs some preconditioning of the arguments suitable for using the CPYMTN instruction, and performs an implementation defined amount of the memory copy. CPYMTN performs an implementation defined amount of the memory copy. CPYETN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

For CPYPTN, the following saturation logic is applied:

If Xn<63:55> != 000000000, the copy size Xn is saturated to 0x007FFFFFFFFFFFFF.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYETRN": + case "CPYMTRN": + case "CPYPTRN": + return { + "tooltip": "Memory Copy, reads and writes unprivileged, reads non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPTRN, then CPYMTRN, and then CPYETRN.", + "html": "

Memory Copy, reads and writes unprivileged, reads non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPTRN, then CPYMTRN, and then CPYETRN.

CPYPTRN performs some preconditioning of the arguments suitable for using the CPYMTRN instruction, and performs an implementation defined amount of the memory copy. CPYMTRN performs an implementation defined amount of the memory copy. CPYETRN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

For CPYPTRN, the following saturation logic is applied:

If Xn<63:55> != 000000000, the copy size Xn is saturated to 0x007FFFFFFFFFFFFF.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYETWN": + case "CPYMTWN": + case "CPYPTWN": + return { + "tooltip": "Memory Copy, reads and writes unprivileged, writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPTWN, then CPYMTWN, and then CPYETWN.", + "html": "

Memory Copy, reads and writes unprivileged, writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPTWN, then CPYMTWN, and then CPYETWN.

CPYPTWN performs some preconditioning of the arguments suitable for using the CPYMTWN instruction, and performs an implementation defined amount of the memory copy. CPYMTWN performs an implementation defined amount of the memory copy. CPYETWN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

For CPYPTWN, the following saturation logic is applied:

If Xn<63:55> != 000000000, the copy size Xn is saturated to 0x007FFFFFFFFFFFFF.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYEWN": + case "CPYMWN": + case "CPYPWN": + return { + "tooltip": "Memory Copy, writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPWN, then CPYMWN, and then CPYEWN.", + "html": "

Memory Copy, writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPWN, then CPYMWN, and then CPYEWN.

CPYPWN performs some preconditioning of the arguments suitable for using the CPYMWN instruction, and performs an implementation defined amount of the memory copy. CPYMWN performs an implementation defined amount of the memory copy. CPYEWN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

For CPYPWN, the following saturation logic is applied:

If Xn<63:55> != 000000000, the copy size Xn is saturated to 0x007FFFFFFFFFFFFF.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYEWT": + case "CPYMWT": + case "CPYPWT": + return { + "tooltip": "Memory Copy, writes unprivileged. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPWT, then CPYMWT, and then CPYEWT.", + "html": "

Memory Copy, writes unprivileged. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPWT, then CPYMWT, and then CPYEWT.

CPYPWT performs some preconditioning of the arguments suitable for using the CPYMWT instruction, and performs an implementation defined amount of the memory copy. CPYMWT performs an implementation defined amount of the memory copy. CPYEWT performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

For CPYPWT, the following saturation logic is applied:

If Xn<63:55> != 000000000, the copy size Xn is saturated to 0x007FFFFFFFFFFFFF.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYEWTN": + case "CPYMWTN": + case "CPYPWTN": + return { + "tooltip": "Memory Copy, writes unprivileged, reads and writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPWTN, then CPYMWTN, and then CPYEWTN.", + "html": "

Memory Copy, writes unprivileged, reads and writes non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPWTN, then CPYMWTN, and then CPYEWTN.

CPYPWTN performs some preconditioning of the arguments suitable for using the CPYMWTN instruction, and performs an implementation defined amount of the memory copy. CPYMWTN performs an implementation defined amount of the memory copy. CPYEWTN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

For CPYPWTN, the following saturation logic is applied:

If Xn<63:55> != 000000000, the copy size Xn is saturated to 0x007FFFFFFFFFFFFF.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYEWTRN": + case "CPYMWTRN": + case "CPYPWTRN": + return { + "tooltip": "Memory Copy, writes unprivileged, reads non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPWTRN, then CPYMWTRN, and then CPYEWTRN.", + "html": "

Memory Copy, writes unprivileged, reads non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPWTRN, then CPYMWTRN, and then CPYEWTRN.

CPYPWTRN performs some preconditioning of the arguments suitable for using the CPYMWTRN instruction, and performs an implementation defined amount of the memory copy. CPYMWTRN performs an implementation defined amount of the memory copy. CPYEWTRN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

For CPYPWTRN, the following saturation logic is applied:

If Xn<63:55> != 000000000, the copy size Xn is saturated to 0x007FFFFFFFFFFFFF.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CPYEWTWN": + case "CPYMWTWN": + case "CPYPWTWN": + return { + "tooltip": "Memory Copy, writes unprivileged and non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPWTWN, then CPYMWTWN, and then CPYEWTWN.", + "html": "

Memory Copy, writes unprivileged and non-temporal. These instructions perform a memory copy. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPWTWN, then CPYMWTWN, and then CPYEWTWN.

CPYPWTWN performs some preconditioning of the arguments suitable for using the CPYMWTWN instruction, and performs an implementation defined amount of the memory copy. CPYMWTWN performs an implementation defined amount of the memory copy. CPYEWTWN performs the last part of the memory copy.

The inclusion of implementation defined amounts of memory copy allows some optimization of the size that can be performed.

For CPYPWTWN, the following saturation logic is applied:

If Xn<63:55> != 000000000, the copy size Xn is saturated to 0x007FFFFFFFFFFFFF.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CRC32B": + case "CRC32H": + case "CRC32W": + case "CRC32X": + return { + "tooltip": "CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x04C11DB7 is used for the CRC calculation.", + "html": "

CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x04C11DB7 is used for the CRC calculation.

In an Armv8.0 implementation, this is an optional instruction. From Armv8.1, it is mandatory for all implementations to implement this instruction.

ID_AA64ISAR0_EL1.CRC32 indicates whether this instruction is supported.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CRC32CB": + case "CRC32CH": + case "CRC32CW": + case "CRC32CX": + return { + "tooltip": "CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x1EDC6F41 is used for the CRC calculation.", + "html": "

CRC32 checksum performs a cyclic redundancy check (CRC) calculation on a value held in a general-purpose register. It takes an input CRC value in the first source operand, performs a CRC on the input value in the second source operand, and returns the output CRC value. The second source operand can be 8, 16, 32, or 64 bits. To align with common usage, the bit order of the values is reversed as part of the operation, and the polynomial 0x1EDC6F41 is used for the CRC calculation.

In an Armv8.0 implementation, this is an optional instruction. From Armv8.1, it is mandatory for all implementations to implement this instruction.

ID_AA64ISAR0_EL1.CRC32 indicates whether this instruction is supported.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CSDB": + return { + "tooltip": "Consumption of Speculative Data Barrier is a memory barrier that controls speculative execution and data value prediction.", + "html": "

Consumption of Speculative Data Barrier is a memory barrier that controls speculative execution and data value prediction.

No instruction other than branch instructions appearing in program order after the CSDB can be speculatively executed using the results of any:

For purposes of the definition of CSDB, PSTATE.{N,Z,C,V} is not considered a data value. This definition permits:

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CSEL": + return { + "tooltip": "If the condition is true, Conditional Select writes the value of the first source register to the destination register. If the condition is false, it writes the value of the second source register to the destination register.", + "html": "

If the condition is true, Conditional Select writes the value of the first source register to the destination register. If the condition is false, it writes the value of the second source register to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CSET": + return { + "tooltip": "Conditional Set sets the destination register to 1 if the condition is TRUE, and otherwise sets it to 0.", + "html": "

Conditional Set sets the destination register to 1 if the condition is TRUE, and otherwise sets it to 0.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CSETM": + return { + "tooltip": "Conditional Set Mask sets all bits of the destination register to 1 if the condition is TRUE, and otherwise sets all bits to 0.", + "html": "

Conditional Set Mask sets all bits of the destination register to 1 if the condition is TRUE, and otherwise sets all bits to 0.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CSINC": + return { + "tooltip": "Conditional Select Increment returns, in the destination register, the value of the first source register if the condition is TRUE, and otherwise returns the value of the second source register incremented by 1.", + "html": "

Conditional Select Increment returns, in the destination register, the value of the first source register if the condition is TRUE, and otherwise returns the value of the second source register incremented by 1.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CSINV": + return { + "tooltip": "Conditional Select Invert returns, in the destination register, the value of the first source register if the condition is TRUE, and otherwise returns the bitwise inversion value of the second source register.", + "html": "

Conditional Select Invert returns, in the destination register, the value of the first source register if the condition is TRUE, and otherwise returns the bitwise inversion value of the second source register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CSNEG": + return { + "tooltip": "Conditional Select Negation returns, in the destination register, the value of the first source register if the condition is TRUE, and otherwise returns the negated value of the second source register.", + "html": "

Conditional Select Negation returns, in the destination register, the value of the first source register if the condition is TRUE, and otherwise returns the negated value of the second source register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CTERMEQ": + case "CTERMNE": + return { + "tooltip": "Detect termination conditions in serialized vector loops. Tests whether the comparison between the scalar source operands holds true and if not tests the state of the !Last condition flag (C) which indicates whether the previous flag-setting predicate instruction selected the last element of the vector partition.", + "html": "

Detect termination conditions in serialized vector loops. Tests whether the comparison between the scalar source operands holds true and if not tests the state of the !Last condition flag (C) which indicates whether the previous flag-setting predicate instruction selected the last element of the vector partition.

The Z and C condition flags are preserved by this instruction. The N and V condition flags are set as a pair to generate one of the following conditions for a subsequent conditional instruction:

The scalar source operands are 32-bit or 64-bit general-purpose registers of the same size.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "CTZ": + return { + "tooltip": "Count Trailing Zeros counts the number of consecutive binary zero bits, starting from the least significant bit in the source register, and places the count in the destination register.", + "html": "

Count Trailing Zeros counts the number of consecutive binary zero bits, starting from the least significant bit in the source register, and places the count in the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "DC": + return { + "tooltip": "Data Cache operation. For more information, see op0==0b01, cache maintenance, TLB maintenance, and address translation instructions.", + "html": "

Data Cache operation. For more information, see op0==0b01, cache maintenance, TLB maintenance, and address translation instructions.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "DCPS1": + return { + "tooltip": "Debug Change PE State to EL1, when executed in Debug state", + "html": "

Debug Change PE State to EL1, when executed in Debug state:

The target exception level of a DCPS1 instruction is:

When the target Exception level of a DCPS1 instruction is ELx, on executing this instruction:

This instruction is undefined at EL0 in Non-secure state if EL2 is implemented and HCR_EL2.TGE == 1.

This instruction is always undefined in Non-debug state.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "DCPS2": + return { + "tooltip": "Debug Change PE State to EL2, when executed in Debug state", + "html": "

Debug Change PE State to EL2, when executed in Debug state:

The target exception level of a DCPS2 instruction is:

When the target Exception level of a DCPS2 instruction is ELx, on executing this instruction:

This instruction is undefined at the following exception levels:

This instruction is always undefined in Non-debug state.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "DCPS3": + return { + "tooltip": "Debug Change PE State to EL3, when executed in Debug state", + "html": "

Debug Change PE State to EL3, when executed in Debug state:

The target exception level of a DCPS3 instruction is EL3.

On executing a DCPS3 instruction:

This instruction is undefined at all exception levels if either:

This instruction is always undefined in Non-debug state.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "DECB": + case "DECD": + case "DECH": + case "DECW": + return { + "tooltip": "Determines the number of active elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement the scalar destination.", + "html": "

Determines the number of active elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement the scalar destination.

The named predicate constraint limits the number of active elements in a single predicate to:

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "DECD": + case "DECH": + case "DECW": + return { + "tooltip": "Determines the number of active elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement all destination vector elements.", + "html": "

Determines the number of active elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement all destination vector elements.

The named predicate constraint limits the number of active elements in a single predicate to:

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "DECP": + return { + "tooltip": "Counts the number of true elements in the source predicate and then uses the result to decrement the scalar destination.", + "html": "

Counts the number of true elements in the source predicate and then uses the result to decrement the scalar destination.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "DECP": + return { + "tooltip": "Counts the number of true elements in the source predicate and then uses the result to decrement all destination vector elements.", + "html": "

Counts the number of true elements in the source predicate and then uses the result to decrement all destination vector elements.

The predicate size specifier may be omitted in assembler source code, but this is deprecated and will be prohibited in a future release of the architecture.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "DGH": + return { + "tooltip": "Data Gathering Hint is a hint instruction that indicates that it is not expected to be performance optimal to merge memory accesses with Normal Non-cacheable or Device-GRE attributes appearing in program order before the hint instruction with any memory accesses appearing after the hint instruction into a single memory transaction on an interconnect.", + "html": "

Data Gathering Hint is a hint instruction that indicates that it is not expected to be performance optimal to merge memory accesses with Normal Non-cacheable or Device-GRE attributes appearing in program order before the hint instruction with any memory accesses appearing after the hint instruction into a single memory transaction on an interconnect.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "DMB": + return { + "tooltip": "Data Memory Barrier is a memory barrier that ensures the ordering of observations of memory accesses, see Data Memory Barrier.", + "html": "

Data Memory Barrier is a memory barrier that ensures the ordering of observations of memory accesses, see Data Memory Barrier.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "DSB": + return { + "tooltip": "Data Synchronization Barrier is a memory barrier that ensures the completion of memory accesses, see Data Synchronization Barrier.", + "html": "

Data Synchronization Barrier is a memory barrier that ensures the completion of memory accesses, see Data Synchronization Barrier.

A DSB instruction with the nXS qualifier is complete when the subset of these memory accesses with the XS attribute set to 0 are complete. It does not require that memory accesses with the XS attribute set to 1 are complete.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "DUP": + return { + "tooltip": "Duplicate vector element to vector or scalar. This instruction duplicates the vector element at the specified element index in the source SIMD&FP register into a scalar or each element in a vector, and writes the result to the destination SIMD&FP register.", + "html": "

Duplicate vector element to vector or scalar. This instruction duplicates the vector element at the specified element index in the source SIMD&FP register into a scalar or each element in a vector, and writes the result to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "DUP": + return { + "tooltip": "Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.", + "html": "

Duplicate general-purpose register to vector. This instruction duplicates the contents of the source general-purpose register into a scalar or each element in a vector, and writes the result to the SIMD&FP destination register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "DUP": + return { + "tooltip": "Unconditionally broadcast the signed integer immediate into each element of the destination vector. This instruction is unpredicated.", + "html": "

Unconditionally broadcast the signed integer immediate into each element of the destination vector. This instruction is unpredicated.

The immediate operand is a signed value in the range -128 to +127, and for element widths of 16 bits or higher it may also be a signed multiple of 256 in the range -32768 to +32512 (excluding 0).

The immediate is encoded in 8 bits with an optional left shift by 8. The preferred disassembly when the shift option is specified is \"#<simm8>, LSL #8\". However an assembler and disassembler may also allow use of the shifted 16-bit value unless the immediate is 0 and the shift amount is 8, which must be unambiguously described as \"#0, LSL #8\".

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "DUP": + return { + "tooltip": "Unconditionally broadcast the general-purpose scalar source register into each element of the destination vector. This instruction is unpredicated.", + "html": "

Unconditionally broadcast the general-purpose scalar source register into each element of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "DUP": + return { + "tooltip": "Unconditionally broadcast the indexed source vector element into each element of the destination vector. This instruction is unpredicated.", + "html": "

Unconditionally broadcast the indexed source vector element into each element of the destination vector. This instruction is unpredicated.

The immediate element index is in the range of 0 to 63 (bytes), 31 (halfwords), 15 (words), 7 (doublewords) or 3 (quadwords). Selecting an element beyond the accessible vector length causes the destination vector to be set to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "DUPM": + return { + "tooltip": "Unconditionally broadcast the logical bitmask immediate into each element of the destination vector. This instruction is unpredicated. The immediate is a 64-bit value consisting of a single run of ones or zeros repeating every 2, 4, 8, 16, 32 or 64 bits.", + "html": "

Unconditionally broadcast the logical bitmask immediate into each element of the destination vector. This instruction is unpredicated. The immediate is a 64-bit value consisting of a single run of ones or zeros repeating every 2, 4, 8, 16, 32 or 64 bits.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "DUPQ": + return { + "tooltip": "Unconditionally broadcast the indexed element within each 128-bit source vector segment to all elements of the corresponding destination vector segment. This instruction is unpredicated.", + "html": "

Unconditionally broadcast the indexed element within each 128-bit source vector segment to all elements of the corresponding destination vector segment. This instruction is unpredicated.

The immediate element index is in the range of 0 to 15 (bytes), 7 (halfwords), 3 (words) or 1 (doublewords).

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "DVP": + return { + "tooltip": "Data Value Prediction Restriction by Context prevents data value predictions that predict execution addresses based on information gathered from earlier execution within a particular execution context. Data value predictions determined by the actions of code in the target execution context or contexts appearing in program order before the instruction cannot be used to exploitatively control speculative execution occurring after the instruction is complete and synchronized.", + "html": "

Data Value Prediction Restriction by Context prevents data value predictions that predict execution addresses based on information gathered from earlier execution within a particular execution context. Data value predictions determined by the actions of code in the target execution context or contexts appearing in program order before the instruction cannot be used to exploitatively control speculative execution occurring after the instruction is complete and synchronized.

For more information, see DVP RCTX, Data Value Prediction Restriction by Context.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "EON": + return { + "tooltip": "Bitwise Exclusive-OR NOT (shifted register) performs a bitwise exclusive-OR NOT of a register value and an optionally-shifted register value, and writes the result to the destination register.", + "html": "

Bitwise Exclusive-OR NOT (shifted register) performs a bitwise exclusive-OR NOT of a register value and an optionally-shifted register value, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "EON": + return { + "tooltip": "Bitwise exclusive OR an inverted immediate with each 64-bit element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate is a 64-bit value consisting of a single run of ones or zeros repeating every 2, 4, 8, 16, 32 or 64 bits. This instruction is unpredicated.", + "html": "

Bitwise exclusive OR an inverted immediate with each 64-bit element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate is a 64-bit value consisting of a single run of ones or zeros repeating every 2, 4, 8, 16, 32 or 64 bits. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "EOR3": + return { + "tooltip": "Three-way Exclusive-OR performs a three-way exclusive-OR of the values in the three source SIMD&FP registers, and writes the result to the destination SIMD&FP register.", + "html": "

Three-way Exclusive-OR performs a three-way exclusive-OR of the values in the three source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

This instruction is implemented only when FEAT_SHA3 is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "EOR3": + return { + "tooltip": "Bitwise exclusive OR the corresponding elements of all three source vectors, and destructively place the results in the corresponding elements of the destination and first source vector. This instruction is unpredicated.", + "html": "

Bitwise exclusive OR the corresponding elements of all three source vectors, and destructively place the results in the corresponding elements of the destination and first source vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "EOR": + return { + "tooltip": "Bitwise Exclusive-OR (vector). This instruction performs a bitwise exclusive-OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.", + "html": "

Bitwise Exclusive-OR (vector). This instruction performs a bitwise exclusive-OR operation between the two source SIMD&FP registers, and places the result in the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "EOR": + return { + "tooltip": "Bitwise Exclusive-OR (immediate) performs a bitwise exclusive-OR of a register value and an immediate value, and writes the result to the destination register.", + "html": "

Bitwise Exclusive-OR (immediate) performs a bitwise exclusive-OR of a register value and an immediate value, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "EOR": + return { + "tooltip": "Bitwise Exclusive-OR (shifted register) performs a bitwise exclusive-OR of a register value and an optionally-shifted register value, and writes the result to the destination register.", + "html": "

Bitwise Exclusive-OR (shifted register) performs a bitwise exclusive-OR of a register value and an optionally-shifted register value, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "EOR": + return { + "tooltip": "Bitwise exclusive OR active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.", + "html": "

Bitwise exclusive OR active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "EOR": + return { + "tooltip": "Bitwise exclusive OR active elements of the second source vector with corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Bitwise exclusive OR active elements of the second source vector with corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "EOR": + return { + "tooltip": "Bitwise exclusive OR an immediate with each 64-bit element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate is a 64-bit value consisting of a single run of ones or zeros repeating every 2, 4, 8, 16, 32 or 64 bits. This instruction is unpredicated.", + "html": "

Bitwise exclusive OR an immediate with each 64-bit element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate is a 64-bit value consisting of a single run of ones or zeros repeating every 2, 4, 8, 16, 32 or 64 bits. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "EOR": + return { + "tooltip": "Bitwise exclusive OR all elements of the second source vector with corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.", + "html": "

Bitwise exclusive OR all elements of the second source vector with corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "EORBT": + return { + "tooltip": "Interleaving exclusive OR between the even-numbered elements of the first source vector register and the odd-numbered elements of the second source vector register, placing the result in the even-numbered elements of the destination vector, leaving the odd-numbered elements unchanged. This instruction is unpredicated.", + "html": "

Interleaving exclusive OR between the even-numbered elements of the first source vector register and the odd-numbered elements of the second source vector register, placing the result in the even-numbered elements of the destination vector, leaving the odd-numbered elements unchanged. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "EORQV": + return { + "tooltip": "Bitwise exclusive OR of the same element numbers from each 128-bit source vector segment, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as all zeros.", + "html": "

Bitwise exclusive OR of the same element numbers from each 128-bit source vector segment, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as all zeros.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "EORS": + return { + "tooltip": "Bitwise exclusive OR active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

Bitwise exclusive OR active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "EORTB": + return { + "tooltip": "Interleaving exclusive OR between the odd-numbered elements of the first source vector register and the even-numbered elements of the second source vector register, placing the result in the odd-numbered elements of the destination vector, leaving the even-numbered elements unchanged. This instruction is unpredicated.", + "html": "

Interleaving exclusive OR between the odd-numbered elements of the first source vector register and the even-numbered elements of the second source vector register, placing the result in the odd-numbered elements of the destination vector, leaving the even-numbered elements unchanged. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "EORV": + return { + "tooltip": "Bitwise exclusive OR horizontally across all lanes of a vector, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as zero.", + "html": "

Bitwise exclusive OR horizontally across all lanes of a vector, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ERET": + return { + "tooltip": "Exception Return using the ELR and SPSR for the current Exception level. When executed, the PE restores PSTATE from the SPSR, and branches to the address held in the ELR.", + "html": "

Exception Return using the ELR and SPSR for the current Exception level. When executed, the PE restores PSTATE from the SPSR, and branches to the address held in the ELR.

The PE checks the SPSR for the current Exception level for an illegal return event. See Illegal return events from AArch64 state.

ERET is undefined at EL0.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ERETAA": + case "ERETAB": + return { + "tooltip": "Exception Return, with pointer authentication. This instruction authenticates the address in ELR, using SP as the modifier and the specified key, the PE restores PSTATE from the SPSR for the current Exception level, and branches to the authenticated address.", + "html": "

Exception Return, with pointer authentication. This instruction authenticates the address in ELR, using SP as the modifier and the specified key, the PE restores PSTATE from the SPSR for the current Exception level, and branches to the authenticated address.

Key A is used for ERETAA. Key B is used for ERETAB.

If the authentication passes, the PE continues execution at the target of the branch. For information on behavior if the authentication fails, see Faulting on pointer authentication.

The authenticated address is not written back to ELR.

The PE checks the SPSR for the current Exception level for an illegal return event. See Illegal return events from AArch64 state.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ESB": + return { + "tooltip": "Error Synchronization Barrier is an error synchronization event that might also update DISR_EL1 and VDISR_EL2.", + "html": "

Error Synchronization Barrier is an error synchronization event that might also update DISR_EL1 and VDISR_EL2.

This instruction can be used at all Exception levels and in Debug state.

In Debug state, this instruction behaves as if SError interrupts are masked at all Exception levels. See Error Synchronization Barrier in the Arm(R) Reliability, Availability, and Serviceability (RAS) Specification, Armv8, for Armv8-A architecture profile.

If the RAS Extension is not implemented, this instruction executes as a NOP.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "EXT": + return { + "tooltip": "Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.", + "html": "

Extract vector from pair of vectors. This instruction extracts the lowest vector elements from the second source SIMD&FP register and the highest vector elements from the first source SIMD&FP register, concatenates the results into a vector, and writes the vector to the destination SIMD&FP register vector. The index value specifies the lowest vector element to extract from the first source register, and consecutive elements are extracted from the first, then second, source registers until the destination vector is filled.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "EXT": + return { + "tooltip": "Copy the indexed byte up to the last byte of the first source vector to the bottom of the result vector, then fill the remainder of the result starting from the first byte of the second source vector. The result is placed destructively in the destination and first source vector, or constructively in the destination vector. This instruction is unpredicated.", + "html": "

Copy the indexed byte up to the last byte of the first source vector to the bottom of the result vector, then fill the remainder of the result starting from the first byte of the second source vector. The result is placed destructively in the destination and first source vector, or constructively in the destination vector. This instruction is unpredicated.

An index that is greater than or equal to the vector length in bytes is treated as zero, resulting in the first source vector being copied to the result unchanged.

The Destructive encoding of this instruction might be immediately preceded in program order by a MOVPRFX instruction. The MOVPRFX instruction must conform to all of the following requirements, otherwise the behavior of the MOVPRFX and this instruction is UNPREDICTABLE: The MOVPRFX instruction must be unpredicated. The MOVPRFX instruction must specify the same destination register as this instruction. The destination register must not refer to architectural register state referenced by any other source operand register of this instruction.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "EXTQ": + return { + "tooltip": "For each 128-bit vector segment of the result, copy the indexed byte up to and including the last byte of the corresponding first source vector segment to the bottom of the result segment, then fill the remainder of the result segment starting from the first byte of the corresponding second source vector segment. The result segments are destructively placed in the corresponding first source vector segment. This instruction is unpredicated.", + "html": "

For each 128-bit vector segment of the result, copy the indexed byte up to and including the last byte of the corresponding first source vector segment to the bottom of the result segment, then fill the remainder of the result segment starting from the first byte of the corresponding second source vector segment. The result segments are destructively placed in the corresponding first source vector segment. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "EXTR": + return { + "tooltip": "Extract register extracts a register from a pair of registers.", + "html": "

Extract register extracts a register from a pair of registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FABD": + return { + "tooltip": "Floating-point Absolute Difference (vector). This instruction subtracts the floating-point values in the elements of the second source SIMD&FP register, from the corresponding floating-point values in the elements of the first source SIMD&FP register, places the absolute value of each result in a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Floating-point Absolute Difference (vector). This instruction subtracts the floating-point values in the elements of the second source SIMD&FP register, from the corresponding floating-point values in the elements of the first source SIMD&FP register, places the absolute value of each result in a vector, and writes the vector to the destination SIMD&FP register.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FABD": + return { + "tooltip": "Compute the absolute difference of active floating-point elements of the second source vector and corresponding floating-point elements of the first source vector and destructively place the result in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Compute the absolute difference of active floating-point elements of the second source vector and corresponding floating-point elements of the first source vector and destructively place the result in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FABS": + return { + "tooltip": "Floating-point Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Floating-point Absolute value (vector). This instruction calculates the absolute value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FABS": + return { + "tooltip": "Floating-point Absolute value (scalar). This instruction calculates the absolute value in the SIMD&FP source register and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Absolute value (scalar). This instruction calculates the absolute value in the SIMD&FP source register and writes the result to the SIMD&FP destination register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FABS": + return { + "tooltip": "Take the absolute value of each active floating-point element of the source vector, and place the results in the corresponding elements of the destination vector. This clears the sign bit and cannot signal a floating-point exception. Inactive elements in the destination vector register remain unmodified.", + "html": "

Take the absolute value of each active floating-point element of the source vector, and place the results in the corresponding elements of the destination vector. This clears the sign bit and cannot signal a floating-point exception. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FACGE": + return { + "tooltip": "Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.", + "html": "

Floating-point Absolute Compare Greater than or Equal (vector). This instruction compares the absolute value of each floating-point value in the first source SIMD&FP register with the absolute value of the corresponding floating-point value in the second source SIMD&FP register and if the first value is greater than or equal to the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FAC": + case "FACAL": + case "FACCC": + case "FACCS": + case "FACEQ": + case "FACGE": + case "FACGT": + case "FACHI": + case "FACLE": + case "FACLS": + case "FACLT": + case "FACMI": + case "FACNE": + case "FACPL": + case "FACVC": + case "FACVS": + return { + "tooltip": "Compare active absolute values of floating-point elements in the first source vector with corresponding absolute values of elements in the second source vector, and place the boolean results of the specified comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.", + "html": "

Compare active absolute values of floating-point elements in the first source vector with corresponding absolute values of elements in the second source vector, and place the boolean results of the specified comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FACGT": + return { + "tooltip": "Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.", + "html": "

Floating-point Absolute Compare Greater than (vector). This instruction compares the absolute value of each vector element in the first source SIMD&FP register with the absolute value of the corresponding vector element in the second source SIMD&FP register and if the first value is greater than the second value sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FACLE": + return { + "tooltip": "Compare active absolute values of floating-point elements in the first source vector being less than or equal to corresponding absolute values of elements in the second source vector, and place the boolean results of the comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.", + "html": "

Compare active absolute values of floating-point elements in the first source vector being less than or equal to corresponding absolute values of elements in the second source vector, and place the boolean results of the comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FACLT": + return { + "tooltip": "Compare active absolute values of floating-point elements in the first source vector being less than corresponding absolute values of elements in the second source vector, and place the boolean results of the comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.", + "html": "

Compare active absolute values of floating-point elements in the first source vector being less than corresponding absolute values of elements in the second source vector, and place the boolean results of the comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FADD": + return { + "tooltip": "Floating-point Add (vector). This instruction adds corresponding vector elements in the two source SIMD&FP registers, writes the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.", + "html": "

Floating-point Add (vector). This instruction adds corresponding vector elements in the two source SIMD&FP registers, writes the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FADD": + return { + "tooltip": "Floating-point Add (scalar). This instruction adds the floating-point values of the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.", + "html": "

Floating-point Add (scalar). This instruction adds the floating-point values of the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FADD": + return { + "tooltip": "Add an immediate to each active floating-point element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate may take the value +0.5 or +1.0 only. Inactive elements in the destination vector register remain unmodified.", + "html": "

Add an immediate to each active floating-point element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate may take the value +0.5 or +1.0 only. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FADD": + return { + "tooltip": "Add active floating-point elements of the second source vector to corresponding floating-point elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Add active floating-point elements of the second source vector to corresponding floating-point elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FADD": + return { + "tooltip": "Add all floating-point elements of the second source vector to corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.", + "html": "

Add all floating-point elements of the second source vector to corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FADD": + return { + "tooltip": "Destructively add all elements of the two or four source vectors to the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

Destructively add all elements of the two or four source vectors to the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.F64F64 indicates whether the double-precision variant is implemented, and ID_AA64SMFR0_EL1.F16F16 indicates whether the half-precision variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FADDA": + return { + "tooltip": "Floating-point add a SIMD&FP scalar source and all active lanes of the vector source and place the result destructively in the SIMD&FP scalar source register. Vector elements are processed strictly in order from low to high, with the scalar source providing the initial value. Inactive elements in the source vector are ignored.", + "html": "

Floating-point add a SIMD&FP scalar source and all active lanes of the vector source and place the result destructively in the SIMD&FP scalar source register. Vector elements are processed strictly in order from low to high, with the scalar source providing the initial value. Inactive elements in the source vector are ignored.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FADDP": + return { + "tooltip": "Floating-point Add Pair of elements (scalar). This instruction adds two floating-point vector elements in the source SIMD&FP register and writes the scalar result into the destination SIMD&FP register.", + "html": "

Floating-point Add Pair of elements (scalar). This instruction adds two floating-point vector elements in the source SIMD&FP register and writes the scalar result into the destination SIMD&FP register.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FADDP": + return { + "tooltip": "Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.", + "html": "

Floating-point Add Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, adds each pair of values together, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FADDP": + return { + "tooltip": "Add pairs of adjacent floating-point elements within each source vector, and interleave the results from corresponding lanes. The interleaved result values are destructively placed in the first source vector.", + "html": "

Add pairs of adjacent floating-point elements within each source vector, and interleave the results from corresponding lanes. The interleaved result values are destructively placed in the first source vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FADDQV": + return { + "tooltip": "Floating-point addition of the same element numbers from each 128-bit source vector segment using a recursive pairwise reduction, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as +0.0.", + "html": "

Floating-point addition of the same element numbers from each 128-bit source vector segment using a recursive pairwise reduction, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as +0.0.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FADDV": + return { + "tooltip": "Floating-point add horizontally over all lanes of a vector using a recursive pairwise reduction, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as +0.0.", + "html": "

Floating-point add horizontally over all lanes of a vector using a recursive pairwise reduction, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as +0.0.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCADD": + return { + "tooltip": "Floating-point Complex Add.", + "html": "

Floating-point Complex Add.

This instruction operates on complex numbers that are represented in SIMD&FP registers as pairs of elements, with the more significant element holding the imaginary part of the number and the less significant element holding the real part of the number. Each element holds a floating-point value. It performs the following computation on the corresponding complex number element pairs from the two source registers:

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCADD": + return { + "tooltip": "Add the real and imaginary components of the active floating-point complex numbers from the first source vector to the complex numbers from the second source vector which have first been rotated by 90 or 270 degrees in the direction from the positive real axis towards the positive imaginary axis, when considered in polar representation, equivalent to multiplying the complex numbers in the second source vector by \u00b1j beforehand. Destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Add the real and imaginary components of the active floating-point complex numbers from the first source vector to the complex numbers from the second source vector which have first been rotated by 90 or 270 degrees in the direction from the positive real axis towards the positive imaginary axis, when considered in polar representation, equivalent to multiplying the complex numbers in the second source vector by \u00b1j beforehand. Destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

Each complex number is represented in a vector register as an even/odd pair of elements with the real part in the even-numbered element and the imaginary part in the odd-numbered element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCCMP": + return { + "tooltip": "Floating-point Conditional quiet Compare (scalar). This instruction compares the two SIMD&FP source register values and writes the result to the PSTATE.{N, Z, C, V} flags. If the condition does not pass then the PSTATE.{N, Z, C, V} flags are set to the flag bit specifier.", + "html": "

Floating-point Conditional quiet Compare (scalar). This instruction compares the two SIMD&FP source register values and writes the result to the PSTATE.{N, Z, C, V} flags. If the condition does not pass then the PSTATE.{N, Z, C, V} flags are set to the flag bit specifier.

This instruction raises an Invalid Operation floating-point exception if either or both of the operands is a signaling NaN.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCCMPE": + return { + "tooltip": "Floating-point Conditional signaling Compare (scalar). This instruction compares the two SIMD&FP source register values and writes the result to the PSTATE.{N, Z, C, V} flags. If the condition does not pass then the PSTATE.{N, Z, C, V} flags are set to the flag bit specifier.", + "html": "

Floating-point Conditional signaling Compare (scalar). This instruction compares the two SIMD&FP source register values and writes the result to the PSTATE.{N, Z, C, V} flags. If the condition does not pass then the PSTATE.{N, Z, C, V} flags are set to the flag bit specifier.

This instruction raises an Invalid Operation floating-point exception if either or both of the operands is any type of NaN.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCLAMP": + return { + "tooltip": "Clamp each floating-point element in the two or four destination vectors to between the floating-point minimum value in the corresponding element of the first source vector and the floating-point maximum value in the corresponding element of the second source vector and destructively place the clamped results in the corresponding elements of the two or four destination vectors.", + "html": "

Clamp each floating-point element in the two or four destination vectors to between the floating-point minimum value in the corresponding element of the first source vector and the floating-point maximum value in the corresponding element of the second source vector and destructively place the clamped results in the corresponding elements of the two or four destination vectors.

Regardless of the value of FPCR.AH, the behavior is as follows for each minimum number and maximum number operation:

This instruction follows SME2 floating-point numerical behaviors corresponding to instructions that place their results in one or more SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCLAMP": + return { + "tooltip": "Clamp each floating-point element in the destination vector to between the floating-point minimum value in the corresponding element of the first source vector and the floating-point maximum value in the corresponding element of the second source vector and destructively place the clamped results in the corresponding elements of the destination vector.", + "html": "

Clamp each floating-point element in the destination vector to between the floating-point minimum value in the corresponding element of the first source vector and the floating-point maximum value in the corresponding element of the second source vector and destructively place the clamped results in the corresponding elements of the destination vector.

Regardless of the value of FPCR.AH, the behavior is as follows for each minimum number and maximum number operation:

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCMEQ": + return { + "tooltip": "Floating-point Compare Equal (vector). This instruction compares each floating-point value from the first source SIMD&FP register, with the corresponding floating-point value from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.", + "html": "

Floating-point Compare Equal (vector). This instruction compares each floating-point value from the first source SIMD&FP register, with the corresponding floating-point value from the second source SIMD&FP register, and if the comparison is equal sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCMEQ": + return { + "tooltip": "Floating-point Compare Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.", + "html": "

Floating-point Compare Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCM": + case "FCMAL": + case "FCMCC": + case "FCMCS": + case "FCMEQ": + case "FCMGE": + case "FCMGT": + case "FCMHI": + case "FCMLE": + case "FCMLS": + case "FCMLT": + case "FCMMI": + case "FCMNE": + case "FCMPL": + case "FCMVC": + case "FCMVS": + return { + "tooltip": "Compare active floating-point elements in the source vector with zero, and place the boolean results of the specified comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.", + "html": "

Compare active floating-point elements in the source vector with zero, and place the boolean results of the specified comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCM": + case "FCMAL": + case "FCMCC": + case "FCMCS": + case "FCMEQ": + case "FCMGE": + case "FCMGT": + case "FCMHI": + case "FCMLE": + case "FCMLS": + case "FCMLT": + case "FCMMI": + case "FCMNE": + case "FCMPL": + case "FCMVC": + case "FCMVS": + return { + "tooltip": "Compare active floating-point elements in the first source vector with corresponding elements in the second source vector, and place the boolean results of the specified comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.", + "html": "

Compare active floating-point elements in the first source vector with corresponding elements in the second source vector, and place the boolean results of the specified comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCMGE": + return { + "tooltip": "Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if the value is greater than or equal to the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.", + "html": "

Floating-point Compare Greater than or Equal (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if the value is greater than or equal to the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCMGE": + return { + "tooltip": "Floating-point Compare Greater than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.", + "html": "

Floating-point Compare Greater than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCMGT": + return { + "tooltip": "Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.", + "html": "

Floating-point Compare Greater than (vector). This instruction reads each floating-point value in the first source SIMD&FP register and if the value is greater than the corresponding floating-point value in the second source SIMD&FP register sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCMGT": + return { + "tooltip": "Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.", + "html": "

Floating-point Compare Greater than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is greater than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCMLA": + return { + "tooltip": "Floating-point Complex Multiply Accumulate (by element).", + "html": "

Floating-point Complex Multiply Accumulate (by element).

This instruction operates on complex numbers that are represented in SIMD&FP registers as pairs of elements, with the more significant element holding the imaginary part of the number and the less significant element holding the real part of the number. Each element holds a floating-point value. It performs the following computation on complex numbers from the first source register and the destination register with the specified complex number from the second source register:

The multiplication and addition operations are performed as a fused multiply-add, without any intermediate rounding.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCMLA": + return { + "tooltip": "Floating-point Complex Multiply Accumulate.", + "html": "

Floating-point Complex Multiply Accumulate.

This instruction operates on complex numbers that are represented in SIMD&FP registers as pairs of elements, with the more significant element holding the imaginary part of the number and the less significant element holding the real part of the number. Each element holds a floating-point value. It performs the following computation on the corresponding complex number element pairs from the two source registers and the destination register:

The multiplication and addition operations are performed as a fused multiply-add, without any intermediate rounding.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCMLA": + return { + "tooltip": "Multiply the duplicated real components for rotations 0 and 180, or imaginary components for rotations 90 and 270, of the floating-point complex numbers in the first source vector by the corresponding complex number in the second source vector rotated by 0, 90, 180 or 270 degrees in the direction from the positive real axis towards the positive imaginary axis, when considered in polar representation.", + "html": "

Multiply the duplicated real components for rotations 0 and 180, or imaginary components for rotations 90 and 270, of the floating-point complex numbers in the first source vector by the corresponding complex number in the second source vector rotated by 0, 90, 180 or 270 degrees in the direction from the positive real axis towards the positive imaginary axis, when considered in polar representation.

Then destructively add the products to the corresponding components of the complex numbers in the addend and destination vector, without intermediate rounding.

These transformations permit the creation of a variety of multiply-add and multiply-subtract operations on complex numbers by combining two of these instructions with the same vector operands but with rotations that are 90 degrees apart.

Each complex number is represented in a vector register as an even/odd pair of elements with the real part in the even-numbered element and the imaginary part in the odd-numbered element. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCMLA": + return { + "tooltip": "Multiply the duplicated real components for rotations 0 and 180, or imaginary components for rotations 90 and 270, of the floating-point complex numbers in each 128-bit segment of the first source vector by the specified complex number in the corresponding the second source vector segment rotated by 0, 90, 180 or 270 degrees in the direction from the positive real axis towards the positive imaginary axis, when considered in polar representation.", + "html": "

Multiply the duplicated real components for rotations 0 and 180, or imaginary components for rotations 90 and 270, of the floating-point complex numbers in each 128-bit segment of the first source vector by the specified complex number in the corresponding the second source vector segment rotated by 0, 90, 180 or 270 degrees in the direction from the positive real axis towards the positive imaginary axis, when considered in polar representation.

Then destructively add the products to the corresponding components of the complex numbers in the addend and destination vector, without intermediate rounding.

These transformations permit the creation of a variety of multiply-add and multiply-subtract operations on complex numbers by combining two of these instructions with the same vector operands but with rotations that are 90 degrees apart.

Each complex number is represented in a vector register as an even/odd pair of elements with the real part in the even-numbered element and the imaginary part in the odd-numbered element.

The complex numbers within the second source vector are specified using an immediate index which selects the same complex number position within each 128-bit vector segment. The index range is from 0 to one less than the number of complex numbers per 128-bit segment, encoded in 1 to 2 bits depending on the size of the complex number. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCMLE": + return { + "tooltip": "Floating-point Compare Less than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.", + "html": "

Floating-point Compare Less than or Equal to zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than or equal to zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCMLE": + return { + "tooltip": "Compare active floating-point elements in the first source vector being less than or equal to corresponding elements in the second source vector, and place the boolean results of the comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.", + "html": "

Compare active floating-point elements in the first source vector being less than or equal to corresponding elements in the second source vector, and place the boolean results of the comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCMLT": + return { + "tooltip": "Floating-point Compare Less than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.", + "html": "

Floating-point Compare Less than zero (vector). This instruction reads each floating-point value in the source SIMD&FP register and if the value is less than zero sets every bit of the corresponding vector element in the destination SIMD&FP register to one, otherwise sets every bit of the corresponding vector element in the destination SIMD&FP register to zero.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCMLT": + return { + "tooltip": "Compare active floating-point elements in the first source vector being less than corresponding elements in the second source vector, and place the boolean results of the comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.", + "html": "

Compare active floating-point elements in the first source vector being less than corresponding elements in the second source vector, and place the boolean results of the comparison in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCMP": + return { + "tooltip": "Floating-point quiet Compare (scalar). This instruction compares the two SIMD&FP source register values, or the first SIMD&FP source register value and zero. It writes the result to the PSTATE.{N, Z, C, V} flags.", + "html": "

Floating-point quiet Compare (scalar). This instruction compares the two SIMD&FP source register values, or the first SIMD&FP source register value and zero. It writes the result to the PSTATE.{N, Z, C, V} flags.

This instruction raises an Invalid Operation floating-point exception if either or both of the operands is a signaling NaN.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCMPE": + return { + "tooltip": "Floating-point signaling Compare (scalar). This instruction compares the two SIMD&FP source register values, or the first SIMD&FP source register value and zero. It writes the result to the PSTATE.{N, Z, C, V} flags.", + "html": "

Floating-point signaling Compare (scalar). This instruction compares the two SIMD&FP source register values, or the first SIMD&FP source register value and zero. It writes the result to the PSTATE.{N, Z, C, V} flags.

This instruction raises an Invalid Operation floating-point exception if either or both of the operands is any type of NaN.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCPY": + return { + "tooltip": "Copy a floating-point immediate into each active element in the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Copy a floating-point immediate into each active element in the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCSEL": + return { + "tooltip": "Floating-point Conditional Select (scalar). This instruction allows the SIMD&FP destination register to take the value from either one or the other of two SIMD&FP source registers. If the condition passes, the first SIMD&FP source register value is taken, otherwise the second SIMD&FP source register value is taken.", + "html": "

Floating-point Conditional Select (scalar). This instruction allows the SIMD&FP destination register to take the value from either one or the other of two SIMD&FP source registers. If the condition passes, the first SIMD&FP source register value is taken, otherwise the second SIMD&FP source register value is taken.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVT": + return { + "tooltip": "Floating-point Convert precision (scalar). This instruction converts the floating-point value in the SIMD&FP source register to the precision for the destination register data type using the rounding mode that is determined by the FPCR and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Convert precision (scalar). This instruction converts the floating-point value in the SIMD&FP source register to the precision for the destination register data type using the rounding mode that is determined by the FPCR and writes the result to the SIMD&FP destination register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVT": + return { + "tooltip": "Convert to single-precision from half-precision, each element of the source vector, and place the results in the double-width destination elements of the destination vectors.", + "html": "

Convert to single-precision from half-precision, each element of the source vector, and place the results in the double-width destination elements of the destination vectors.

This instruction follows SME2 floating-point numerical behaviors corresponding to instructions that place their results in one or more SVE Z vectors.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.F16F16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVT": + return { + "tooltip": "Convert to half-precision from single-precision, each element of the two source vectors, and place the results in the half-width destination elements.", + "html": "

Convert to half-precision from single-precision, each element of the two source vectors, and place the results in the half-width destination elements.

This instruction follows SME2 floating-point numerical behaviors corresponding to instructions that place their results in one or more SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVT": + return { + "tooltip": "Convert the size and precision of each active floating-point element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Convert the size and precision of each active floating-point element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

Since the input and result types have a different size the smaller type is held unpacked in the least significant bits of elements of the larger size. When the input is the smaller type the upper bits of each source element are ignored. When the result is the smaller type the results are zero-extended to fill each destination element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTAS": + return { + "tooltip": "Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Convert to Signed integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to a signed integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTAS": + return { + "tooltip": "Floating-point Convert to Signed integer, rounding to nearest with ties to Away (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit signed integer using the Round to Nearest with Ties to Away rounding mode, and writes the result to the general-purpose destination register.", + "html": "

Floating-point Convert to Signed integer, rounding to nearest with ties to Away (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit signed integer using the Round to Nearest with Ties to Away rounding mode, and writes the result to the general-purpose destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTAU": + return { + "tooltip": "Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (vector). This instruction converts each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest with Ties to Away rounding mode and writes the result to the SIMD&FP destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTAU": + return { + "tooltip": "Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit unsigned integer using the Round to Nearest with Ties to Away rounding mode, and writes the result to the general-purpose destination register.", + "html": "

Floating-point Convert to Unsigned integer, rounding to nearest with ties to Away (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit unsigned integer using the Round to Nearest with Ties to Away rounding mode, and writes the result to the general-purpose destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTL": + case "FCVTL2": + return { + "tooltip": "Floating-point Convert to higher precision Long (vector). This instruction reads each element in a vector in the SIMD&FP source register, converts each value to double the precision of the source element using the rounding mode that is determined by the FPCR, and writes each result to the equivalent element of the vector in the SIMD&FP destination register.", + "html": "

Floating-point Convert to higher precision Long (vector). This instruction reads each element in a vector in the SIMD&FP source register, converts each value to double the precision of the source element using the rounding mode that is determined by the FPCR, and writes each result to the equivalent element of the vector in the SIMD&FP destination register.

Where the operation lengthens a 64-bit vector to a 128-bit vector, the FCVTL2 variant operates on the elements in the top 64 bits of the source register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTL": + return { + "tooltip": "Convert to single-precision from half-precision, each element of the source vector, and place the deinterleaved results in the double-width destination elements of the destination vectors.", + "html": "

Convert to single-precision from half-precision, each element of the source vector, and place the deinterleaved results in the double-width destination elements of the destination vectors.

This instruction follows SME2 floating-point numerical behaviors corresponding to instructions that place their results in one or more SVE Z vectors.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.F16F16 indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTLT": + return { + "tooltip": "Convert odd-numbered floating-point elements from the source vector to the next higher precision, and place the results in the active overlapping double-width elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Convert odd-numbered floating-point elements from the source vector to the next higher precision, and place the results in the active overlapping double-width elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTMS": + return { + "tooltip": "Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Convert to Signed integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the Security state and Exception level in which the instruction is executed, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTMS": + return { + "tooltip": "Floating-point Convert to Signed integer, rounding toward Minus infinity (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit signed integer using the Round towards Minus Infinity rounding mode, and writes the result to the general-purpose destination register.", + "html": "

Floating-point Convert to Signed integer, rounding toward Minus infinity (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit signed integer using the Round towards Minus Infinity rounding mode, and writes the result to the general-purpose destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTMU": + return { + "tooltip": "Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Convert to Unsigned integer, rounding toward Minus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the Security state and Exception level in which the instruction is executed, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTMU": + return { + "tooltip": "Floating-point Convert to Unsigned integer, rounding toward Minus infinity (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit unsigned integer using the Round towards Minus Infinity rounding mode, and writes the result to the general-purpose destination register.", + "html": "

Floating-point Convert to Unsigned integer, rounding toward Minus infinity (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit unsigned integer using the Round towards Minus Infinity rounding mode, and writes the result to the general-purpose destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTN": + case "FCVTN2": + return { + "tooltip": "Floating-point Convert to lower precision Narrow (vector). This instruction reads each vector element in the SIMD&FP source register, converts each result to half the precision of the source element, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The rounding mode is determined by the FPCR.", + "html": "

Floating-point Convert to lower precision Narrow (vector). This instruction reads each vector element in the SIMD&FP source register, converts each result to half the precision of the source element, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The rounding mode is determined by the FPCR.

The FCVTN instruction writes the vector to the lower half of the destination register and clears the upper half, while the FCVTN2 instruction writes the vector to the upper half of the destination register without affecting the other bits of the register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the Security state and Exception level in which the instruction is executed, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTN": + return { + "tooltip": "Convert to half-precision from single-precision, each element of the two source vectors, and place the two-way interleaved results in the half-width destination elements.", + "html": "

Convert to half-precision from single-precision, each element of the two source vectors, and place the two-way interleaved results in the half-width destination elements.

This instruction follows SME2 floating-point numerical behaviors corresponding to instructions that place their results in one or more SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTNS": + return { + "tooltip": "Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Convert to Signed integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the Security state and Exception level in which the instruction is executed, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTNS": + return { + "tooltip": "Floating-point Convert to Signed integer, rounding to nearest with ties to even (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit signed integer using the Round to Nearest rounding mode, and writes the result to the general-purpose destination register.", + "html": "

Floating-point Convert to Signed integer, rounding to nearest with ties to even (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit signed integer using the Round to Nearest rounding mode, and writes the result to the general-purpose destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTNT": + return { + "tooltip": "Convert active floating-point elements from the source vector to the next lower precision, and place the results in the odd-numbered half-width elements of the destination vector, leaving the even-numbered elements unchanged. Inactive elements in the destination vector register remain unmodified.", + "html": "

Convert active floating-point elements from the source vector to the next lower precision, and place the results in the odd-numbered half-width elements of the destination vector, leaving the even-numbered elements unchanged. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTNU": + return { + "tooltip": "Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the Security state and Exception level in which the instruction is executed, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTNU": + return { + "tooltip": "Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit unsigned integer using the Round to Nearest rounding mode, and writes the result to the general-purpose destination register.", + "html": "

Floating-point Convert to Unsigned integer, rounding to nearest with ties to even (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit unsigned integer using the Round to Nearest rounding mode, and writes the result to the general-purpose destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTPS": + return { + "tooltip": "Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Convert to Signed integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the Security state and Exception level in which the instruction is executed, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTPS": + return { + "tooltip": "Floating-point Convert to Signed integer, rounding toward Plus infinity (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit signed integer using the Round towards Plus Infinity rounding mode, and writes the result to the general-purpose destination register.", + "html": "

Floating-point Convert to Signed integer, rounding toward Plus infinity (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit signed integer using the Round towards Plus Infinity rounding mode, and writes the result to the general-purpose destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTPU": + return { + "tooltip": "Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Convert to Unsigned integer, rounding toward Plus infinity (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the Security state and Exception level in which the instruction is executed, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTPU": + return { + "tooltip": "Floating-point Convert to Unsigned integer, rounding toward Plus infinity (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit unsigned integer using the Round towards Plus Infinity rounding mode, and writes the result to the general-purpose destination register.", + "html": "

Floating-point Convert to Unsigned integer, rounding toward Plus infinity (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit unsigned integer using the Round towards Plus Infinity rounding mode, and writes the result to the general-purpose destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTX": + return { + "tooltip": "Convert active double-precision floating-point elements from the source vector to single-precision, rounding to Odd, and place the results in the even-numbered 32-bit elements of the destination vector, while setting the odd-numbered elements to zero. Inactive elements in the destination vector register remain unmodified.", + "html": "

Convert active double-precision floating-point elements from the source vector to single-precision, rounding to Odd, and place the results in the even-numbered 32-bit elements of the destination vector, while setting the odd-numbered elements to zero. Inactive elements in the destination vector register remain unmodified.

Rounding to Odd (aka Von Neumann rounding) permits a two-step conversion from double-precision to half-precision without incurring intermediate rounding errors.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTXN": + case "FCVTXN2": + return { + "tooltip": "Floating-point Convert to lower precision Narrow, rounding to odd (vector). This instruction reads each vector element in the source SIMD&FP register, narrows each value to half the precision of the source element using the Round to Odd rounding mode, writes the result to a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Floating-point Convert to lower precision Narrow, rounding to odd (vector). This instruction reads each vector element in the source SIMD&FP register, narrows each value to half the precision of the source element using the Round to Odd rounding mode, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

This instruction uses the Round to Odd rounding mode which is not defined by the IEEE 754-2008 standard. This rounding mode ensures that if the result of the conversion is inexact the least significant bit of the mantissa is forced to 1. This rounding mode enables a floating-point value to be converted to a lower precision format via an intermediate precision format while avoiding double rounding errors. For example, a 64-bit floating-point value can be converted to a correctly rounded 16-bit floating-point value by first using this instruction to produce a 32-bit value and then using another instruction with the wanted rounding mode to convert the 32-bit value to the final 16-bit floating-point value.

The FCVTXN instruction writes the vector to the lower half of the destination register and clears the upper half, while the FCVTXN2 instruction writes the vector to the upper half of the destination register without affecting the other bits of the register.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTXNT": + return { + "tooltip": "Convert active double-precision floating-point elements from the source vector to single-precision, rounding to Odd, and place the results in the odd-numbered 32-bit elements of the destination vector, leaving the even-numbered elements unchanged. Inactive elements in the destination vector register remain unmodified.", + "html": "

Convert active double-precision floating-point elements from the source vector to single-precision, rounding to Odd, and place the results in the odd-numbered 32-bit elements of the destination vector, leaving the even-numbered elements unchanged. Inactive elements in the destination vector register remain unmodified.

Rounding to Odd (aka Von Neumann rounding) permits a two-step conversion from double-precision to half-precision without incurring intermediate rounding errors.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTZS": + return { + "tooltip": "Floating-point Convert to Signed fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point signed integer using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Convert to Signed fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point signed integer using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the Security state and Exception level in which the instruction is executed, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTZS": + return { + "tooltip": "Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Convert to Signed integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to a signed integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the Security state and Exception level in which the instruction is executed, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTZS": + return { + "tooltip": "Floating-point Convert to Signed fixed-point, rounding toward Zero (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit fixed-point signed integer using the Round towards Zero rounding mode, and writes the result to the general-purpose destination register.", + "html": "

Floating-point Convert to Signed fixed-point, rounding toward Zero (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit fixed-point signed integer using the Round towards Zero rounding mode, and writes the result to the general-purpose destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the Security state and Exception level in which the instruction is executed, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTZS": + return { + "tooltip": "Floating-point Convert to Signed integer, rounding toward Zero (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit signed integer using the Round towards Zero rounding mode, and writes the result to the general-purpose destination register.", + "html": "

Floating-point Convert to Signed integer, rounding toward Zero (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit signed integer using the Round towards Zero rounding mode, and writes the result to the general-purpose destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTZS": + return { + "tooltip": "Convert to the signed 32-bit integer nearer to zero from single-precision, each element of the two or four source vectors, and place the results in the corresponding elements of the two or four destination vectors.", + "html": "

Convert to the signed 32-bit integer nearer to zero from single-precision, each element of the two or four source vectors, and place the results in the corresponding elements of the two or four destination vectors.

This instruction follows SME2 floating-point numerical behaviors corresponding to instructions that place their results in one or more SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTZS": + return { + "tooltip": "Convert to the signed integer nearer to zero from each active floating-point element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Convert to the signed integer nearer to zero from each active floating-point element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

If the input and result types have a different size the smaller type is held unpacked in the least significant bits of elements of the larger size. When the input is the smaller type the upper bits of each source element are ignored. When the result is the smaller type the results are sign-extended to fill each destination element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTZU": + return { + "tooltip": "Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point unsigned integer using the Round towards Zero rounding mode, and writes the result to the general-purpose destination register.", + "html": "

Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from floating-point to fixed-point unsigned integer using the Round towards Zero rounding mode, and writes the result to the general-purpose destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the Security state and Exception level in which the instruction is executed, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTZU": + return { + "tooltip": "Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Convert to Unsigned integer, rounding toward Zero (vector). This instruction converts a scalar or each element in a vector from a floating-point value to an unsigned integer value using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the Security state and Exception level in which the instruction is executed, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTZU": + return { + "tooltip": "Floating-point Convert to Unsigned fixed-point, rounding toward Zero (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit fixed-point unsigned integer using the Round towards Zero rounding mode, and writes the result to the general-purpose destination register.", + "html": "

Floating-point Convert to Unsigned fixed-point, rounding toward Zero (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit fixed-point unsigned integer using the Round towards Zero rounding mode, and writes the result to the general-purpose destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the Security state and Exception level in which the instruction is executed, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTZU": + return { + "tooltip": "Floating-point Convert to Unsigned integer, rounding toward Zero (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit unsigned integer using the Round towards Zero rounding mode, and writes the result to the general-purpose destination register.", + "html": "

Floating-point Convert to Unsigned integer, rounding toward Zero (scalar). This instruction converts the floating-point value in the SIMD&FP source register to a 32-bit or 64-bit unsigned integer using the Round towards Zero rounding mode, and writes the result to the general-purpose destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTZU": + return { + "tooltip": "Convert to the unsigned 32-bit integer nearer to zero from single-precision, each element of the two or four source vectors, and place the results in the corresponding elements of the two or four destination vectors.", + "html": "

Convert to the unsigned 32-bit integer nearer to zero from single-precision, each element of the two or four source vectors, and place the results in the corresponding elements of the two or four destination vectors.

This instruction follows SME2 floating-point numerical behaviors corresponding to instructions that place their results in one or more SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FCVTZU": + return { + "tooltip": "Convert to the unsigned integer nearer to zero from each active floating-point element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Convert to the unsigned integer nearer to zero from each active floating-point element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

If the input and result types have a different size the smaller type is held unpacked in the least significant bits of elements of the larger size. When the input is the smaller type the upper bits of each source element are ignored. When the result is the smaller type the results are zero-extended to fill each destination element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FDIV": + return { + "tooltip": "Floating-point Divide (vector). This instruction divides the floating-point values in the elements in the first source SIMD&FP register, by the floating-point values in the corresponding elements in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Floating-point Divide (vector). This instruction divides the floating-point values in the elements in the first source SIMD&FP register, by the floating-point values in the corresponding elements in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FDIV": + return { + "tooltip": "Floating-point Divide (scalar). This instruction divides the floating-point value of the first source SIMD&FP register by the floating-point value of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.", + "html": "

Floating-point Divide (scalar). This instruction divides the floating-point value of the first source SIMD&FP register by the floating-point value of the second source SIMD&FP register, and writes the result to the destination SIMD&FP register.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FDIV": + return { + "tooltip": "Divide active floating-point elements of the first source vector by corresponding floating-point elements of the second source vector and destructively place the quotient in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Divide active floating-point elements of the first source vector by corresponding floating-point elements of the second source vector and destructively place the quotient in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FDIVR": + return { + "tooltip": "Reversed divide active floating-point elements of the second source vector by corresponding floating-point elements of the first source vector and destructively place the quotient in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Reversed divide active floating-point elements of the second source vector by corresponding floating-point elements of the first source vector and destructively place the quotient in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FDOT": + return { + "tooltip": "This instruction computes the fused sum-of-products of a pair of half-precision floating-point values held in each 32-bit element of the first source and second source vectors, without intermediate rounding, and then destructively adds the single-precision sum-of-products to the corresponding single-precision element of the destination vector.", + "html": "

This instruction computes the fused sum-of-products of a pair of half-precision floating-point values held in each 32-bit element of the first source and second source vectors, without intermediate rounding, and then destructively adds the single-precision sum-of-products to the corresponding single-precision element of the destination vector.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FDOT": + return { + "tooltip": "This instruction computes the fused sum-of-products of a pair of half-precision floating-point values held in each 32-bit element of the first source vector and a pair of half-precision floating-point values in an indexed 32-bit element of the second source vector, without intermediate rounding, and then destructively adds the single-precision sum-of-products to the corresponding single-precision element of the destination vector.", + "html": "

This instruction computes the fused sum-of-products of a pair of half-precision floating-point values held in each 32-bit element of the first source vector and a pair of half-precision floating-point values in an indexed 32-bit element of the second source vector, without intermediate rounding, and then destructively adds the single-precision sum-of-products to the corresponding single-precision element of the destination vector.

The half-precision floating-point pairs within the second source vector are specified using an immediate index which selects the same pair position within each 128-bit vector segment. The index range is from 0 to 3.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FDOT": + return { + "tooltip": "The instruction computes the fused sum-of-products of a pair of half-precision floating-point values held in the corresponding 32-bit elements of the two or four first source vectors and the indexed 32-bit element of the second source vector, without intermediate rounding. The single-precision sum-of-products results are destructively added to the corresponding single-precision elements of the ZA single-vector groups.", + "html": "

The instruction computes the fused sum-of-products of a pair of half-precision floating-point values held in the corresponding 32-bit elements of the two or four first source vectors and the indexed 32-bit element of the second source vector, without intermediate rounding. The single-precision sum-of-products results are destructively added to the corresponding single-precision elements of the ZA single-vector groups.

The half-precision floating-point pairs within the second source vector are specified using an immediate index which selects the same pair position within each 128-bit vector segment. The element index range is from 0 to 3. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FDOT": + return { + "tooltip": "The instruction computes the fused sum-of-products of a pair of half-precision floating-point values held in the corresponding 32-bit elements of the two or four first source vectors and the second source vector, without intermediate rounding. The single-precision sum-of-products results are destructively added to the corresponding single-precision elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

The instruction computes the fused sum-of-products of a pair of half-precision floating-point values held in the corresponding 32-bit elements of the two or four first source vectors and the second source vector, without intermediate rounding. The single-precision sum-of-products results are destructively added to the corresponding single-precision elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FDOT": + return { + "tooltip": "The instruction computes the fused sum-of-products of a pair of half-precision floating-point values held in the corresponding 32-bit elements of the two or four first and second source vectors, without intermediate rounding. The single-precision sum-of-products results are destructively added to the corresponding single-precision elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

The instruction computes the fused sum-of-products of a pair of half-precision floating-point values held in the corresponding 32-bit elements of the two or four first and second source vectors, without intermediate rounding. The single-precision sum-of-products results are destructively added to the corresponding single-precision elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FDUP": + return { + "tooltip": "Unconditionally broadcast the floating-point immediate into each element of the destination vector. This instruction is unpredicated.", + "html": "

Unconditionally broadcast the floating-point immediate into each element of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FEXPA": + return { + "tooltip": "The FEXPA instruction accelerates the polynomial series calculation of the exp(x) function.", + "html": "

The FEXPA instruction accelerates the polynomial series calculation of the exp(x) function.

The double-precision variant copies the low 52 bits of an entry from a hard-wired table of 64-bit coefficients, indexed by the low 6 bits of each element of the source vector, and prepends to that the next 11 bits of the source element (src<16:6>), setting the sign bit to zero.

The single-precision variant copies the low 23 bits of an entry from a hard-wired table of 32-bit coefficients, indexed by the low 6 bits of each element of the source vector, and prepends to that the next 8 bits of the source element (src<13:6>), setting the sign bit to zero.

The half-precision variant copies the low 10 bits of an entry from a hard-wired table of 16-bit coefficients, indexed by the low 5 bits of each element of the source vector, and prepends to that the next 5 bits of the source element (src<9:5>), setting the sign bit to zero.

A coefficient table entry with index m holds the floating-point value 2^(m/64), or for the half-precision variant 2^(m/32). This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FJCVTZS": + return { + "tooltip": "Floating-point Javascript Convert to Signed fixed-point, rounding toward Zero. This instruction converts the double-precision floating-point value in the SIMD&FP source register to a 32-bit signed integer using the Round towards Zero rounding mode, and writes the result to the general-purpose destination register. If the result is too large to be represented as a signed 32-bit integer, then the result is the integer modulo 232, as held in a 32-bit signed integer.", + "html": "

Floating-point Javascript Convert to Signed fixed-point, rounding toward Zero. This instruction converts the double-precision floating-point value in the SIMD&FP source register to a 32-bit signed integer using the Round towards Zero rounding mode, and writes the result to the general-purpose destination register. If the result is too large to be represented as a signed 32-bit integer, then the result is the integer modulo 2^32, as held in a 32-bit signed integer.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FLOGB": + return { + "tooltip": "This instruction returns the signed integer base 2 logarithm of each floating-point input element |x| after normalization.", + "html": "

This instruction returns the signed integer base 2 logarithm of each floating-point input element |x| after normalization.

This is the unbiased exponent of x used in the representation of the floating-point value, such that, for positive x, x = significand \u00d7 2^exponent.

The integer results are placed in elements of the destination vector which have the same width (esize) as the floating-point input elements:

Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAD": + return { + "tooltip": "Multiply the corresponding active floating-point elements of the first and second source vectors and add to elements of the third (addend) vector without intermediate rounding. Destructively place the results in the destination and first source (multiplicand) vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Multiply the corresponding active floating-point elements of the first and second source vectors and add to elements of the third (addend) vector without intermediate rounding. Destructively place the results in the destination and first source (multiplicand) vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMADD": + return { + "tooltip": "Floating-point fused Multiply-Add (scalar). This instruction multiplies the values of the first two SIMD&FP source registers, adds the product to the value of the third SIMD&FP source register, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point fused Multiply-Add (scalar). This instruction multiplies the values of the first two SIMD&FP source registers, adds the product to the value of the third SIMD&FP source register, and writes the result to the SIMD&FP destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAX": + return { + "tooltip": "Floating-point Maximum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the larger of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Floating-point Maximum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the larger of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAX": + return { + "tooltip": "Floating-point Maximum (scalar). This instruction compares the two source SIMD&FP registers, and writes the larger of the two floating-point values to the destination SIMD&FP register.", + "html": "

Floating-point Maximum (scalar). This instruction compares the two source SIMD&FP registers, and writes the larger of the two floating-point values to the destination SIMD&FP register.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAX": + return { + "tooltip": "Determine the maximum of floating-point elements of the second source vector and the corresponding floating-point elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the maximum of floating-point elements of the second source vector and the corresponding floating-point elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

This instruction follows SME2 floating-point numerical behaviors corresponding to instructions that place their results in one or more SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAX": + return { + "tooltip": "Determine the maximum of floating-point elements of the two or four second source vectors and the corresponding floating-point elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the maximum of floating-point elements of the two or four second source vectors and the corresponding floating-point elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

This instruction follows SME2 floating-point numerical behaviors corresponding to instructions that place their results in one or more SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAX": + return { + "tooltip": "Determine the maximum of an immediate and each active floating-point element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate may take the value +0.0 or +1.0 only.", + "html": "

Determine the maximum of an immediate and each active floating-point element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate may take the value +0.0 or +1.0 only.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAX": + return { + "tooltip": "Determine the maximum of active floating-point elements of the second source vector and corresponding floating-point elements of the first source vector and destructively place the results in the corresponding elements of the first source vector.", + "html": "

Determine the maximum of active floating-point elements of the second source vector and corresponding floating-point elements of the first source vector and destructively place the results in the corresponding elements of the first source vector.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAXNM": + return { + "tooltip": "Floating-point Maximum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the larger of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Floating-point Maximum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the larger of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

Regardless of the value of FPCR.AH, the behavior is as follows:

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAXNM": + return { + "tooltip": "Floating-point Maximum Number (scalar). This instruction compares the first and second source SIMD&FP register values, and writes the larger of the two floating-point values to the destination SIMD&FP register.", + "html": "

Floating-point Maximum Number (scalar). This instruction compares the first and second source SIMD&FP register values, and writes the larger of the two floating-point values to the destination SIMD&FP register.

Regardless of the value of FPCR.AH, the behavior is as follows:

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAXNM": + return { + "tooltip": "Determine the maximum number value of floating-point elements of the second source vector and the corresponding floating-point elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the maximum number value of floating-point elements of the second source vector and the corresponding floating-point elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

Regardless of the value of FPCR.AH, the behavior is as follows:

This instruction follows SME2 floating-point numerical behaviors corresponding to instructions that place their results in one or more SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAXNM": + return { + "tooltip": "Determine the maximum number value of floating-point elements of the two or four second source vectors and the corresponding floating-point elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the maximum number value of floating-point elements of the two or four second source vectors and the corresponding floating-point elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

Regardless of the value of FPCR.AH, the behavior is as follows:

This instruction follows SME2 floating-point numerical behaviors corresponding to instructions that place their results in one or more SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAXNM": + return { + "tooltip": "Determine the maximum number value of an immediate and each active floating-point element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate may take the value +0.0 or +1.0 only.", + "html": "

Determine the maximum number value of an immediate and each active floating-point element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate may take the value +0.0 or +1.0 only.

Regardless of the value of FPCR.AH, the behavior is as follows:

Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAXNM": + return { + "tooltip": "Determine the maximum number value of active floating-point elements of the second source vector and corresponding floating-point elements of the first source vector and destructively place the results in the corresponding elements of the first source vector.", + "html": "

Determine the maximum number value of active floating-point elements of the second source vector and corresponding floating-point elements of the first source vector and destructively place the results in the corresponding elements of the first source vector.

Regardless of the value of FPCR.AH, the behavior is as follows:

Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAXNMP": + return { + "tooltip": "Floating-point Maximum Number of Pair of elements (scalar). This instruction compares two vector elements in the source SIMD&FP register and writes the largest of the floating-point values as a scalar to the destination SIMD&FP register.", + "html": "

Floating-point Maximum Number of Pair of elements (scalar). This instruction compares two vector elements in the source SIMD&FP register and writes the largest of the floating-point values as a scalar to the destination SIMD&FP register.

Regardless of the value of FPCR.AH, the behavior is as follows for each pairwise operation:

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAXNMP": + return { + "tooltip": "Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.", + "html": "

Floating-point Maximum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

Regardless of the value of FPCR.AH, the behavior is as follows for each pairwise operation:

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAXNMP": + return { + "tooltip": "Compute the maximum value of each pair of adjacent floating-point elements within each source vector, and interleave the results from corresponding lanes. The interleaved result values are destructively placed in the first source vector.", + "html": "

Compute the maximum value of each pair of adjacent floating-point elements within each source vector, and interleave the results from corresponding lanes. The interleaved result values are destructively placed in the first source vector.

Regardless of the value of FPCR.AH, the behavior is as follows for each pairwise operation:

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAXNMQV": + return { + "tooltip": "Floating-point maximum number of the same element numbers from each 128-bit source vector segment using a recursive pairwise reduction, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as the default NaN.", + "html": "

Floating-point maximum number of the same element numbers from each 128-bit source vector segment using a recursive pairwise reduction, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as the default NaN.

Regardless of the value of FPCR.AH, the behavior is as follows:

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAXNMV": + return { + "tooltip": "Floating-point Maximum Number across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are floating-point values.", + "html": "

Floating-point Maximum Number across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are floating-point values.

Regardless of the value of FPCR.AH, the behavior is as follows:

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAXNMV": + return { + "tooltip": "Floating-point maximum number horizontally over all lanes of a vector using a recursive pairwise reduction, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as the default NaN.", + "html": "

Floating-point maximum number horizontally over all lanes of a vector using a recursive pairwise reduction, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as the default NaN.

Regardless of the value of FPCR.AH, the behavior is as follows:

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAXP": + return { + "tooltip": "Floating-point Maximum of Pair of elements (scalar). This instruction compares two vector elements in the source SIMD&FP register and writes the largest of the floating-point values as a scalar to the destination SIMD&FP register.", + "html": "

Floating-point Maximum of Pair of elements (scalar). This instruction compares two vector elements in the source SIMD&FP register and writes the largest of the floating-point values as a scalar to the destination SIMD&FP register.

When FPCR.AH is 0, the behavior is as follows for each pairwise operation:

When FPCR.AH is 1, the behavior is as follows for each pairwise operation:

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAXP": + return { + "tooltip": "Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.", + "html": "

Floating-point Maximum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the larger of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

When FPCR.AH is 0, the behavior is as follows for each pairwise operation:

When FPCR.AH is 1, the behavior is as follows for each pairwise operation:

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAXP": + return { + "tooltip": "Compute the maximum value of each pair of adjacent floating-point elements within each source vector, and interleave the results from corresponding lanes. The interleaved result values are destructively placed in the first source vector.", + "html": "

Compute the maximum value of each pair of adjacent floating-point elements within each source vector, and interleave the results from corresponding lanes. The interleaved result values are destructively placed in the first source vector.

When FPCR.AH is 0, the behavior is as follows for each pairwise operation:

When FPCR.AH is 1, the behavior is as follows for each pairwise operation:

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAXQV": + return { + "tooltip": "Floating-point maximum of the same element numbers from each 128-bit source vector segment using a recursive pairwise reduction, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as -Infinity.", + "html": "

Floating-point maximum of the same element numbers from each 128-bit source vector segment using a recursive pairwise reduction, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as -Infinity.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAXV": + return { + "tooltip": "Floating-point Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are floating-point values.", + "html": "

Floating-point Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are floating-point values.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMAXV": + return { + "tooltip": "Floating-point maximum horizontally over all lanes of a vector using a recursive pairwise reduction, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as -Infinity.", + "html": "

Floating-point maximum horizontally over all lanes of a vector using a recursive pairwise reduction, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as -Infinity.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMIN": + return { + "tooltip": "Floating-point minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Floating-point minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMIN": + return { + "tooltip": "Floating-point Minimum (scalar). This instruction compares the first and second source SIMD&FP register values, and writes the smaller of the two floating-point values to the destination SIMD&FP register.", + "html": "

Floating-point Minimum (scalar). This instruction compares the first and second source SIMD&FP register values, and writes the smaller of the two floating-point values to the destination SIMD&FP register.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMIN": + return { + "tooltip": "Determine the mininum of floating-point elements of the second source vector and the corresponding floating-point elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the minimum of floating-point elements of the second source vector and the corresponding floating-point elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

This instruction follows SME2 floating-point numerical behaviors corresponding to instructions that place their results in one or more SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMIN": + return { + "tooltip": "Determine the mininum of floating-point elements of the two or four second source vectors and the corresponding floating-point elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the minimum of floating-point elements of the two or four second source vectors and the corresponding floating-point elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

This instruction follows SME2 floating-point numerical behaviors corresponding to instructions that place their results in one or more SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMIN": + return { + "tooltip": "Determine the minimum of an immediate and each active floating-point element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate may take the value +0.0 or +1.0 only.", + "html": "

Determine the minimum of an immediate and each active floating-point element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate may take the value +0.0 or +1.0 only.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMIN": + return { + "tooltip": "Determine the minimum of active floating-point elements of the second source vector and corresponding floating-point elements of the first source vector and destructively place the results in the corresponding elements of the first source vector.", + "html": "

Determine the minimum of active floating-point elements of the second source vector and corresponding floating-point elements of the first source vector and destructively place the results in the corresponding elements of the first source vector.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMINNM": + return { + "tooltip": "Floating-point Minimum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the smaller of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Floating-point Minimum Number (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, writes the smaller of the two floating-point values into a vector, and writes the vector to the destination SIMD&FP register.

Regardless of the value of FPCR.AH, the behavior is as follows:

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMINNM": + return { + "tooltip": "Floating-point Minimum Number (scalar). This instruction compares the first and second source SIMD&FP register values, and writes the smaller of the two floating-point values to the destination SIMD&FP register.", + "html": "

Floating-point Minimum Number (scalar). This instruction compares the first and second source SIMD&FP register values, and writes the smaller of the two floating-point values to the destination SIMD&FP register.

Regardless of the value of FPCR.AH, the behavior is as follows:

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMINNM": + return { + "tooltip": "Determine the minimum number value of floating-point elements of the second source vector and the corresponding floating-point elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the minimum number value of floating-point elements of the second source vector and the corresponding floating-point elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

Regardless of the value of FPCR.AH, the behavior is as follows:

This instruction follows SME2 floating-point numerical behaviors corresponding to instructions that place their results in one or more SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMINNM": + return { + "tooltip": "Determine the minimum number value of floating-point elements of the two or four second source vectors and the corresponding floating-point elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the minimum number value of floating-point elements of the two or four second source vectors and the corresponding floating-point elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

Regardless of the value of FPCR.AH, the behavior is as follows:

This instruction follows SME2 floating-point numerical behaviors corresponding to instructions that place their results in one or more SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMINNM": + return { + "tooltip": "Determine the minimum number value of an immediate and each active floating-point element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate may take the value +0.0 or +1.0 only.", + "html": "

Determine the minimum number value of an immediate and each active floating-point element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate may take the value +0.0 or +1.0 only.

Regardless of the value of FPCR.AH, the behavior is as follows:

Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMINNM": + return { + "tooltip": "Determine the minimum number value of active floating-point elements of the second source vector and corresponding floating-point elements of the first source vector and destructively place the results in the corresponding elements of the first source vector.", + "html": "

Determine the minimum number value of active floating-point elements of the second source vector and corresponding floating-point elements of the first source vector and destructively place the results in the corresponding elements of the first source vector.

Regardless of the value of FPCR.AH, the behavior is as follows:

Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMINNMP": + return { + "tooltip": "Floating-point Minimum Number of Pair of elements (scalar). This instruction compares two vector elements in the source SIMD&FP register and writes the smallest of the floating-point values as a scalar to the destination SIMD&FP register.", + "html": "

Floating-point Minimum Number of Pair of elements (scalar). This instruction compares two vector elements in the source SIMD&FP register and writes the smallest of the floating-point values as a scalar to the destination SIMD&FP register.

Regardless of the value of FPCR.AH, the behavior is as follows for each pairwise operation:

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMINNMP": + return { + "tooltip": "Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.", + "html": "

Floating-point Minimum Number Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of floating-point values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

Regardless of the value of FPCR.AH, the behavior is as follows for each pairwise operation:

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMINNMP": + return { + "tooltip": "Compute the minimum value of each pair of adjacent floating-point elements within each source vector, and interleave the results from corresponding lanes. The interleaved result values are destructively placed in the first source vector.", + "html": "

Compute the minimum value of each pair of adjacent floating-point elements within each source vector, and interleave the results from corresponding lanes. The interleaved result values are destructively placed in the first source vector.

Regardless of the value of FPCR.AH, the behavior is as follows for each pairwise operation:

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMINNMQV": + return { + "tooltip": "Floating-point minimum number of the same element numbers from each 128-bit source vector segment using a recursive pairwise reduction, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as the default NaN.", + "html": "

Floating-point minimum number of the same element numbers from each 128-bit source vector segment using a recursive pairwise reduction, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as the default NaN.

Regardless of the value of FPCR.AH, the behavior is as follows:

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMINNMV": + return { + "tooltip": "Floating-point Minimum Number across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are floating-point values.", + "html": "

Floating-point Minimum Number across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are floating-point values.

Regardless of the value of FPCR.AH, the behavior is as follows:

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMINNMV": + return { + "tooltip": "Floating-point minimum number horizontally over all lanes of a vector using a recursive pairwise reduction, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as the default NaN.", + "html": "

Floating-point minimum number horizontally over all lanes of a vector using a recursive pairwise reduction, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as the default NaN.

Regardless of the value of FPCR.AH, the behavior is as follows:

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMINP": + return { + "tooltip": "Floating-point Minimum of Pair of elements (scalar). This instruction compares two vector elements in the source SIMD&FP register and writes the smallest of the floating-point values as a scalar to the destination SIMD&FP register.", + "html": "

Floating-point Minimum of Pair of elements (scalar). This instruction compares two vector elements in the source SIMD&FP register and writes the smallest of the floating-point values as a scalar to the destination SIMD&FP register.

When FPCR.AH is 0, the behavior is as follows for each pairwise operation:

When FPCR.AH is 1, the behavior is as follows for each pairwise operation:

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMINP": + return { + "tooltip": "Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.", + "html": "

Floating-point Minimum Pairwise (vector). This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements from the concatenated vector, writes the smaller of each pair of values into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

When FPCR.AH is 0, the behavior is as follows for each pairwise operation:

When FPCR.AH is 1, the behavior is as follows for each pairwise operation:

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMINP": + return { + "tooltip": "Compute the minimum value of each pair of adjacent floating-point elements within each source vector, and interleave the results from corresponding lanes. The interleaved result values are destructively placed in the first source vector.", + "html": "

Compute the minimum value of each pair of adjacent floating-point elements within each source vector, and interleave the results from corresponding lanes. The interleaved result values are destructively placed in the first source vector.

When FPCR.AH is 0, the behavior is as follows for each pairwise operation:

When FPCR.AH is 1, the behavior is as follows for each pairwise operation:

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMINQV": + return { + "tooltip": "Floating-point minimum of the same element numbers from each 128-bit source vector segment using a recursive pairwise reduction, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as +Infinity.", + "html": "

Floating-point minimum of the same element numbers from each 128-bit source vector segment using a recursive pairwise reduction, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as +Infinity.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMINV": + return { + "tooltip": "Floating-point Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are floating-point values.", + "html": "

Floating-point Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are floating-point values.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMINV": + return { + "tooltip": "Floating-point minimum horizontally over all lanes of a vector using a recursive pairwise reduction, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as +Infinity.", + "html": "

Floating-point minimum horizontally over all lanes of a vector using a recursive pairwise reduction, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as +Infinity.

When FPCR.AH is 0, the behavior is as follows:

When FPCR.AH is 1, the behavior is as follows:

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLA": + return { + "tooltip": "Floating-point fused Multiply-Add to accumulator (by element). This instruction multiplies the vector elements in the first source SIMD&FP register by the specified value in the second source SIMD&FP register, and accumulates the results in the vector elements of the destination SIMD&FP register. All the values in this instruction are floating-point values.", + "html": "

Floating-point fused Multiply-Add to accumulator (by element). This instruction multiplies the vector elements in the first source SIMD&FP register by the specified value in the second source SIMD&FP register, and accumulates the results in the vector elements of the destination SIMD&FP register. All the values in this instruction are floating-point values.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLA": + return { + "tooltip": "Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.", + "html": "

Floating-point fused Multiply-Add to accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, adds the product to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLA": + return { + "tooltip": "Multiply the corresponding active floating-point elements of the first and second source vectors and add to elements of the third source (addend) vector without intermediate rounding. Destructively place the results in the destination and third source (addend) vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Multiply the corresponding active floating-point elements of the first and second source vectors and add to elements of the third source (addend) vector without intermediate rounding. Destructively place the results in the destination and third source (addend) vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLA": + return { + "tooltip": "Multiply all floating-point elements within each 128-bit segment of the first source vector by the specified element in the corresponding second source vector segment. The products are then destructively added without intermediate rounding to the corresponding elements of the addend and destination vector.", + "html": "

Multiply all floating-point elements within each 128-bit segment of the first source vector by the specified element in the corresponding second source vector segment. The products are then destructively added without intermediate rounding to the corresponding elements of the addend and destination vector.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 1 to 3 bits depending on the size of the element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLA": + return { + "tooltip": "Multiply the indexed element of the second source vector by the corresponding floating-point elements of the two or four first source vectors and destructively add without intermediate rounding to the corresponding elements of the ZA single-vector groups.", + "html": "

Multiply the indexed element of the second source vector by the corresponding floating-point elements of the two or four first source vectors and destructively add without intermediate rounding to the corresponding elements of the ZA single-vector groups.

The elements within the second source vector are specified using an immediate element index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 1 to 2 bits depending on the size of the element. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLA": + return { + "tooltip": "Multiply the corresponding floating-point elements of the two or four first source vector with corresponding elements of the second source vector and destructively add without intermediate rounding to the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

Multiply the corresponding floating-point elements of the two or four first source vectors with corresponding elements of the second source vector and destructively add without intermediate rounding to the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.F64F64 indicates whether the double-precision variant is implemented, and ID_AA64SMFR0_EL1.F16F16 indicates whether the half-precision variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLA": + return { + "tooltip": "Multiply the corresponding floating-point elements of the two or four first and second source vectors and destructively add without intermediate rounding to the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

Multiply the corresponding floating-point elements of the two or four first and second source vectors and destructively add without intermediate rounding to the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.F64F64 indicates whether the double-precision variant is implemented, and ID_AA64SMFR0_EL1.F16F16 indicates whether the half-precision variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLAL": + case "FMLAL2": + return { + "tooltip": "Floating-point fused Multiply-Add Long to accumulator (by element). This instruction multiplies the vector elements in the first source SIMD&FP register by the specified value in the second source SIMD&FP register, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.", + "html": "

Floating-point fused Multiply-Add Long to accumulator (by element). This instruction multiplies the vector elements in the first source SIMD&FP register by the specified value in the second source SIMD&FP register, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

In Armv8.2 and Armv8.3, this is an optional instruction. From Armv8.4 it is mandatory for all implementations to support it.

ID_AA64ISAR0_EL1.FHM indicates whether this instruction is supported.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLAL": + case "FMLAL2": + return { + "tooltip": "Floating-point fused Multiply-Add Long to accumulator (vector). This instruction multiplies corresponding half-precision floating-point values in the vectors in the two source SIMD&FP registers, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.", + "html": "

Floating-point fused Multiply-Add Long to accumulator (vector). This instruction multiplies corresponding half-precision floating-point values in the vectors in the two source SIMD&FP registers, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

In Armv8.2 and Armv8.3, this is an optional instruction. From Armv8.4 it is mandatory for all implementations to support it.

ID_AA64ISAR0_EL1.FHM indicates whether this instruction is supported.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLAL": + return { + "tooltip": "This half-precision floating-point multiply-add long instruction widens all 16-bit half-precision elements in the one, two, or four first source vectors and the indexed element of the second source vector to single-precision format, then multiplies the corresponding elements and destructively adds these values without intermediate rounding to the overlapping 32-bit single-precision elements of the ZA double-vector groups.", + "html": "

This half-precision floating-point multiply-add long instruction widens all 16-bit half-precision elements in the one, two, or four first source vectors and the indexed element of the second source vector to single-precision format, then multiplies the corresponding elements and destructively adds these values without intermediate rounding to the overlapping 32-bit single-precision elements of the ZA double-vector groups.

The half-precision elements within the second source vector are specified using a 3-bit immediate index which selects the same element position within each 128-bit vector segment.

The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLAL": + return { + "tooltip": "This half-precision floating-point multiply-add long instruction widens all 16-bit half-precision elements in the one, two, or four first source vectors and the second source vector to single-precision format, then multiplies the corresponding elements and destructively adds these values without intermediate rounding to the overlapping 32-bit single-precision elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.", + "html": "

This half-precision floating-point multiply-add long instruction widens all 16-bit half-precision elements in the one, two, or four first source vectors and the second source vector to single-precision format, then multiplies the corresponding elements and destructively adds these values without intermediate rounding to the overlapping 32-bit single-precision elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLAL": + return { + "tooltip": "This half-precision floating-point multiply-add long instruction widens all 16-bit half-precision elements in the two or four first and second source vectors to single-precision format, then multiplies the corresponding elements and destructively adds these values without intermediate rounding to the overlapping 32-bit single-precision elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

This half-precision floating-point multiply-add long instruction widens all 16-bit half-precision elements in the two or four first and second source vectors to single-precision format, then multiplies the corresponding elements and destructively adds these values without intermediate rounding to the overlapping 32-bit single-precision elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLALB": + return { + "tooltip": "This half-precision floating-point multiply-add long instruction widens the even-numbered half-precision elements in the first source vector and the corresponding elements in the second source vector to single-precision format and then destructively multiplies and adds these values without intermediate rounding to the single-precision elements of the destination vector that overlap with the corresponding half-precision elements in the source vectors. This instruction is unpredicated.", + "html": "

This half-precision floating-point multiply-add long instruction widens the even-numbered half-precision elements in the first source vector and the corresponding elements in the second source vector to single-precision format and then destructively multiplies and adds these values without intermediate rounding to the single-precision elements of the destination vector that overlap with the corresponding half-precision elements in the source vectors. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLALB": + return { + "tooltip": "This half-precision floating-point multiply-add long instruction widens the even-numbered half-precision elements in the first source vector and the indexed element from the corresponding 128-bit segment in the second source vector to single-precision format and then destructively multiplies and adds these values without intermediate rounding to the single-precision elements of the destination vector that overlap with the corresponding half-precision elements in the first source vector. This instruction is unpredicated.", + "html": "

This half-precision floating-point multiply-add long instruction widens the even-numbered half-precision elements in the first source vector and the indexed element from the corresponding 128-bit segment in the second source vector to single-precision format and then destructively multiplies and adds these values without intermediate rounding to the single-precision elements of the destination vector that overlap with the corresponding half-precision elements in the first source vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLALT": + return { + "tooltip": "This half-precision floating-point multiply-add long instruction widens the odd-numbered half-precision elements in the first source vector and the corresponding elements in the second source vector to single-precision format and then destructively multiplies and adds these values without intermediate rounding to the single-precision elements of the destination vector that overlap with the corresponding half-precision elements in the source vectors. This instruction is unpredicated.", + "html": "

This half-precision floating-point multiply-add long instruction widens the odd-numbered half-precision elements in the first source vector and the corresponding elements in the second source vector to single-precision format and then destructively multiplies and adds these values without intermediate rounding to the single-precision elements of the destination vector that overlap with the corresponding half-precision elements in the source vectors. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLALT": + return { + "tooltip": "This half-precision floating-point multiply-add long instruction widens the odd-numbered half-precision elements in the first source vector and the indexed element from the corresponding 128-bit segment in the second source vector to single-precision format and then destructively multiplies and adds these values without intermediate rounding to the single-precision elements of the destination vector that overlap with the corresponding half-precision elements in the first source vector. This instruction is unpredicated.", + "html": "

This half-precision floating-point multiply-add long instruction widens the odd-numbered half-precision elements in the first source vector and the indexed element from the corresponding 128-bit segment in the second source vector to single-precision format and then destructively multiplies and adds these values without intermediate rounding to the single-precision elements of the destination vector that overlap with the corresponding half-precision elements in the first source vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLS": + return { + "tooltip": "Floating-point fused Multiply-Subtract from accumulator (by element). This instruction multiplies the vector elements in the first source SIMD&FP register by the specified value in the second source SIMD&FP register, and subtracts the results from the vector elements of the destination SIMD&FP register. All the values in this instruction are floating-point values.", + "html": "

Floating-point fused Multiply-Subtract from accumulator (by element). This instruction multiplies the vector elements in the first source SIMD&FP register by the specified value in the second source SIMD&FP register, and subtracts the results from the vector elements of the destination SIMD&FP register. All the values in this instruction are floating-point values.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLS": + return { + "tooltip": "Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.", + "html": "

Floating-point fused Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, negates the product, adds the result to the corresponding vector element of the destination SIMD&FP register, and writes the result to the destination SIMD&FP register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLS": + return { + "tooltip": "Multiply the corresponding active floating-point elements of the first and second source vectors and subtract from elements of the third source (addend) vector without intermediate rounding. Destructively place the results in the destination and third source (addend) vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Multiply the corresponding active floating-point elements of the first and second source vectors and subtract from elements of the third source (addend) vector without intermediate rounding. Destructively place the results in the destination and third source (addend) vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLS": + return { + "tooltip": "Multiply all floating-point elements within each 128-bit segment of the first source vector by the specified element in the corresponding second source vector segment. The products are then destructively subtracted without intermediate rounding from the corresponding elements of the addend and destination vector.", + "html": "

Multiply all floating-point elements within each 128-bit segment of the first source vector by the specified element in the corresponding second source vector segment. The products are then destructively subtracted without intermediate rounding from the corresponding elements of the addend and destination vector.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 1 to 3 bits depending on the size of the element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLS": + return { + "tooltip": "Multiply the indexed element of the second source vector by the corresponding floating-point elements of the two or four first source vectors and destructively subtract without intermediate rounding from the corresponding elements of the ZA single-vector groups.", + "html": "

Multiply the indexed element of the second source vector by the corresponding floating-point elements of the two or four first source vectors and destructively subtract without intermediate rounding from the corresponding elements of the ZA single-vector groups.

The elements within the second source vector are specified using an immediate element index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 1 to 2 bits depending on the size of the element. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLS": + return { + "tooltip": "Multiply the corresponding floating-point elements of the two or four first source vector with corresponding elements of the second source vector and destructively subtract without intermediate rounding from the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

Multiply the corresponding floating-point elements of the two or four first source vectors with corresponding elements of the second source vector and destructively subtract without intermediate rounding from the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.F64F64 indicates whether the double-precision variant is implemented, and ID_AA64SMFR0_EL1.F16F16 indicates whether the half-precision variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLS": + return { + "tooltip": "Multiply the corresponding floating-point elements of the two or four first and second source vectors and destructively subtract without intermediate rounding from the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

Multiply the corresponding floating-point elements of the two or four first and second source vectors and destructively subtract without intermediate rounding from the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.F64F64 indicates whether the double-precision variant is implemented, and ID_AA64SMFR0_EL1.F16F16 indicates whether the half-precision variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLSL": + case "FMLSL2": + return { + "tooltip": "Floating-point fused Multiply-Subtract Long from accumulator (by element). This instruction multiplies the negated vector elements in the first source SIMD&FP register by the specified value in the second source SIMD&FP register, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.", + "html": "

Floating-point fused Multiply-Subtract Long from accumulator (by element). This instruction multiplies the negated vector elements in the first source SIMD&FP register by the specified value in the second source SIMD&FP register, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

In Armv8.2 and Armv8.3, this is an optional instruction. From Armv8.4 it is mandatory for all implementations to support it.

ID_AA64ISAR0_EL1.FHM indicates whether this instruction is supported.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLSL": + case "FMLSL2": + return { + "tooltip": "Floating-point fused Multiply-Subtract Long from accumulator (vector). This instruction negates the values in the vector of one SIMD&FP register, multiplies these with the corresponding values in another vector, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.", + "html": "

Floating-point fused Multiply-Subtract Long from accumulator (vector). This instruction negates the values in the vector of one SIMD&FP register, multiplies these with the corresponding values in another vector, and accumulates the product to the corresponding vector element of the destination SIMD&FP register. The instruction does not round the result of the multiply before the accumulation.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

In Armv8.2 and Armv8.3, this is an optional instruction. From Armv8.4 it is mandatory for all implementations to support it.

ID_AA64ISAR0_EL1.FHM indicates whether this instruction is supported.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLSL": + return { + "tooltip": "This half-precision floating-point multiply-subtract long instruction widens all 16-bit half-precision elements in the one, two, or four first source vectors and the indexed element of the second source vector to single-precision format, then multiplies the corresponding elements and destructively subtracts these values without intermediate rounding from the overlapping 32-bit single-precision elements of the ZA double-vector groups.", + "html": "

This half-precision floating-point multiply-subtract long instruction widens all 16-bit half-precision elements in the one, two, or four first source vectors and the indexed element of the second source vector to single-precision format, then multiplies the corresponding elements and destructively subtracts these values without intermediate rounding from the overlapping 32-bit single-precision elements of the ZA double-vector groups.

The half-precision elements within the second source vector are specified using a 3-bit immediate index which selects the same element position within each 128-bit vector segment.

The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLSL": + return { + "tooltip": "This half-precision floating-point multiply-subtract long instruction widens all 16-bit half-precision elements in the one, two, or four first source vectors and the second source vector to single-precision format, then multiplies the corresponding elements and destructively subtracts these values without intermediate rounding from the overlapping 32-bit single-precision elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.", + "html": "

This half-precision floating-point multiply-subtract long instruction widens all 16-bit half-precision elements in the one, two, or four first source vectors and the second source vector to single-precision format, then multiplies the corresponding elements and destructively subtracts these values without intermediate rounding from the overlapping 32-bit single-precision elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLSL": + return { + "tooltip": "This half-precision floating-point multiply-subtract long instruction widens all 16-bit half-precision elements in the two or four first and second source vectors to single-precision format, then multiplies the corresponding elements and destructively subtracts these values without intermediate rounding from the overlapping 32-bit single-precision elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

This half-precision floating-point multiply-subtract long instruction widens all 16-bit half-precision elements in the two or four first and second source vectors to single-precision format, then multiplies the corresponding elements and destructively subtracts these values without intermediate rounding from the overlapping 32-bit single-precision elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLSLB": + return { + "tooltip": "This half-precision floating-point multiply-subtract long instruction widens the even-numbered half-precision elements in the first source vector and the corresponding elements in the second source vector to single-precision format and then destructively multiplies and subtracts these values without intermediate rounding from the single-precision elements of the destination vector that overlap with the corresponding half-precision elements in the source vectors. This instruction is unpredicated.", + "html": "

This half-precision floating-point multiply-subtract long instruction widens the even-numbered half-precision elements in the first source vector and the corresponding elements in the second source vector to single-precision format and then destructively multiplies and subtracts these values without intermediate rounding from the single-precision elements of the destination vector that overlap with the corresponding half-precision elements in the source vectors. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLSLB": + return { + "tooltip": "This half-precision floating-point multiply-subtract long instruction widens the even-numbered half-precision elements in the first source vector and the indexed element from the corresponding 128-bit segment in the second source vector to single-precision format and then destructively multiplies and subtracts these values without intermediate rounding from the single-precision elements of the destination vector that overlap with the corresponding half-precision elements in the first source vector. This instruction is unpredicated.", + "html": "

This half-precision floating-point multiply-subtract long instruction widens the even-numbered half-precision elements in the first source vector and the indexed element from the corresponding 128-bit segment in the second source vector to single-precision format and then destructively multiplies and subtracts these values without intermediate rounding from the single-precision elements of the destination vector that overlap with the corresponding half-precision elements in the first source vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLSLT": + return { + "tooltip": "This half-precision floating-point multiply-subtract long instruction widens the odd-numbered half-precision elements in the first source vector and the corresponding elements in the second source vector to single-precision format and then destructively multiplies and subtracts these values without intermediate rounding from the single-precision elements of the destination vector that overlap with the corresponding half-precision elements in the source vectors. This instruction is unpredicated.", + "html": "

This half-precision floating-point multiply-subtract long instruction widens the odd-numbered half-precision elements in the first source vector and the corresponding elements in the second source vector to single-precision format and then destructively multiplies and subtracts these values without intermediate rounding from the single-precision elements of the destination vector that overlap with the corresponding half-precision elements in the source vectors. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMLSLT": + return { + "tooltip": "This half-precision floating-point multiply-subtract long instruction widens the odd-numbered half-precision elements in the first source vector and the indexed element from the corresponding 128-bit segment in the second source vector to single-precision format and then destructively multiplies and subtracts these values without intermediate rounding from the single-precision elements of the destination vector that overlap with the corresponding half-precision elements in the first source vector. This instruction is unpredicated.", + "html": "

This half-precision floating-point multiply-subtract long instruction widens the odd-numbered half-precision elements in the first source vector and the indexed element from the corresponding 128-bit segment in the second source vector to single-precision format and then destructively multiplies and subtracts these values without intermediate rounding from the single-precision elements of the destination vector that overlap with the corresponding half-precision elements in the first source vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMMLA": + return { + "tooltip": "The floating-point matrix multiply-accumulate instruction supports single-precision and double-precision data types in a 2\u00d72 matrix contained in segments of 128 or 256 bits, respectively. It multiplies the 2\u00d72 matrix in each segment of the first source vector by the 2\u00d72 matrix in the corresponding segment of the second source vector. The resulting 2\u00d72 matrix product is then destructively added to the matrix accumulator held in the corresponding segment of the addend and destination vector. This is equivalent to performing a 2-way dot product per destination element. This instruction is unpredicated. The single-precision variant is vector length agnostic. The double-precision variant requires that the current vector length is at least 256 bits, and if the current vector length is not an integer multiple of 256 bits then the trailing bits are set to zero.", + "html": "

The floating-point matrix multiply-accumulate instruction supports single-precision and double-precision data types in a 2\u00d72 matrix contained in segments of 128 or 256 bits, respectively. It multiplies the 2\u00d72 matrix in each segment of the first source vector by the 2\u00d72 matrix in the corresponding segment of the second source vector. The resulting 2\u00d72 matrix product is then destructively added to the matrix accumulator held in the corresponding segment of the addend and destination vector. This is equivalent to performing a 2-way dot product per destination element. This instruction is unpredicated. The single-precision variant is vector length agnostic. The double-precision variant requires that the current vector length is at least 256 bits, and if the current vector length is not an integer multiple of 256 bits then the trailing bits are set to zero.

ID_AA64ZFR0_EL1.F32MM indicates whether the single-precision variant is implemented.

ID_AA64ZFR0_EL1.F64MM indicates whether the double-precision variant is implemented.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMOPA": + return { + "tooltip": "The half-precision floating-point sum of outer products and accumulate instruction works with a 32-bit element ZA tile.", + "html": "

The half-precision floating-point sum of outer products and accumulate instruction works with a 32-bit element ZA tile.

This instruction widens the SVLS\u00d72 sub-matrix of half-precision floating-point values held in the first source vector to single-precision floating-point values and multiplies it by the widened 2\u00d7SVLS sub-matrix of half-precision floating-point values in the second source vector to single-precision floating-point values.

Each source vector is independently predicated by a corresponding governing predicate. When a 16-bit source element is Inactive it is treated as having the value +0.0, but if both pairs of source vector elements that correspond to a 32-bit destination element contain Inactive elements, then the destination element remains unmodified.

The resulting SVLS\u00d7SVLS single-precision floating-point sum of outer products is then destructively added to the single-precision floating-point destination tile. This is equivalent to performing a 2-way dot product and accumulate to each of the destination tile elements.

Each 32-bit container of the first source vector holds 2 consecutive column elements of each row of a SVLS\u00d72 sub-matrix. Similarly, each 32-bit container of the second source vector holds 2 consecutive row elements of each column of a 2\u00d7SVLS sub-matrix.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMOPA": + return { + "tooltip": "The half-precision variant works with a 16-bit element ZA tile.", + "html": "

The half-precision variant works with a 16-bit element ZA tile.

The single-precision variant works with a 32-bit element ZA tile.

The double-precision variant works with a 64-bit element ZA tile.

These instructions generate an outer product of the first source vector and the second source vector. In case of the half-precision variant, the first source is SVLH\u00d71 vector and the second source is 1\u00d7SVLH vector. In case of the single-precision variant, the first source is SVLS\u00d71 vector and the second source is 1\u00d7SVLS vector. In case of the double-precision variant, the first source is SVLD\u00d71 vector and the second source is 1\u00d7SVLD vector.

Each source vector is independently predicated by a corresponding governing predicate. When either source vector element is Inactive the corresponding destination tile element remains unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMOPS": + return { + "tooltip": "The half-precision floating-point sum of outer products and subtract instruction works with a 32-bit element ZA tile.", + "html": "

The half-precision floating-point sum of outer products and subtract instruction works with a 32-bit element ZA tile.

This instruction widens the SVLS\u00d72 sub-matrix of half-precision floating-point values held in the first source vector to single-precision floating-point values and multiplies it by the widened 2\u00d7SVLS sub-matrix of half-precision floating-point values in the second source vector to single-precision floating-point values.

Each source vector is independently predicated by a corresponding governing predicate. When a 16-bit source element is Inactive it is treated as having the value +0.0, but if both pairs of source vector elements that correspond to a 32-bit destination element contain Inactive elements, then the destination element remains unmodified.

The resulting SVLS\u00d7SVLS single-precision floating-point sum of outer products is then destructively subtracted from the single-precision floating-point destination tile. This is equivalent to performing a 2-way dot product and subtract from each of the destination tile elements.

Each 32-bit container of the first source vector holds 2 consecutive column elements of each row of a SVLS\u00d72 sub-matrix. Similarly, each 32-bit container of the second source vector holds 2 consecutive row elements of each column of a 2\u00d7SVLS sub-matrix.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMOPS": + return { + "tooltip": "The half-precision variant works with a 16-bit element ZA tile.", + "html": "

The half-precision variant works with a 16-bit element ZA tile.

The single-precision variant works with a 32-bit element ZA tile.

The double-precision variant works with a 64-bit element ZA tile.

These instructions generate an outer product of the first source vector and the second source vector. In case of the half-precision variant, the first source is SVLH\u00d71 vector and the second source is 1\u00d7SVLH vector. In case of the single-precision variant, the first source is SVLS\u00d71 vector and the second source is 1\u00d7SVLS vector. In case of the double-precision variant, the first source is SVLD\u00d71 vector and the second source is 1\u00d7SVLD vector.

Each source vector is independently predicated by a corresponding governing predicate. When either source vector element is Inactive the corresponding destination tile element remains unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMOV": + return { + "tooltip": "Floating-point move immediate (vector). This instruction copies an immediate floating-point constant into every element of the SIMD&FP destination register.", + "html": "

Floating-point move immediate (vector). This instruction copies an immediate floating-point constant into every element of the SIMD&FP destination register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMOV": + return { + "tooltip": "Move floating-point constant +0.0 to each active element in the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Move floating-point constant +0.0 to each active element in the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMOV": + return { + "tooltip": "Unconditionally broadcast the floating-point constant +0.0 into each element of the destination vector. This instruction is unpredicated.", + "html": "

Unconditionally broadcast the floating-point constant +0.0 into each element of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMOV": + return { + "tooltip": "Move a floating-point immediate into each active element in the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Move a floating-point immediate into each active element in the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMOV": + return { + "tooltip": "Unconditionally broadcast the floating-point immediate into each element of the destination vector. This instruction is unpredicated.", + "html": "

Unconditionally broadcast the floating-point immediate into each element of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMOV": + return { + "tooltip": "Floating-point Move register without conversion. This instruction copies the floating-point value in the SIMD&FP source register to the SIMD&FP destination register.", + "html": "

Floating-point Move register without conversion. This instruction copies the floating-point value in the SIMD&FP source register to the SIMD&FP destination register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMOV": + return { + "tooltip": "Floating-point Move to or from general-purpose register without conversion. This instruction transfers the contents of a SIMD&FP register to a general-purpose register, or the contents of a general-purpose register to a SIMD&FP register.", + "html": "

Floating-point Move to or from general-purpose register without conversion. This instruction transfers the contents of a SIMD&FP register to a general-purpose register, or the contents of a general-purpose register to a SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMOV": + return { + "tooltip": "Floating-point move immediate (scalar). This instruction copies a floating-point immediate constant into the SIMD&FP destination register.", + "html": "

Floating-point move immediate (scalar). This instruction copies a floating-point immediate constant into the SIMD&FP destination register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMSB": + return { + "tooltip": "Multiply the corresponding active floating-point elements of the first and second source vectors and subtract from elements of the third (addend) vector without intermediate rounding. Destructively place the results in the destination and first source (multiplicand) vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Multiply the corresponding active floating-point elements of the first and second source vectors and subtract from elements of the third (addend) vector without intermediate rounding. Destructively place the results in the destination and first source (multiplicand) vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMSUB": + return { + "tooltip": "Floating-point Fused Multiply-Subtract (scalar). This instruction multiplies the values of the first two SIMD&FP source registers, negates the product, adds that to the value of the third SIMD&FP source register, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Fused Multiply-Subtract (scalar). This instruction multiplies the values of the first two SIMD&FP source registers, negates the product, adds that to the value of the third SIMD&FP source register, and writes the result to the SIMD&FP destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMUL": + return { + "tooltip": "Floating-point Multiply (by element). This instruction multiplies the vector elements in the first source SIMD&FP register by the specified value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.", + "html": "

Floating-point Multiply (by element). This instruction multiplies the vector elements in the first source SIMD&FP register by the specified value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are floating-point values.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMUL": + return { + "tooltip": "Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Floating-point Multiply (vector). This instruction multiplies corresponding floating-point values in the vectors in the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMUL": + return { + "tooltip": "Floating-point Multiply (scalar). This instruction multiplies the floating-point values of the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.", + "html": "

Floating-point Multiply (scalar). This instruction multiplies the floating-point values of the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMUL": + return { + "tooltip": "Multiply by an immediate each active floating-point element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate may take the value +0.5 or +2.0 only. Inactive elements in the destination vector register remain unmodified.", + "html": "

Multiply by an immediate each active floating-point element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate may take the value +0.5 or +2.0 only. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMUL": + return { + "tooltip": "Multiply active floating-point elements of the first source vector by corresponding floating-point elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Multiply active floating-point elements of the first source vector by corresponding floating-point elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMUL": + return { + "tooltip": "Multiply all elements of the first source vector by corresponding floating-point elements of the second source vector and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.", + "html": "

Multiply all elements of the first source vector by corresponding floating-point elements of the second source vector and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMUL": + return { + "tooltip": "Multiply all floating-point elements within each 128-bit segment of the first source vector by the specified element in the corresponding second source vector segment. The results are placed in the corresponding elements of the destination vector.", + "html": "

Multiply all floating-point elements within each 128-bit segment of the first source vector by the specified element in the corresponding second source vector segment. The results are placed in the corresponding elements of the destination vector.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 1 to 3 bits depending on the size of the element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMULX": + return { + "tooltip": "Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Floating-point Multiply extended (by element). This instruction multiplies the floating-point values in the vector elements in the first source SIMD&FP register by the specified floating-point value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

If one value is zero and the other value is infinite, the result is 2.0. In this case, the result is negative if only one of the values is negative, otherwise the result is positive.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMULX": + return { + "tooltip": "Floating-point Multiply extended. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Floating-point Multiply extended. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.

If one value is zero and the other value is infinite, the result is 2.0. In this case, the result is negative if only one of the values is negative, otherwise the result is positive.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FMULX": + return { + "tooltip": "Multiply active floating-point elements of the first source vector by corresponding floating-point elements of the second source vector except that \u221e\u00d70.0 gives 2.0 instead of NaN, and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Multiply active floating-point elements of the first source vector by corresponding floating-point elements of the second source vector except that \u221e\u00d70.0 gives 2.0 instead of NaN, and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

The instruction can be used with FRECPX to safely convert arbitrary elements in mathematical vector space to unit vectors or direction vectors with length 1.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FNEG": + return { + "tooltip": "Floating-point Negate (vector). This instruction negates the value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Floating-point Negate (vector). This instruction negates the value of each vector element in the source SIMD&FP register, writes the result to a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FNEG": + return { + "tooltip": "Floating-point Negate (scalar). This instruction negates the value in the SIMD&FP source register and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Negate (scalar). This instruction negates the value in the SIMD&FP source register and writes the result to the SIMD&FP destination register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FNEG": + return { + "tooltip": "Negate each active floating-point element of the source vector, and place the results in the corresponding elements of the destination vector. This inverts the sign bit and cannot signal a floating-point exception. Inactive elements in the destination vector register remain unmodified.", + "html": "

Negate each active floating-point element of the source vector, and place the results in the corresponding elements of the destination vector. This inverts the sign bit and cannot signal a floating-point exception. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FNMAD": + return { + "tooltip": "Multiply the corresponding active floating-point elements of the first and second source vectors and add to elements of the third (addend) vector without intermediate rounding. Destructively place the negated results in the destination and first source (multiplicand) vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Multiply the corresponding active floating-point elements of the first and second source vectors and add to elements of the third (addend) vector without intermediate rounding. Destructively place the negated results in the destination and first source (multiplicand) vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FNMADD": + return { + "tooltip": "Floating-point Negated fused Multiply-Add (scalar). This instruction multiplies the values of the first two SIMD&FP source registers, negates the product, subtracts the value of the third SIMD&FP source register, and writes the result to the destination SIMD&FP register.", + "html": "

Floating-point Negated fused Multiply-Add (scalar). This instruction multiplies the values of the first two SIMD&FP source registers, negates the product, subtracts the value of the third SIMD&FP source register, and writes the result to the destination SIMD&FP register.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FNMLA": + return { + "tooltip": "Multiply the corresponding active floating-point elements of the first and second source vectors and add to elements of the third source (addend) vector without intermediate rounding. Destructively place the negated results in the destination and third source (addend) vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Multiply the corresponding active floating-point elements of the first and second source vectors and add to elements of the third source (addend) vector without intermediate rounding. Destructively place the negated results in the destination and third source (addend) vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FNMLS": + return { + "tooltip": "Multiply the corresponding active floating-point elements of the first and second source vectors and subtract from elements of the third source (addend) vector without intermediate rounding. Destructively place the negated results in the destination and third source (addend) vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Multiply the corresponding active floating-point elements of the first and second source vectors and subtract from elements of the third source (addend) vector without intermediate rounding. Destructively place the negated results in the destination and third source (addend) vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FNMSB": + return { + "tooltip": "Multiply the corresponding active floating-point elements of the first and second source vectors and subtract from elements of the third (addend) vector without intermediate rounding. Destructively place the negated results in the destination and first source (multiplicand) vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Multiply the corresponding active floating-point elements of the first and second source vectors and subtract from elements of the third (addend) vector without intermediate rounding. Destructively place the negated results in the destination and first source (multiplicand) vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FNMSUB": + return { + "tooltip": "Floating-point Negated fused Multiply-Subtract (scalar). This instruction multiplies the values of the first two SIMD&FP source registers, subtracts the value of the third SIMD&FP source register, and writes the result to the destination SIMD&FP register.", + "html": "

Floating-point Negated fused Multiply-Subtract (scalar). This instruction multiplies the values of the first two SIMD&FP source registers, subtracts the value of the third SIMD&FP source register, and writes the result to the destination SIMD&FP register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FNMUL": + return { + "tooltip": "Floating-point Multiply-Negate (scalar). This instruction multiplies the floating-point values of the two source SIMD&FP registers, and writes the negation of the result to the destination SIMD&FP register.", + "html": "

Floating-point Multiply-Negate (scalar). This instruction multiplies the floating-point values of the two source SIMD&FP registers, and writes the negation of the result to the destination SIMD&FP register.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRECPE": + return { + "tooltip": "Floating-point Reciprocal Estimate. This instruction finds an approximate reciprocal estimate for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Floating-point Reciprocal Estimate. This instruction finds an approximate reciprocal estimate for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRECPE": + return { + "tooltip": "Find the approximate reciprocal of each floating-point element of the source vector, and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.", + "html": "

Find the approximate reciprocal of each floating-point element of the source vector, and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRECPS": + return { + "tooltip": "Floating-point Reciprocal Step. This instruction multiplies the corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 2.0, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Floating-point Reciprocal Step. This instruction multiplies the corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 2.0, places the resulting floating-point values in a vector, and writes the vector to the destination SIMD&FP register.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRECPS": + return { + "tooltip": "Multiply corresponding floating-point elements of the first and second source vectors, subtract the products from 2.0 without intermediate rounding and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.", + "html": "

Multiply corresponding floating-point elements of the first and second source vectors, subtract the products from 2.0 without intermediate rounding and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.

This instruction can be used to perform a single Newton-Raphson iteration for calculating the reciprocal of a vector of floating-point values.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRECPX": + return { + "tooltip": "Floating-point Reciprocal exponent (scalar). This instruction finds an approximate reciprocal exponent for the source SIMD&FP register and writes the result to the destination SIMD&FP register.", + "html": "

Floating-point Reciprocal exponent (scalar). This instruction finds an approximate reciprocal exponent for the source SIMD&FP register and writes the result to the destination SIMD&FP register.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRECPX": + return { + "tooltip": "Invert the exponent and zero the fractional part of each active floating-point element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Invert the exponent and zero the fractional part of each active floating-point element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

The result of this instruction can be used with FMULX to convert arbitrary elements in mathematical vector space to \"unit vectors\" or \"direction vectors\" of length 1.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINT32X": + return { + "tooltip": "Floating-point Round to 32-bit Integer, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 32-bit integer size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Round to 32-bit Integer, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 32-bit integer size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.

A zero input returns a zero result with the same sign. When one of the result values is not numerically equal to the corresponding input value, an Inexact exception is raised. When an input is infinite, NaN or out-of-range, the instruction returns for the corresponding result value the most negative integer representable in the destination size, and an Invalid Operation floating-point exception is raised.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINT32X": + return { + "tooltip": "Floating-point Round to 32-bit Integer, using current rounding mode (scalar). This instruction rounds a floating-point value in the SIMD&FP source register to an integral floating-point value that fits into a 32-bit integer size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Round to 32-bit Integer, using current rounding mode (scalar). This instruction rounds a floating-point value in the SIMD&FP source register to an integral floating-point value that fits into a 32-bit integer size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.

A zero input returns a zero result with the same sign. When the result value is not numerically equal to the input value, an Inexact exception is raised. When the input is infinite, NaN or out-of-range, the instruction returns {for the corresponding result value} the most negative integer representable in the destination size, and an Invalid Operation floating-point exception is raised.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINT32Z": + return { + "tooltip": "Floating-point Round to 32-bit Integer toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 32-bit integer size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Round to 32-bit Integer toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 32-bit integer size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

A zero input returns a zero result with the same sign. When one of the result values is not numerically equal to the corresponding input value, an Inexact exception is raised. When an input is infinite, NaN or out-of-range, the instruction returns for the corresponding result value the most negative integer representable in the destination size, and an Invalid Operation floating-point exception is raised.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINT32Z": + return { + "tooltip": "Floating-point Round to 32-bit Integer toward Zero (scalar). This instruction rounds a floating-point value in the SIMD&FP source register to an integral floating-point value that fits into a 32-bit integer size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Round to 32-bit Integer toward Zero (scalar). This instruction rounds a floating-point value in the SIMD&FP source register to an integral floating-point value that fits into a 32-bit integer size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

A zero input returns a zero result with the same sign. When the result value is not numerically equal to the {corresponding} input value, an Inexact exception is raised. When the input is infinite, NaN or out-of-range, the instruction returns {for the corresponding result value} the most negative integer representable in the destination size, and an Invalid Operation floating-point exception is raised.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINT64X": + return { + "tooltip": "Floating-point Round to 64-bit Integer, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 64-bit integer size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Round to 64-bit Integer, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 64-bit integer size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.

A zero input returns a zero result with the same sign. When one of the result values is not numerically equal to the corresponding input value, an Inexact exception is raised. When an input is infinite, NaN or out-of-range, the instruction returns for the corresponding result value the most negative integer representable in the destination size, and an Invalid Operation floating-point exception is raised.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINT64X": + return { + "tooltip": "Floating-point Round to 64-bit Integer, using current rounding mode (scalar). This instruction rounds a floating-point value in the SIMD&FP source register to an integral floating-point value that fits into a 64-bit integer size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Round to 64-bit Integer, using current rounding mode (scalar). This instruction rounds a floating-point value in the SIMD&FP source register to an integral floating-point value that fits into a 64-bit integer size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.

A zero input returns a zero result with the same sign. When the result value is not numerically equal to the input value, an Inexact exception is raised. When the input is infinite, NaN or out-of-range, the instruction returns {for the corresponding result value} the most negative integer representable in the destination size, and an Invalid Operation floating-point exception is raised.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINT64Z": + return { + "tooltip": "Floating-point Round to 64-bit Integer toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 64-bit integer size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Round to 64-bit Integer toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values that fit into a 64-bit integer size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

A zero input returns a zero result with the same sign. When one of the result values is not numerically equal to the corresponding input value, an Inexact exception is raised. When an input is infinite, NaN or out-of-range, the instruction returns for the corresponding result value the most negative integer representable in the destination size, and an Invalid Operation floating-point exception is raised.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINT64Z": + return { + "tooltip": "Floating-point Round to 64-bit Integer toward Zero (scalar). This instruction rounds a floating-point value in the SIMD&FP source register to an integral floating-point value that fits into a 64-bit integer size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Round to 64-bit Integer toward Zero (scalar). This instruction rounds a floating-point value in the SIMD&FP source register to an integral floating-point value that fits into a 64-bit integer size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

A zero input returns a zero result with the same sign. When the result value is not numerically equal to the {corresponding} input value, an Inexact exception is raised. When the input is infinite, NaN or out-of-range, the instruction returns {for the corresponding result value} the most negative integer representable in the destination size, and an Invalid Operation floating-point exception is raised.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINTA": + return { + "tooltip": "Floating-point Round to Integral, to nearest with ties to Away (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest with Ties to Away rounding mode, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Round to Integral, to nearest with ties to Away (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest with Ties to Away rounding mode, and writes the result to the SIMD&FP destination register.

A zero input gives a zero result with the same sign, an infinite input gives an infinite result with the same sign, and a NaN is propagated as for normal arithmetic.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINTA": + return { + "tooltip": "Floating-point Round to Integral, to nearest with ties to Away (scalar). This instruction rounds a floating-point value in the SIMD&FP source register to an integral floating-point value of the same size using the Round to Nearest with Ties to Away rounding mode, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Round to Integral, to nearest with ties to Away (scalar). This instruction rounds a floating-point value in the SIMD&FP source register to an integral floating-point value of the same size using the Round to Nearest with Ties to Away rounding mode, and writes the result to the SIMD&FP destination register.

A zero input gives a zero result with the same sign, an infinite input gives an infinite result with the same sign, and a NaN is propagated as for normal arithmetic.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINTA": + return { + "tooltip": "Round to the nearest integral floating-point value, with ties rounding away from zero, each element of the two or four source vectors, and place the results in the corresponding elements of the two or four destination vectors.", + "html": "

Round to the nearest integral floating-point value, with ties rounding away from zero, each element of the two or four source vectors, and place the results in the corresponding elements of the two or four destination vectors.

This instruction follows SME2 floating-point numerical behaviors corresponding to instructions that place their results in one or more SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINT": + case "FRINTA": + case "FRINTI": + case "FRINTM": + case "FRINTN": + case "FRINTP": + case "FRINTX": + return { + "tooltip": "Round to an integral floating-point value with the specified rounding option from each active floating-point element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Round to an integral floating-point value with the specified rounding option from each active floating-point element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINTI": + return { + "tooltip": "Floating-point Round to Integral, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Round to Integral, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.

A zero input gives a zero result with the same sign, an infinite input gives an infinite result with the same sign, and a NaN is propagated as for normal arithmetic.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINTI": + return { + "tooltip": "Floating-point Round to Integral, using current rounding mode (scalar). This instruction rounds a floating-point value in the SIMD&FP source register to an integral floating-point value of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Round to Integral, using current rounding mode (scalar). This instruction rounds a floating-point value in the SIMD&FP source register to an integral floating-point value of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.

A zero input gives a zero result with the same sign, an infinite input gives an infinite result with the same sign, and a NaN is propagated as for normal arithmetic.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINTM": + return { + "tooltip": "Floating-point Round to Integral, toward Minus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Round to Integral, toward Minus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

A zero input gives a zero result with the same sign, an infinite input gives an infinite result with the same sign, and a NaN is propagated as for normal arithmetic.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINTM": + return { + "tooltip": "Floating-point Round to Integral, toward Minus infinity (scalar). This instruction rounds a floating-point value in the SIMD&FP source register to an integral floating-point value of the same size using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Round to Integral, toward Minus infinity (scalar). This instruction rounds a floating-point value in the SIMD&FP source register to an integral floating-point value of the same size using the Round towards Minus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

A zero input gives a zero result with the same sign, an infinite input gives an infinite result with the same sign, and a NaN is propagated as for normal arithmetic.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINTM": + return { + "tooltip": "Round down to an integral floating-point value, each element of the two or four source vectors, and place the results in the corresponding elements of the two or four destination vectors.", + "html": "

Round down to an integral floating-point value, each element of the two or four source vectors, and place the results in the corresponding elements of the two or four destination vectors.

This instruction follows SME2 floating-point numerical behaviors corresponding to instructions that place their results in one or more SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINTN": + return { + "tooltip": "Floating-point Round to Integral, to nearest with ties to even (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Round to Integral, to nearest with ties to even (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

A zero input gives a zero result with the same sign, an infinite input gives an infinite result with the same sign, and a NaN is propagated as for normal arithmetic.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINTN": + return { + "tooltip": "Floating-point Round to Integral, to nearest with ties to even (scalar). This instruction rounds a floating-point value in the SIMD&FP source register to an integral floating-point value of the same size using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Round to Integral, to nearest with ties to even (scalar). This instruction rounds a floating-point value in the SIMD&FP source register to an integral floating-point value of the same size using the Round to Nearest rounding mode, and writes the result to the SIMD&FP destination register.

A zero input gives a zero result with the same sign, an infinite input gives an infinite result with the same sign, and a NaN is propagated as for normal arithmetic.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINTN": + return { + "tooltip": "Round to the nearest integral floating-point value, with ties rounding to an even value, each element of the two or four source vectors, and place the results in the corresponding elements of the two or four destination vectors.", + "html": "

Round to the nearest integral floating-point value, with ties rounding to an even value, each element of the two or four source vectors, and place the results in the corresponding elements of the two or four destination vectors.

This instruction follows SME2 floating-point numerical behaviors corresponding to instructions that place their results in one or more SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINTP": + return { + "tooltip": "Floating-point Round to Integral, toward Plus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Round to Integral, toward Plus infinity (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

A zero input gives a zero result with the same sign, an infinite input gives an infinite result with the same sign, and a NaN is propagated as for normal arithmetic.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINTP": + return { + "tooltip": "Floating-point Round to Integral, toward Plus infinity (scalar). This instruction rounds a floating-point value in the SIMD&FP source register to an integral floating-point value of the same size using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Round to Integral, toward Plus infinity (scalar). This instruction rounds a floating-point value in the SIMD&FP source register to an integral floating-point value of the same size using the Round towards Plus Infinity rounding mode, and writes the result to the SIMD&FP destination register.

A zero input gives a zero result with the same sign, an infinite input gives an infinite result with the same sign, and a NaN is propagated as for normal arithmetic.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINTP": + return { + "tooltip": "Round up to an integral floating-point value, each element of the two or four source vectors, and place the results in the corresponding elements of the two or four destination vectors.", + "html": "

Round up to an integral floating-point value, each element of the two or four source vectors, and place the results in the corresponding elements of the two or four destination vectors.

This instruction follows SME2 floating-point numerical behaviors corresponding to instructions that place their results in one or more SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINTX": + return { + "tooltip": "Floating-point Round to Integral exact, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Round to Integral exact, using current rounding mode (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.

When a result value is not numerically equal to the corresponding input value, an Inexact exception is raised. A zero input gives a zero result with the same sign, an infinite input gives an infinite result with the same sign, and a NaN is propagated as for normal arithmetic.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINTX": + return { + "tooltip": "Floating-point Round to Integral exact, using current rounding mode (scalar). This instruction rounds a floating-point value in the SIMD&FP source register to an integral floating-point value of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Round to Integral exact, using current rounding mode (scalar). This instruction rounds a floating-point value in the SIMD&FP source register to an integral floating-point value of the same size using the rounding mode that is determined by the FPCR, and writes the result to the SIMD&FP destination register.

When the result value is not numerically equal to the input value, an Inexact exception is raised. A zero input gives a zero result with the same sign, an infinite input gives an infinite result with the same sign, and a NaN is propagated as for normal arithmetic.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINTZ": + return { + "tooltip": "Floating-point Round to Integral, toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Round to Integral, toward Zero (vector). This instruction rounds a vector of floating-point values in the SIMD&FP source register to integral floating-point values of the same size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

A zero input gives a zero result with the same sign, an infinite input gives an infinite result with the same sign, and a NaN is propagated as for normal arithmetic.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRINTZ": + return { + "tooltip": "Floating-point Round to Integral, toward Zero (scalar). This instruction rounds a floating-point value in the SIMD&FP source register to an integral floating-point value of the same size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Round to Integral, toward Zero (scalar). This instruction rounds a floating-point value in the SIMD&FP source register to an integral floating-point value of the same size using the Round towards Zero rounding mode, and writes the result to the SIMD&FP destination register.

A zero input gives a zero result with the same sign, an infinite input gives an infinite result with the same sign, and a NaN is propagated as for normal arithmetic.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRSQRTE": + return { + "tooltip": "Floating-point Reciprocal Square Root Estimate. This instruction calculates an approximate reciprocal square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Floating-point Reciprocal Square Root Estimate. This instruction calculates an approximate reciprocal square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRSQRTE": + return { + "tooltip": "Find the approximate reciprocal square root of each floating-point element of the source vector, and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.", + "html": "

Find the approximate reciprocal square root of each floating-point element of the source vector, and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRSQRTS": + return { + "tooltip": "Floating-point Reciprocal Square Root Step. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 3.0, divides these results by 2.0, places the results into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Floating-point Reciprocal Square Root Step. This instruction multiplies corresponding floating-point values in the vectors of the two source SIMD&FP registers, subtracts each of the products from 3.0, divides these results by 2.0, places the results into a vector, and writes the vector to the destination SIMD&FP register.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FRSQRTS": + return { + "tooltip": "Multiply corresponding floating-point elements of the first and second source vectors, subtract the products from 3.0 and divide the results by 2.0 without any intermediate rounding and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.", + "html": "

Multiply corresponding floating-point elements of the first and second source vectors, subtract the products from 3.0 and divide the results by 2.0 without any intermediate rounding and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.

This instruction can be used to perform a single Newton-Raphson iteration for calculating the reciprocal square root of a vector of floating-point values.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FSCALE": + return { + "tooltip": "Multiply the active floating-point elements of the first source vector by 2.0 to the power of the signed integer values in the corresponding elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Multiply the active floating-point elements of the first source vector by 2.0 to the power of the signed integer values in the corresponding elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FSQRT": + return { + "tooltip": "Floating-point Square Root (vector). This instruction calculates the square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Floating-point Square Root (vector). This instruction calculates the square root for each vector element in the source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FSQRT": + return { + "tooltip": "Floating-point Square Root (scalar). This instruction calculates the square root of the value in the SIMD&FP source register and writes the result to the SIMD&FP destination register.", + "html": "

Floating-point Square Root (scalar). This instruction calculates the square root of the value in the SIMD&FP source register and writes the result to the SIMD&FP destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FSQRT": + return { + "tooltip": "Calculate the square root of each active floating-point element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Calculate the square root of each active floating-point element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FSUB": + return { + "tooltip": "Floating-point Subtract (vector). This instruction subtracts the elements in the vector in the second source SIMD&FP register, from the corresponding elements in the vector in the first source SIMD&FP register, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Floating-point Subtract (vector). This instruction subtracts the elements in the vector in the second source SIMD&FP register, from the corresponding elements in the vector in the first source SIMD&FP register, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FSUB": + return { + "tooltip": "Floating-point Subtract (scalar). This instruction subtracts the floating-point value of the second source SIMD&FP register from the floating-point value of the first source SIMD&FP register, and writes the result to the destination SIMD&FP register.", + "html": "

Floating-point Subtract (scalar). This instruction subtracts the floating-point value of the second source SIMD&FP register from the floating-point value of the first source SIMD&FP register, and writes the result to the destination SIMD&FP register.

This instruction can generate a floating-point exception. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FSUB": + return { + "tooltip": "Subtract an immediate from each active floating-point element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate may take the value +0.5 or +1.0 only. Inactive elements in the destination vector register remain unmodified.", + "html": "

Subtract an immediate from each active floating-point element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate may take the value +0.5 or +1.0 only. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FSUB": + return { + "tooltip": "Subtract active floating-point elements of the second source vector from corresponding floating-point elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Subtract active floating-point elements of the second source vector from corresponding floating-point elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FSUB": + return { + "tooltip": "Subtract all floating-point elements of the second source vector from corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.", + "html": "

Subtract all floating-point elements of the second source vector from corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FSUB": + return { + "tooltip": "Destructively subtract all elements of the two or four source vectors from the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

Destructively subtract all elements of the two or four source vectors from the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.F64F64 indicates whether the double-precision variant is implemented, and ID_AA64SMFR0_EL1.F16F16 indicates whether the half-precision variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FSUBR": + return { + "tooltip": "Reversed subtract from an immediate each active floating-point element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate may take the value +0.5 or +1.0 only. Inactive elements in the destination vector register remain unmodified.", + "html": "

Reversed subtract from an immediate each active floating-point element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate may take the value +0.5 or +1.0 only. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FSUBR": + return { + "tooltip": "Reversed subtract active floating-point elements of the first source vector from corresponding floating-point elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Reversed subtract active floating-point elements of the first source vector from corresponding floating-point elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FTMAD": + return { + "tooltip": "The FTMAD instruction calculates the series terms for either sin(x) or cos(x), where the argument x has been adjusted to be in the range -\u03c0/4 < x \u2264 \u03c0/4.", + "html": "

The FTMAD instruction calculates the series terms for either sin(x) or cos(x), where the argument x has been adjusted to be in the range -\u03c0/4 < x \u2264 \u03c0/4.

To calculate the series terms of sin(x) and cos(x) the initial source operands of FTMAD should be zero in the first source vector and x2 in the second source vector. The FTMAD instruction is then executed eight times to calculate the sum of eight series terms, which gives a result of sufficient precision.

The FTMAD instruction multiplies each element of the first source vector by the absolute value of the corresponding element of the second source vector and performs a fused addition of each product with a value obtained from a table of hard-wired coefficients, and places the results destructively in the first source vector.

The coefficients are different for sin(x) and cos(x), and are selected by a combination of the sign bit in the second source element and an immediate index in the range 0 to 7.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FTSMUL": + return { + "tooltip": "The FTSMUL instruction calculates the initial value for the FTMAD instruction. The instruction squares each element in the first source vector and then sets the sign bit to a copy of bit 0 of the corresponding element in the second source register, and places the results in the destination vector. This instruction is unpredicated.", + "html": "

The FTSMUL instruction calculates the initial value for the FTMAD instruction. The instruction squares each element in the first source vector and then sets the sign bit to a copy of bit 0 of the corresponding element in the second source register, and places the results in the destination vector. This instruction is unpredicated.

To compute sin(x) or cos(x) the instruction is executed with elements of the first source vector set to x, adjusted to be in the range -\u03c0/4 < x \u2264 \u03c0/4.

The elements of the second source vector hold the corresponding value of the quadrant q number as an integer not a floating-point value. The value q satisfies the relationship (2q-1) \u00d7 \u03c0/4 < x \u2264 (2q+1) \u00d7 \u03c0/4.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FTSSEL": + return { + "tooltip": "The FTSSEL instruction selects the coefficient for the final multiplication in the polynomial series approximation. The instruction places the value 1.0 or a copy of the first source vector element in the destination element, depending on bit 0 of the quadrant number q held in the corresponding element of the second source vector. The sign bit of the destination element is copied from bit 1 of the corresponding value of q. This instruction is unpredicated.", + "html": "

The FTSSEL instruction selects the coefficient for the final multiplication in the polynomial series approximation. The instruction places the value 1.0 or a copy of the first source vector element in the destination element, depending on bit 0 of the quadrant number q held in the corresponding element of the second source vector. The sign bit of the destination element is copied from bit 1 of the corresponding value of q. This instruction is unpredicated.

To compute sin(x) or cos(x) the instruction is executed with elements of the first source vector set to x, adjusted to be in the range -\u03c0/4 < x \u2264 \u03c0/4.

The elements of the second source vector hold the corresponding value of the quadrant q number as an integer not a floating-point value. The value q satisfies the relationship (2q-1) \u00d7 \u03c0/4 < x \u2264 (2q+1) \u00d7 \u03c0/4.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "FVDOT": + return { + "tooltip": "The instruction computes the fused sum-of-products of each vertical pair of half-precision floating-point values in the corresponding elements of the two first source vectors with the pair of half-precision floating-point values in the indexed 32-bit group of the corresponding 128-bit segment of the second source vector, without intermediate rounding. The single-precision sum-of-products results are destructively added to the corresponding single-precision elements of the two ZA single-vector groups.", + "html": "

The instruction computes the fused sum-of-products of each vertical pair of half-precision floating-point values in the corresponding elements of the two first source vectors with the pair of half-precision floating-point values in the indexed 32-bit group of the corresponding 128-bit segment of the second source vector, without intermediate rounding. The single-precision sum-of-products results are destructively added to the corresponding single-precision elements of the two ZA single-vector groups.

The half-precision floating-point pairs within the second source vector are specified using an immediate index which selects the same pair position within each 128-bit vector segment. The element index range is from 0 to 3.

The vector numbers forming the single-vector group within each half of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half the number of ZA array vectors.

The vector group symbol VGx2 indicates that the ZA operand consists of two ZA single-vector groups. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction follows SME ZA-targeting floating-point behaviors.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "GCSBDSYNC": + return { + "tooltip": "Guarded Control Stack Barrier. This instruction generates a Guarded control stack data synchronization event.", + "html": "

Guarded Control Stack Barrier. This instruction generates a Guarded control stack data synchronization event.

If FEAT_GCS is not implemented, this instruction executes as a NOP.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "GCSPOPCX": + return { + "tooltip": "Guarded Control Stack Pop and Compare exception return record loads an exception return record from the location indicated by the current Guarded control stack pointer register, compares the loaded values with the current ELR_ELx, SPSR_ELx, and LR, and increments the pointer by the size of a Guarded control stack exception return record.", + "html": "

Guarded Control Stack Pop and Compare exception return record loads an exception return record from the location indicated by the current Guarded control stack pointer register, compares the loaded values with the current ELR_ELx, SPSR_ELx, and LR, and increments the pointer by the size of a Guarded control stack exception return record.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "GCSPOPM": + return { + "tooltip": "Guarded Control Stack Pop loads the 64-bit doubleword that is pointed to by the current Guarded control stack pointer, writes it to the destination register, and increments the current Guarded control stack pointer register by the size of a Guarded control stack procedure return record.", + "html": "

Guarded Control Stack Pop loads the 64-bit doubleword that is pointed to by the current Guarded control stack pointer, writes it to the destination register, and increments the current Guarded control stack pointer register by the size of a Guarded control stack procedure return record.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "GCSPOPX": + return { + "tooltip": "Guarded Control Stack Pop exception return record loads an exception return record from the location indicated by the current Guarded control stack pointer register, checks that the record is an exception return record, and increments the pointer by the size of a Guarded control stack exception return record.", + "html": "

Guarded Control Stack Pop exception return record loads an exception return record from the location indicated by the current Guarded control stack pointer register, checks that the record is an exception return record, and increments the pointer by the size of a Guarded control stack exception return record.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "GCSPUSHM": + return { + "tooltip": "Guarded Control Stack Push decrements the current Guarded control stack pointer register by the size of a Guarded control procedure return record and stores an entry to the Guarded control stack.", + "html": "

Guarded Control Stack Push decrements the current Guarded control stack pointer register by the size of a Guarded control procedure return record and stores an entry to the Guarded control stack.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "GCSPUSHX": + return { + "tooltip": "Guarded Control Stack Push exception return record decrements the current Guarded control stack pointer register by the size of a Guarded control stack exception return record and stores an exception return record to the Guarded control stack.", + "html": "

Guarded Control Stack Push exception return record decrements the current Guarded control stack pointer register by the size of a Guarded control stack exception return record and stores an exception return record to the Guarded control stack.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "GCSSS1": + return { + "tooltip": "Guarded Control Stack Switch Stack 1 validates that the stack being switched to contains a Valid cap entry, stores an In-progress cap entry to the stack that is being switched to, and sets the current Guarded control stack pointer to the stack that is being switched to.", + "html": "

Guarded Control Stack Switch Stack 1 validates that the stack being switched to contains a Valid cap entry, stores an In-progress cap entry to the stack that is being switched to, and sets the current Guarded control stack pointer to the stack that is being switched to.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "GCSSS2": + return { + "tooltip": "Guarded Control Stack Switch Stack 2 validates that the most recent entry of the Guarded control stack being switched to contains an In-progress cap entry, stores a Valid cap entry to the Guarded control stack that is being switched from, and sets Xt to the Guarded control stack pointer that is being switched from.", + "html": "

Guarded Control Stack Switch Stack 2 validates that the most recent entry of the Guarded control stack being switched to contains an In-progress cap entry, stores a Valid cap entry to the Guarded control stack that is being switched from, and sets Xt to the Guarded control stack pointer that is being switched from.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "GCSSTR": + return { + "tooltip": "Guarded Control Stack Store stores a doubleword from a register to memory. The address that is used for the store is calculated from a base register.", + "html": "

Guarded Control Stack Store stores a doubleword from a register to memory. The address that is used for the store is calculated from a base register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "GCSSTTR": + return { + "tooltip": "Guarded Control Stack unprivileged Store stores a doubleword from a register to memory. The address that is used for the store is calculated from a base register.", + "html": "

Guarded Control Stack unprivileged Store stores a doubleword from a register to memory. The address that is used for the store is calculated from a base register.

Memory accesses made by the instruction behave as if the instruction was executed at EL0 if the Effective value of PSTATE.UAO is 0 and either:

Otherwise, the memory access operates with the restrictions determined by the Exception level at which the instruction is executed.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "GMI": + return { + "tooltip": "Tag Mask Insert inserts the tag in the first source register into the excluded set specified in the second source register, writing the new excluded set to the destination register.", + "html": "

Tag Mask Insert inserts the tag in the first source register into the excluded set specified in the second source register, writing the new excluded set to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "HINT": + return { + "tooltip": "Hint instruction is for the instruction set space that is reserved for architectural hint instructions.", + "html": "

Hint instruction is for the instruction set space that is reserved for architectural hint instructions.

Some encodings described here are not allocated in this revision of the architecture, and behave as NOPs. These encodings might be allocated to other hint functionality in future revisions of the architecture and therefore must not be used by software.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "HISTCNT": + return { + "tooltip": "This instruction compares each active 32 or 64-bit element of the first source vector with all active elements with an element number less than or equal to its own in the second source vector, and places the count of matching elements in the corresponding element of the destination vector. Inactive elements in the destination vector are set to zero.", + "html": "

This instruction compares each active 32 or 64-bit element of the first source vector with all active elements with an element number less than or equal to its own in the second source vector, and places the count of matching elements in the corresponding element of the destination vector. Inactive elements in the destination vector are set to zero.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "HISTSEG": + return { + "tooltip": "This instruction compares each 8-bit byte element of the first source vector with all of the elements in the corresponding 128-bit segment of the second source vector and places the count of matching elements in the corresponding element of the destination vector. This instruction is unpredicated.", + "html": "

This instruction compares each 8-bit byte element of the first source vector with all of the elements in the corresponding 128-bit segment of the second source vector and places the count of matching elements in the corresponding element of the destination vector. This instruction is unpredicated.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "HLT": + return { + "tooltip": "Halt instruction. An HLT instruction can generate a Halt Instruction debug event, which causes entry into Debug state.", + "html": "

Halt instruction. An HLT instruction can generate a Halt Instruction debug event, which causes entry into Debug state.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "HVC": + return { + "tooltip": "Hypervisor Call causes an exception to EL2. Software executing at EL1 can use this instruction to call the hypervisor to request a service.", + "html": "

Hypervisor Call causes an exception to EL2. Software executing at EL1 can use this instruction to call the hypervisor to request a service.

The HVC instruction is undefined:

On executing an HVC instruction, the PE records the exception as a Hypervisor Call exception in ESR_ELx, using the EC value 0x16, and the value of the immediate argument.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "IC": + return { + "tooltip": "Instruction Cache operation. For more information, see op0==0b01, cache maintenance, TLB maintenance, and address translation instructions.", + "html": "

Instruction Cache operation. For more information, see op0==0b01, cache maintenance, TLB maintenance, and address translation instructions.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "INCB": + case "INCD": + case "INCH": + case "INCW": + return { + "tooltip": "Determines the number of active elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment the scalar destination.", + "html": "

Determines the number of active elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment the scalar destination.

The named predicate constraint limits the number of active elements in a single predicate to:

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "INCD": + case "INCH": + case "INCW": + return { + "tooltip": "Determines the number of active elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment all destination vector elements.", + "html": "

Determines the number of active elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment all destination vector elements.

The named predicate constraint limits the number of active elements in a single predicate to:

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "INCP": + return { + "tooltip": "Counts the number of true elements in the source predicate and then uses the result to increment the scalar destination.", + "html": "

Counts the number of true elements in the source predicate and then uses the result to increment the scalar destination.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "INCP": + return { + "tooltip": "Counts the number of true elements in the source predicate and then uses the result to increment all destination vector elements.", + "html": "

Counts the number of true elements in the source predicate and then uses the result to increment all destination vector elements.

The predicate size specifier may be omitted in assembler source code, but this is deprecated and will be prohibited in a future release of the architecture.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "INDEX": + return { + "tooltip": "Populates the destination vector by setting the first element to the first signed immediate integer operand and monotonically incrementing the value by the second signed immediate integer operand for each subsequent element. This instruction is unpredicated.", + "html": "

Populates the destination vector by setting the first element to the first signed immediate integer operand and monotonically incrementing the value by the second signed immediate integer operand for each subsequent element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "INDEX": + return { + "tooltip": "Populates the destination vector by setting the first element to the first signed immediate integer operand and monotonically incrementing the value by the second signed scalar integer operand for each subsequent element. The scalar source operand is a general-purpose register in which only the least significant bits corresponding to the vector element size are used and any remaining bits are ignored. This instruction is unpredicated.", + "html": "

Populates the destination vector by setting the first element to the first signed immediate integer operand and monotonically incrementing the value by the second signed scalar integer operand for each subsequent element. The scalar source operand is a general-purpose register in which only the least significant bits corresponding to the vector element size are used and any remaining bits are ignored. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "INDEX": + return { + "tooltip": "Populates the destination vector by setting the first element to the first signed scalar integer operand and monotonically incrementing the value by the second signed immediate integer operand for each subsequent element. The scalar source operand is a general-purpose register in which only the least significant bits corresponding to the vector element size are used and any remaining bits are ignored. This instruction is unpredicated.", + "html": "

Populates the destination vector by setting the first element to the first signed scalar integer operand and monotonically incrementing the value by the second signed immediate integer operand for each subsequent element. The scalar source operand is a general-purpose register in which only the least significant bits corresponding to the vector element size are used and any remaining bits are ignored. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "INDEX": + return { + "tooltip": "Populates the destination vector by setting the first element to the first signed scalar integer operand and monotonically incrementing the value by the second signed scalar integer operand for each subsequent element. The scalar source operands are general-purpose registers in which only the least significant bits corresponding to the vector element size are used and any remaining bits are ignored. This instruction is unpredicated.", + "html": "

Populates the destination vector by setting the first element to the first signed scalar integer operand and monotonically incrementing the value by the second signed scalar integer operand for each subsequent element. The scalar source operands are general-purpose registers in which only the least significant bits corresponding to the vector element size are used and any remaining bits are ignored. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "INS": + return { + "tooltip": "Insert vector element from another vector element. This instruction copies the vector element of the source SIMD&FP register to the specified vector element of the destination SIMD&FP register.", + "html": "

Insert vector element from another vector element. This instruction copies the vector element of the source SIMD&FP register to the specified vector element of the destination SIMD&FP register.

This instruction can insert data into individual elements within a SIMD&FP register without clearing the remaining bits to zero.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "INS": + return { + "tooltip": "Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.", + "html": "

Insert vector element from general-purpose register. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

This instruction can insert data into individual elements within a SIMD&FP register without clearing the remaining bits to zero.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "INSR": + return { + "tooltip": "Shift the destination vector left by one element, and then place a copy of the least-significant bits of the general-purpose register in element 0 of the destination vector. This instruction is unpredicated.", + "html": "

Shift the destination vector left by one element, and then place a copy of the least-significant bits of the general-purpose register in element 0 of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "INSR": + return { + "tooltip": "Shift the destination vector left by one element, and then place a copy of the SIMD&FP scalar register in element 0 of the destination vector. This instruction is unpredicated.", + "html": "

Shift the destination vector left by one element, and then place a copy of the SIMD&FP scalar register in element 0 of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "IRG": + return { + "tooltip": "Insert Random Tag inserts a random Logical Address Tag into the address in the first source register, and writes the result to the destination register. Any tags specified in the optional second source register or in GCR_EL1.Exclude are excluded from the selection of the random Logical Address Tag.", + "html": "

Insert Random Tag inserts a random Logical Address Tag into the address in the first source register, and writes the result to the destination register. Any tags specified in the optional second source register or in GCR_EL1.Exclude are excluded from the selection of the random Logical Address Tag.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ISB": + return { + "tooltip": "Instruction Synchronization Barrier flushes the pipeline in the PE and is a context synchronization event. For more information, see Instruction Synchronization Barrier (ISB).", + "html": "

Instruction Synchronization Barrier flushes the pipeline in the PE and is a context synchronization event. For more information, see Instruction Synchronization Barrier (ISB).

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LASTA": + return { + "tooltip": "If there is an active element then extract the element after the last active element modulo the number of elements from the final source vector register. If there are no active elements, extract element zero. Then zero-extend and place the extracted element in the destination general-purpose register.", + "html": "

If there is an active element then extract the element after the last active element modulo the number of elements from the final source vector register. If there are no active elements, extract element zero. Then zero-extend and place the extracted element in the destination general-purpose register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LASTA": + return { + "tooltip": "If there is an active element then extract the element after the last active element modulo the number of elements from the final source vector register. If there are no active elements, extract element zero. Then place the extracted element in the destination SIMD&FP scalar register.", + "html": "

If there is an active element then extract the element after the last active element modulo the number of elements from the final source vector register. If there are no active elements, extract element zero. Then place the extracted element in the destination SIMD&FP scalar register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LASTB": + return { + "tooltip": "If there is an active element then extract the last active element from the final source vector register. If there are no active elements, extract the highest-numbered element. Then zero-extend and place the extracted element in the destination general-purpose register.", + "html": "

If there is an active element then extract the last active element from the final source vector register. If there are no active elements, extract the highest-numbered element. Then zero-extend and place the extracted element in the destination general-purpose register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LASTB": + return { + "tooltip": "If there is an active element then extract the last active element from the final source vector register. If there are no active elements, extract the highest-numbered element. Then place the extracted element in the destination SIMD&FP register.", + "html": "

If there is an active element then extract the last active element from the final source vector register. If there are no active elements, extract the highest-numbered element. Then place the extracted element in the destination SIMD&FP register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1": + return { + "tooltip": "Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&FP registers.", + "html": "

Load multiple single-element structures to one, two, three, or four registers. This instruction loads multiple single-element structures from memory and writes the result to one, two, three, or four SIMD&FP registers.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1": + return { + "tooltip": "Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.", + "html": "

Load one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1B": + return { + "tooltip": "Contiguous load of unsigned bytes to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous load of unsigned bytes to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1B": + return { + "tooltip": "Contiguous load of unsigned bytes to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous load of unsigned bytes to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1B": + return { + "tooltip": "Contiguous load of unsigned bytes to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous load of unsigned bytes to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1B": + return { + "tooltip": "Contiguous load of unsigned bytes to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous load of unsigned bytes to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1B": + return { + "tooltip": "Gather load of unsigned bytes to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is in the range 0 to 31. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load of unsigned bytes to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is in the range 0 to 31. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1B": + return { + "tooltip": "Contiguous load of unsigned bytes to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load of unsigned bytes to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1B": + return { + "tooltip": "Contiguous load of unsigned bytes to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load of unsigned bytes to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1B": + return { + "tooltip": "Gather load of unsigned bytes to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally sign or zero-extended from 32 to 64 bits. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load of unsigned bytes to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally sign or zero-extended from 32 to 64 bits. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1B": + return { + "tooltip": "The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of 8-bit elements in a vector. The immediate offset is in the range 0 to 15. The memory address is generated by a 64-bit scalar base and an optional 64-bit scalar offset which is added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of 8-bit elements in a vector. The immediate offset is in the range 0 to 15. The memory address is generated by a 64-bit scalar base and an optional 64-bit scalar offset which is added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1D": + return { + "tooltip": "Contiguous load of unsigned doublewords to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous load of unsigned doublewords to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1D": + return { + "tooltip": "Contiguous load of unsigned doublewords to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous load of unsigned doublewords to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1D": + return { + "tooltip": "Contiguous load of unsigned doublewords to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous load of unsigned doublewords to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1D": + return { + "tooltip": "Contiguous load of unsigned doublewords to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous load of unsigned doublewords to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1D": + return { + "tooltip": "Gather load of doublewords to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is a multiple of 8 in the range 0 to 248. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load of doublewords to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is a multiple of 8 in the range 0 to 248. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1D": + return { + "tooltip": "Contiguous load of unsigned doublewords to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load of unsigned doublewords to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1D": + return { + "tooltip": "Contiguous load of unsigned doublewords to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 8 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load of unsigned doublewords to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 8 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1D": + return { + "tooltip": "Gather load of doublewords to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 8. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load of doublewords to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 8. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1D": + return { + "tooltip": "The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of 64-bit elements in a vector. The immediate offset is in the range 0 to 1. The memory address is generated by a 64-bit scalar base and an optional 64-bit scalar offset which is multiplied by 8 and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of 64-bit elements in a vector. The immediate offset is in the range 0 to 1. The memory address is generated by a 64-bit scalar base and an optional 64-bit scalar offset which is multiplied by 8 and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1H": + return { + "tooltip": "Contiguous load of unsigned halfwords to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous load of unsigned halfwords to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1H": + return { + "tooltip": "Contiguous load of unsigned halfwords to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous load of unsigned halfwords to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1H": + return { + "tooltip": "Contiguous load of unsigned halfwords to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous load of unsigned halfwords to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1H": + return { + "tooltip": "Contiguous load of unsigned halfwords to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous load of unsigned halfwords to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1H": + return { + "tooltip": "Gather load of unsigned halfwords to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is a multiple of 2 in the range 0 to 62. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load of unsigned halfwords to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is a multiple of 2 in the range 0 to 62. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1H": + return { + "tooltip": "Contiguous load of unsigned halfwords to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load of unsigned halfwords to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1H": + return { + "tooltip": "Contiguous load of unsigned halfwords to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 2 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load of unsigned halfwords to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 2 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1H": + return { + "tooltip": "Gather load of unsigned halfwords to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 2. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load of unsigned halfwords to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 2. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1H": + return { + "tooltip": "The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of 16-bit elements in a vector. The immediate offset is in the range 0 to 7. The memory address is generated by a 64-bit scalar base and an optional 64-bit scalar offset which is multiplied by 2 and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of 16-bit elements in a vector. The immediate offset is in the range 0 to 7. The memory address is generated by a 64-bit scalar base and an optional 64-bit scalar offset which is multiplied by 2 and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1Q": + return { + "tooltip": "Gather load of quadwords to active elements of a vector register from memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load of quadwords to active elements of a vector register from memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1Q": + return { + "tooltip": "The slice number in the tile is selected by the slice index register, modulo the number of 128-bit elements in a Streaming SVE vector. The memory address is generated by scalar base and optional scalar offset which is multiplied by 16 and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

The slice number in the tile is selected by the slice index register, modulo the number of 128-bit elements in a Streaming SVE vector. The memory address is generated by scalar base and optional scalar offset which is multiplied by 16 and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1R": + return { + "tooltip": "Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.", + "html": "

Load one single-element structure and Replicate to all lanes (of one register). This instruction loads a single-element structure from memory and replicates the structure to all the lanes of the SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1RB": + return { + "tooltip": "Load a single unsigned byte from a memory address generated by a 64-bit scalar base address plus an immediate offset which is in the range 0 to 63.", + "html": "

Load a single unsigned byte from a memory address generated by a 64-bit scalar base address plus an immediate offset which is in the range 0 to 63.

Broadcast the loaded data into all active elements of the destination vector, setting the inactive elements to zero. If all elements are inactive then the instruction will not perform a read from Device memory or cause a data abort.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1RD": + return { + "tooltip": "Load a single doubleword from a memory address generated by a 64-bit scalar base address plus an immediate offset which is a multiple of 8 in the range 0 to 504.", + "html": "

Load a single doubleword from a memory address generated by a 64-bit scalar base address plus an immediate offset which is a multiple of 8 in the range 0 to 504.

Broadcast the loaded data into all active elements of the destination vector, setting the inactive elements to zero. If all elements are inactive then the instruction will not perform a read from Device memory or cause a data abort.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1RH": + return { + "tooltip": "Load a single unsigned halfword from a memory address generated by a 64-bit scalar base address plus an immediate offset which is a multiple of 2 in the range 0 to 126.", + "html": "

Load a single unsigned halfword from a memory address generated by a 64-bit scalar base address plus an immediate offset which is a multiple of 2 in the range 0 to 126.

Broadcast the loaded data into all active elements of the destination vector, setting the inactive elements to zero. If all elements are inactive then the instruction will not perform a read from Device memory or cause a data abort.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1ROB": + return { + "tooltip": "Load thirty-two contiguous bytes to elements of a 256-bit (octaword) vector from the memory address generated by a 64-bit scalar base address and immediate index that is a multiple of 32 in the range -256 to +224 added to the base address.", + "html": "

Load thirty-two contiguous bytes to elements of a 256-bit (octaword) vector from the memory address generated by a 64-bit scalar base address and immediate index that is a multiple of 32 in the range -256 to +224 added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero.

The resulting 256-bit vector is then replicated to fill the destination vector. The instruction requires that the current vector length is at least 256 bits, and if the current vector length is not an integer multiple of 256 bits then the trailing bits in the destination vector are set to zero.

Only the first thirty-two predicate elements are used and higher numbered predicate elements are ignored.

ID_AA64ZFR0_EL1.F64MM indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1ROB": + return { + "tooltip": "Load thirty-two contiguous bytes to elements of a 256-bit (octaword) vector from the memory address generated by a 64-bit scalar base address and scalar index which is added to the base address.", + "html": "

Load thirty-two contiguous bytes to elements of a 256-bit (octaword) vector from the memory address generated by a 64-bit scalar base address and scalar index which is added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero.

The resulting 256-bit vector is then replicated to fill the destination vector. The instruction requires that the current vector length is at least 256 bits, and if the current vector length is not an integer multiple of 256 bits then the trailing bits in the destination vector are set to zero.

Only the first thirty-two predicate elements are used and higher numbered predicate elements are ignored.

ID_AA64ZFR0_EL1.F64MM indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1ROD": + return { + "tooltip": "Load four contiguous doublewords to elements of a 256-bit (octaword) vector from the memory address generated by a 64-bit scalar base address and immediate index that is a multiple of 32 in the range -256 to +224 added to the base address.", + "html": "

Load four contiguous doublewords to elements of a 256-bit (octaword) vector from the memory address generated by a 64-bit scalar base address and immediate index that is a multiple of 32 in the range -256 to +224 added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero.

The resulting 256-bit vector is then replicated to fill the destination vector. The instruction requires that the current vector length is at least 256 bits, and if the current vector length is not an integer multiple of 256 bits then the trailing bits in the destination vector are set to zero.

Only the first four predicate elements are used and higher numbered predicate elements are ignored.

ID_AA64ZFR0_EL1.F64MM indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1ROD": + return { + "tooltip": "Load four contiguous doublewords to elements of a 256-bit (octaword) vector from the memory address generated by a 64-bit scalar base address and scalar index which is multiplied by 8 and added to the base address.", + "html": "

Load four contiguous doublewords to elements of a 256-bit (octaword) vector from the memory address generated by a 64-bit scalar base address and scalar index which is multiplied by 8 and added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero.

The resulting 256-bit vector is then replicated to fill the destination vector. The instruction requires that the current vector length is at least 256 bits, and if the current vector length is not an integer multiple of 256 bits then the trailing bits in the destination vector are set to zero.

Only the first four predicate elements are used and higher numbered predicate elements are ignored.

ID_AA64ZFR0_EL1.F64MM indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1ROH": + return { + "tooltip": "Load sixteen contiguous halfwords to elements of a 256-bit (octaword) vector from the memory address generated by a 64-bit scalar base address and immediate index that is a multiple of 32 in the range -256 to +224 added to the base address.", + "html": "

Load sixteen contiguous halfwords to elements of a 256-bit (octaword) vector from the memory address generated by a 64-bit scalar base address and immediate index that is a multiple of 32 in the range -256 to +224 added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero.

The resulting 256-bit vector is then replicated to fill the destination vector. The instruction requires that the current vector length is at least 256 bits, and if the current vector length is not an integer multiple of 256 bits then the trailing bits in the destination vector are set to zero.

Only the first sixteen predicate elements are used and higher numbered predicate elements are ignored.

ID_AA64ZFR0_EL1.F64MM indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1ROH": + return { + "tooltip": "Load sixteen contiguous halfwords to elements of a 256-bit (octaword) vector from the memory address generated by a 64-bit scalar base address and scalar index which is multiplied by 2 and added to the base address.", + "html": "

Load sixteen contiguous halfwords to elements of a 256-bit (octaword) vector from the memory address generated by a 64-bit scalar base address and scalar index which is multiplied by 2 and added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero.

The resulting 256-bit vector is then replicated to fill the destination vector. The instruction requires that the current vector length is at least 256 bits, and if the current vector length is not an integer multiple of 256 bits then the trailing bits in the destination vector are set to zero.

Only the first sixteen predicate elements are used and higher numbered predicate elements are ignored.

ID_AA64ZFR0_EL1.F64MM indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1ROW": + return { + "tooltip": "Load eight contiguous words to elements of a 256-bit (octaword) vector from the memory address generated by a 64-bit scalar base address and immediate index that is a multiple of 32 in the range -256 to +224 added to the base address.", + "html": "

Load eight contiguous words to elements of a 256-bit (octaword) vector from the memory address generated by a 64-bit scalar base address and immediate index that is a multiple of 32 in the range -256 to +224 added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero.

The resulting 256-bit vector is then replicated to fill the destination vector. The instruction requires that the current vector length is at least 256 bits, and if the current vector length is not an integer multiple of 256 bits then the trailing bits in the destination vector are set to zero.

Only the first eight predicate elements are used and higher numbered predicate elements are ignored.

ID_AA64ZFR0_EL1.F64MM indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1ROW": + return { + "tooltip": "Load eight contiguous words to elements of a 256-bit (octaword) vector from the memory address generated by a 64-bit scalar base address and scalar index which is multiplied by 4 and added to the base address.", + "html": "

Load eight contiguous words to elements of a 256-bit (octaword) vector from the memory address generated by a 64-bit scalar base address and scalar index which is multiplied by 4 and added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero.

The resulting 256-bit vector is then replicated to fill the destination vector. The instruction requires that the current vector length is at least 256 bits, and if the current vector length is not an integer multiple of 256 bits then the trailing bits in the destination vector are set to zero.

Only the first eight predicate elements are used and higher numbered predicate elements are ignored.

ID_AA64ZFR0_EL1.F64MM indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1RQB": + return { + "tooltip": "Load sixteen contiguous bytes to elements of a short, 128-bit (quadword) vector from the memory address generated by a 64-bit scalar base address and immediate index that is a multiple of 16 in the range -128 to +112 added to the base address.", + "html": "

Load sixteen contiguous bytes to elements of a short, 128-bit (quadword) vector from the memory address generated by a 64-bit scalar base address and immediate index that is a multiple of 16 in the range -128 to +112 added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero. The resulting short vector is then replicated to fill the long destination vector. Only the first sixteen predicate elements are used and higher numbered predicate elements are ignored.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1RQB": + return { + "tooltip": "Load sixteen contiguous bytes to elements of a short, 128-bit (quadword) vector from the memory address generated by a 64-bit scalar base address and scalar index which is added to the base address.", + "html": "

Load sixteen contiguous bytes to elements of a short, 128-bit (quadword) vector from the memory address generated by a 64-bit scalar base address and scalar index which is added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero. The resulting short vector is then replicated to fill the long destination vector. Only the first sixteen predicate elements are used and higher numbered predicate elements are ignored.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1RQD": + return { + "tooltip": "Load two contiguous doublewords to elements of a short, 128-bit (quadword) vector from the memory address generated by a 64-bit scalar base address and immediate index that is a multiple of 16 in the range -128 to +112 added to the base address.", + "html": "

Load two contiguous doublewords to elements of a short, 128-bit (quadword) vector from the memory address generated by a 64-bit scalar base address and immediate index that is a multiple of 16 in the range -128 to +112 added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero. The resulting short vector is then replicated to fill the long destination vector. Only the first two predicate elements are used and higher numbered predicate elements are ignored.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1RQD": + return { + "tooltip": "Load two contiguous doublewords to elements of a short, 128-bit (quadword) vector from the memory address generated by a 64-bit scalar base address and scalar index which is multiplied by 8 and added to the base address.", + "html": "

Load two contiguous doublewords to elements of a short, 128-bit (quadword) vector from the memory address generated by a 64-bit scalar base address and scalar index which is multiplied by 8 and added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero. The resulting short vector is then replicated to fill the long destination vector. Only the first two predicate elements are used and higher numbered predicate elements are ignored.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1RQH": + return { + "tooltip": "Load eight contiguous halfwords to elements of a short, 128-bit (quadword) vector from the memory address generated by a 64-bit scalar base address and immediate index that is a multiple of 16 in the range -128 to +112 added to the base address.", + "html": "

Load eight contiguous halfwords to elements of a short, 128-bit (quadword) vector from the memory address generated by a 64-bit scalar base address and immediate index that is a multiple of 16 in the range -128 to +112 added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero. The resulting short vector is then replicated to fill the long destination vector. Only the first eight predicate elements are used and higher numbered predicate elements are ignored.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1RQH": + return { + "tooltip": "Load eight contiguous halfwords to elements of a short, 128-bit (quadword) vector from the memory address generated by a 64-bit scalar base address and scalar index which is multiplied by 2 and added to the base address.", + "html": "

Load eight contiguous halfwords to elements of a short, 128-bit (quadword) vector from the memory address generated by a 64-bit scalar base address and scalar index which is multiplied by 2 and added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero. The resulting short vector is then replicated to fill the long destination vector. Only the first eight predicate elements are used and higher numbered predicate elements are ignored.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1RQW": + return { + "tooltip": "Load four contiguous words to elements of a short, 128-bit (quadword) vector from the memory address generated by a 64-bit scalar base address and immediate index that is a multiple of 16 in the range -128 to +112 added to the base address.", + "html": "

Load four contiguous words to elements of a short, 128-bit (quadword) vector from the memory address generated by a 64-bit scalar base address and immediate index that is a multiple of 16 in the range -128 to +112 added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero. The resulting short vector is then replicated to fill the long destination vector. Only the first four predicate elements are used and higher numbered predicate elements are ignored.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1RQW": + return { + "tooltip": "Load four contiguous words to elements of a short, 128-bit (quadword) vector from the memory address generated by a 64-bit scalar base address and scalar index which is multiplied by 4 and added to the base address.", + "html": "

Load four contiguous words to elements of a short, 128-bit (quadword) vector from the memory address generated by a 64-bit scalar base address and scalar index which is multiplied by 4 and added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero. The resulting short vector is then replicated to fill the long destination vector. Only the first four predicate elements are used and higher numbered predicate elements are ignored.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1RSB": + return { + "tooltip": "Load a single signed byte from a memory address generated by a 64-bit scalar base address plus an immediate offset which is in the range 0 to 63.", + "html": "

Load a single signed byte from a memory address generated by a 64-bit scalar base address plus an immediate offset which is in the range 0 to 63.

Broadcast the loaded data into all active elements of the destination vector, setting the inactive elements to zero. If all elements are inactive then the instruction will not perform a read from Device memory or cause a data abort.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1RSH": + return { + "tooltip": "Load a single signed halfword from a memory address generated by a 64-bit scalar base address plus an immediate offset which is a multiple of 2 in the range 0 to 126.", + "html": "

Load a single signed halfword from a memory address generated by a 64-bit scalar base address plus an immediate offset which is a multiple of 2 in the range 0 to 126.

Broadcast the loaded data into all active elements of the destination vector, setting the inactive elements to zero. If all elements are inactive then the instruction will not perform a read from Device memory or cause a data abort.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1RSW": + return { + "tooltip": "Load a single signed word from a memory address generated by a 64-bit scalar base address plus an immediate offset which is a multiple of 4 in the range 0 to 252.", + "html": "

Load a single signed word from a memory address generated by a 64-bit scalar base address plus an immediate offset which is a multiple of 4 in the range 0 to 252.

Broadcast the loaded data into all active elements of the destination vector, setting the inactive elements to zero. If all elements are inactive then the instruction will not perform a read from Device memory or cause a data abort.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1RW": + return { + "tooltip": "Load a single unsigned word from a memory address generated by a 64-bit scalar base address plus an immediate offset which is a multiple of 4 in the range 0 to 252.", + "html": "

Load a single unsigned word from a memory address generated by a 64-bit scalar base address plus an immediate offset which is a multiple of 4 in the range 0 to 252.

Broadcast the loaded data into all active elements of the destination vector, setting the inactive elements to zero. If all elements are inactive then the instruction will not perform a read from Device memory or cause a data abort.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1SB": + return { + "tooltip": "Gather load of signed bytes to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is in the range 0 to 31. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load of signed bytes to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is in the range 0 to 31. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1SB": + return { + "tooltip": "Contiguous load of signed bytes to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load of signed bytes to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1SB": + return { + "tooltip": "Contiguous load of signed bytes to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load of signed bytes to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1SB": + return { + "tooltip": "Gather load of signed bytes to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally sign or zero-extended from 32 to 64 bits. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load of signed bytes to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally sign or zero-extended from 32 to 64 bits. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1SH": + return { + "tooltip": "Gather load of signed halfwords to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is a multiple of 2 in the range 0 to 62. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load of signed halfwords to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is a multiple of 2 in the range 0 to 62. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1SH": + return { + "tooltip": "Contiguous load of signed halfwords to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load of signed halfwords to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1SH": + return { + "tooltip": "Contiguous load of signed halfwords to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 2 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load of signed halfwords to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 2 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1SH": + return { + "tooltip": "Gather load of signed halfwords to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 2. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load of signed halfwords to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 2. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1SW": + return { + "tooltip": "Gather load of signed words to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is a multiple of 4 in the range 0 to 124. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load of signed words to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is a multiple of 4 in the range 0 to 124. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1SW": + return { + "tooltip": "Contiguous load of signed words to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load of signed words to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1SW": + return { + "tooltip": "Contiguous load of signed words to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 4 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load of signed words to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 4 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1SW": + return { + "tooltip": "Gather load of signed words to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 4. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load of signed words to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 4. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1W": + return { + "tooltip": "Contiguous load of unsigned words to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous load of unsigned words to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1W": + return { + "tooltip": "Contiguous load of unsigned words to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous load of unsigned words to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1W": + return { + "tooltip": "Contiguous load of unsigned words to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous load of unsigned words to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1W": + return { + "tooltip": "Contiguous load of unsigned words to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous load of unsigned words to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1W": + return { + "tooltip": "Gather load of unsigned words to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is a multiple of 4 in the range 0 to 124. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load of unsigned words to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is a multiple of 4 in the range 0 to 124. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1W": + return { + "tooltip": "Contiguous load of unsigned words to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load of unsigned words to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1W": + return { + "tooltip": "Contiguous load of unsigned words to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 4 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load of unsigned words to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 4 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1W": + return { + "tooltip": "Gather load of unsigned words to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 4. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load of unsigned words to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 4. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD1W": + return { + "tooltip": "The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of 32-bit elements in a vector. The immediate offset is in the range 0 to 3. The memory address is generated by a 64-bit scalar base and an optional 64-bit scalar offset which is multiplied by 4 and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of 32-bit elements in a vector. The immediate offset is in the range 0 to 3. The memory address is generated by a 64-bit scalar base and an optional 64-bit scalar offset which is multiplied by 4 and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD2": + return { + "tooltip": "Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.", + "html": "

Load multiple 2-element structures to two registers. This instruction loads multiple 2-element structures from memory and writes the result to the two SIMD&FP registers, with de-interleaving.

For an example of de-interleaving, see LD3 (multiple structures).

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD2": + return { + "tooltip": "Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.", + "html": "

Load single 2-element structure to one lane of two registers. This instruction loads a 2-element structure from memory and writes the result to the corresponding elements of the two SIMD&FP registers without affecting the other bits of the registers.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD2B": + return { + "tooltip": "Contiguous load two-byte structures, each to the same element number in two vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 2 in the range -16 to 14 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous load two-byte structures, each to the same element number in two vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 2 in the range -16 to 14 that is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Each predicate element applies to the same element number in each of the two vector registers, or equivalently to the two consecutive bytes in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the two destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD2B": + return { + "tooltip": "Contiguous load two-byte structures, each to the same element number in two vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register and added to the base address. After each structure access the index value is incremented by two. The index register is not updated by the instruction.", + "html": "

Contiguous load two-byte structures, each to the same element number in two vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register and added to the base address. After each structure access the index value is incremented by two. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the two vector registers, or equivalently to the two consecutive bytes in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the two destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD2D": + return { + "tooltip": "Contiguous load two-doubleword structures, each to the same element number in two vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 2 in the range -16 to 14 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous load two-doubleword structures, each to the same element number in two vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 2 in the range -16 to 14 that is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Each predicate element applies to the same element number in each of the two vector registers, or equivalently to the two consecutive doublewords in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the two destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD2D": + return { + "tooltip": "Contiguous load two-doubleword structures, each to the same element number in two vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by two. The index register is not updated by the instruction.", + "html": "

Contiguous load two-doubleword structures, each to the same element number in two vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by two. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the two vector registers, or equivalently to the two consecutive doublewords in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the two destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD2H": + return { + "tooltip": "Contiguous load two-halfword structures, each to the same element number in two vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 2 in the range -16 to 14 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous load two-halfword structures, each to the same element number in two vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 2 in the range -16 to 14 that is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Each predicate element applies to the same element number in each of the two vector registers, or equivalently to the two consecutive halfwords in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the two destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD2H": + return { + "tooltip": "Contiguous load two-halfword structures, each to the same element number in two vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by two. The index register is not updated by the instruction.", + "html": "

Contiguous load two-halfword structures, each to the same element number in two vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by two. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the two vector registers, or equivalently to the two consecutive halfwords in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the two destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD2Q": + return { + "tooltip": "Contiguous load two-quadword structures, each to the same element number in two vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 2 in the range -16 to 14 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous load two-quadword structures, each to the same element number in two vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 2 in the range -16 to 14 that is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Each predicate element applies to the same element number in each of the two vector registers, or equivalently to the two consecutive quadwords in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the two destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD2Q": + return { + "tooltip": "Contiguous load two-quadword structures, each to the same element number in two vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by two. The index register is not updated by the instruction.", + "html": "

Contiguous load two-quadword structures, each to the same element number in two vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by two. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the two vector registers, or equivalently to the two consecutive quadwords in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the two destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD2R": + return { + "tooltip": "Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.", + "html": "

Load single 2-element structure and Replicate to all lanes of two registers. This instruction loads a 2-element structure from memory and replicates the structure to all the lanes of the two SIMD&FP registers.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD2W": + return { + "tooltip": "Contiguous load two-word structures, each to the same element number in two vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 2 in the range -16 to 14 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous load two-word structures, each to the same element number in two vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 2 in the range -16 to 14 that is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Each predicate element applies to the same element number in each of the two vector registers, or equivalently to the two consecutive words in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the two destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD2W": + return { + "tooltip": "Contiguous load two-word structures, each to the same element number in two vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by two. The index register is not updated by the instruction.", + "html": "

Contiguous load two-word structures, each to the same element number in two vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by two. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the two vector registers, or equivalently to the two consecutive words in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the two destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD3": + return { + "tooltip": "Load multiple 3-element structures to three registers. This instruction loads multiple 3-element structures from memory and writes the result to the three SIMD&FP registers, with de-interleaving.", + "html": "

Load multiple 3-element structures to three registers. This instruction loads multiple 3-element structures from memory and writes the result to the three SIMD&FP registers, with de-interleaving.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD3": + return { + "tooltip": "Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.", + "html": "

Load single 3-element structure to one lane of three registers. This instruction loads a 3-element structure from memory and writes the result to the corresponding elements of the three SIMD&FP registers without affecting the other bits of the registers.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD3B": + return { + "tooltip": "Contiguous load three-byte structures, each to the same element number in three vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 3 in the range -24 to 21 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous load three-byte structures, each to the same element number in three vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 3 in the range -24 to 21 that is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Each predicate element applies to the same element number in each of the three vector registers, or equivalently to the three consecutive bytes in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the three destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD3B": + return { + "tooltip": "Contiguous load three-byte structures, each to the same element number in three vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register and added to the base address. After each structure access the index value is incremented by three. The index register is not updated by the instruction.", + "html": "

Contiguous load three-byte structures, each to the same element number in three vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register and added to the base address. After each structure access the index value is incremented by three. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the three vector registers, or equivalently to the three consecutive bytes in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the three destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD3D": + return { + "tooltip": "Contiguous load three-doubleword structures, each to the same element number in three vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 3 in the range -24 to 21 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous load three-doubleword structures, each to the same element number in three vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 3 in the range -24 to 21 that is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Each predicate element applies to the same element number in each of the three vector registers, or equivalently to the three consecutive doublewords in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the three destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD3D": + return { + "tooltip": "Contiguous load three-doubleword structures, each to the same element number in three vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by three. The index register is not updated by the instruction.", + "html": "

Contiguous load three-doubleword structures, each to the same element number in three vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by three. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the three vector registers, or equivalently to the three consecutive doublewords in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the three destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD3H": + return { + "tooltip": "Contiguous load three-halfword structures, each to the same element number in three vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 3 in the range -24 to 21 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous load three-halfword structures, each to the same element number in three vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 3 in the range -24 to 21 that is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Each predicate element applies to the same element number in each of the three vector registers, or equivalently to the three consecutive halfwords in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the three destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD3H": + return { + "tooltip": "Contiguous load three-halfword structures, each to the same element number in three vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by three. The index register is not updated by the instruction.", + "html": "

Contiguous load three-halfword structures, each to the same element number in three vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by three. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the three vector registers, or equivalently to the three consecutive halfwords in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the three destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD3Q": + return { + "tooltip": "Contiguous load three-quadword structures, each to the same element number in three vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 3 in the range -24 to 21 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous load three-quadword structures, each to the same element number in three vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 3 in the range -24 to 21 that is multiplied by the vector's in-memory size, irrespective of predication,

Each predicate element applies to the same element number in each of the three vector registers, or equivalently to the three consecutive quadwords in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the three destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD3Q": + return { + "tooltip": "Contiguous load three-quadword structures, each to the same element number in three vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by three. The index register is not updated by the instruction.", + "html": "

Contiguous load three-quadword structures, each to the same element number in three vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by three. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the three vector registers, or equivalently to the three consecutive quadwords in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the three destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD3R": + return { + "tooltip": "Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.", + "html": "

Load single 3-element structure and Replicate to all lanes of three registers. This instruction loads a 3-element structure from memory and replicates the structure to all the lanes of the three SIMD&FP registers.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD3W": + return { + "tooltip": "Contiguous load three-word structures, each to the same element number in three vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 3 in the range -24 to 21 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous load three-word structures, each to the same element number in three vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 3 in the range -24 to 21 that is multiplied by the vector's in-memory size, irrespective of predication,

Each predicate element applies to the same element number in each of the three vector registers, or equivalently to the three consecutive words in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the three destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD3W": + return { + "tooltip": "Contiguous load three-word structures, each to the same element number in three vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by three. The index register is not updated by the instruction.", + "html": "

Contiguous load three-word structures, each to the same element number in three vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by three. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the three vector registers, or equivalently to the three consecutive words in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the three destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD4": + return { + "tooltip": "Load multiple 4-element structures to four registers. This instruction loads multiple 4-element structures from memory and writes the result to the four SIMD&FP registers, with de-interleaving.", + "html": "

Load multiple 4-element structures to four registers. This instruction loads multiple 4-element structures from memory and writes the result to the four SIMD&FP registers, with de-interleaving.

For an example of de-interleaving, see LD3 (multiple structures).

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD4": + return { + "tooltip": "Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.", + "html": "

Load single 4-element structure to one lane of four registers. This instruction loads a 4-element structure from memory and writes the result to the corresponding elements of the four SIMD&FP registers without affecting the other bits of the registers.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD4B": + return { + "tooltip": "Contiguous load four-byte structures, each to the same element number in four vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 4 in the range -32 to 28 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous load four-byte structures, each to the same element number in four vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 4 in the range -32 to 28 that is multiplied by the vector's in-memory size, irrespective of predication,

Each predicate element applies to the same element number in each of the four vector registers, or equivalently to the four consecutive bytes in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the four destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD4B": + return { + "tooltip": "Contiguous load four-byte structures, each to the same element number in four vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register and added to the base address. After each structure access the index value is incremented by four. The index register is not updated by the instruction.", + "html": "

Contiguous load four-byte structures, each to the same element number in four vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register and added to the base address. After each structure access the index value is incremented by four. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the four vector registers, or equivalently to the four consecutive bytes in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the four destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD4D": + return { + "tooltip": "Contiguous load four-doubleword structures, each to the same element number in four vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 4 in the range -32 to 28 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous load four-doubleword structures, each to the same element number in four vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 4 in the range -32 to 28 that is multiplied by the vector's in-memory size, irrespective of predication,

Each predicate element applies to the same element number in each of the four vector registers, or equivalently to the four consecutive doublewords in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the four destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD4D": + return { + "tooltip": "Contiguous load four-doubleword structures, each to the same element number in four vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by four. The index register is not updated by the instruction.", + "html": "

Contiguous load four-doubleword structures, each to the same element number in four vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by four. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the four vector registers, or equivalently to the four consecutive doublewords in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the four destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD4H": + return { + "tooltip": "Contiguous load four-halfword structures, each to the same element number in four vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 4 in the range -32 to 28 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous load four-halfword structures, each to the same element number in four vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 4 in the range -32 to 28 that is multiplied by the vector's in-memory size, irrespective of predication,

Each predicate element applies to the same element number in each of the four vector registers, or equivalently to the four consecutive halfwords in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the four destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD4H": + return { + "tooltip": "Contiguous load four-halfword structures, each to the same element number in four vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by four. The index register is not updated by the instruction.", + "html": "

Contiguous load four-halfword structures, each to the same element number in four vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by four. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the four vector registers, or equivalently to the four consecutive halfwords in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the four destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD4Q": + return { + "tooltip": "Contiguous load four-quadword structures, each to the same element number in four vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 4 in the range -32 to 28 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous load four-quadword structures, each to the same element number in four vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 4 in the range -32 to 28 that is multiplied by the vector's in-memory size, irrespective of predication,

Each predicate element applies to the same element number in each of the four vector registers, or equivalently to the four consecutive quadwords in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the four destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD4Q": + return { + "tooltip": "Contiguous load four-quadword structures, each to the same element number in four vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by four. The index register is not updated by the instruction.", + "html": "

Contiguous load four-quadword structures, each to the same element number in four vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by four. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the four vector registers, or equivalently to the four consecutive quadwords in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the four destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD4R": + return { + "tooltip": "Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.", + "html": "

Load single 4-element structure and Replicate to all lanes of four registers. This instruction loads a 4-element structure from memory and replicates the structure to all the lanes of the four SIMD&FP registers.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD4W": + return { + "tooltip": "Contiguous load four-word structures, each to the same element number in four vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 4 in the range -32 to 28 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous load four-word structures, each to the same element number in four vector registers from the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 4 in the range -32 to 28 that is multiplied by the vector's in-memory size, irrespective of predication,

Each predicate element applies to the same element number in each of the four vector registers, or equivalently to the four consecutive words in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the four destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD4W": + return { + "tooltip": "Contiguous load four-word structures, each to the same element number in four vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by four. The index register is not updated by the instruction.", + "html": "

Contiguous load four-word structures, each to the same element number in four vector registers from the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by four. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the four vector registers, or equivalently to the four consecutive words in memory which make up each structure. Inactive elements will not cause a read from Device memory or signal a fault, and the corresponding element is set to zero in each of the four destination vector registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LD64B": + return { + "tooltip": "Single-copy Atomic 64-byte Load derives an address from a base register value, loads eight 64-bit doublewords from a memory location, and writes them to consecutive registers, Xt to X(t+7). The data that is loaded is atomic and is required to be 64-byte aligned.", + "html": "

Single-copy Atomic 64-byte Load derives an address from a base register value, loads eight 64-bit doublewords from a memory location, and writes them to consecutive registers, Xt to X(t+7). The data that is loaded is atomic and is required to be 64-byte aligned.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDADD": + case "LDADDA": + case "LDADDAL": + case "LDADDL": + return { + "tooltip": "Atomic add on word or doubleword in memory atomically loads a 32-bit word or 64-bit doubleword from memory, adds the value held in a register to it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic add on word or doubleword in memory atomically loads a 32-bit word or 64-bit doubleword from memory, adds the value held in a register to it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDADDAB": + case "LDADDALB": + case "LDADDB": + case "LDADDLB": + return { + "tooltip": "Atomic add on byte in memory atomically loads an 8-bit byte from memory, adds the value held in a register to it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic add on byte in memory atomically loads an 8-bit byte from memory, adds the value held in a register to it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDADDAH": + case "LDADDALH": + case "LDADDH": + case "LDADDLH": + return { + "tooltip": "Atomic add on halfword in memory atomically loads a 16-bit halfword from memory, adds the value held in a register to it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic add on halfword in memory atomically loads a 16-bit halfword from memory, adds the value held in a register to it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDAP1": + return { + "tooltip": "Load-Acquire RCpc one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.", + "html": "

Load-Acquire RCpc one single-element structure to one lane of one register. This instruction loads a single-element structure from memory and writes the result to the specified lane of the SIMD&FP register without affecting the other bits of the register.

The instruction has memory ordering semantics, as described in Load-Acquire, Load-AcquirePC, and Store-Release, except that:

This difference in memory ordering is not described in the pseudocode.

For information about memory accesses, see Load/Store addressing modes.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDAPR": + return { + "tooltip": "Load-Acquire RCpc Register derives an address from a base register value, loads a 32-bit word or 64-bit doubleword from the derived address in memory, and writes it to a register.", + "html": "

Load-Acquire RCpc Register derives an address from a base register value, loads a 32-bit word or 64-bit doubleword from the derived address in memory, and writes it to a register.

The instruction has memory ordering semantics as described in Load-Acquire, Load-AcquirePC, and Store-Release, except that:

This difference in memory ordering is not described in the pseudocode.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDAPRB": + return { + "tooltip": "Load-Acquire RCpc Register Byte derives an address from a base register value, loads a byte from the derived address in memory, zero-extends it and writes it to a register.", + "html": "

Load-Acquire RCpc Register Byte derives an address from a base register value, loads a byte from the derived address in memory, zero-extends it and writes it to a register.

The instruction has memory ordering semantics as described in Load-Acquire, Load-AcquirePC, and Store-Release, except that:

This difference in memory ordering is not described in the pseudocode.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDAPRH": + return { + "tooltip": "Load-Acquire RCpc Register Halfword derives an address from a base register value, loads a halfword from the derived address in memory, zero-extends it and writes it to a register.", + "html": "

Load-Acquire RCpc Register Halfword derives an address from a base register value, loads a halfword from the derived address in memory, zero-extends it and writes it to a register.

The instruction has memory ordering semantics as described in Load-Acquire, Load-AcquirePC, and Store-Release, except that:

This difference in memory ordering is not described in the pseudocode.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDAPUR": + return { + "tooltip": "Load-Acquire RCpc SIMD&FP Register (unscaled offset). This instruction loads a SIMD&FP register from memory. The address that is used for the load is calculated from a base register value and an optional immediate offset.", + "html": "

Load-Acquire RCpc SIMD&FP Register (unscaled offset). This instruction loads a SIMD&FP register from memory. The address that is used for the load is calculated from a base register value and an optional immediate offset.

The instruction has memory ordering semantics as described in Load-Acquire, Load-AcquirePC, and Store-Release, except that:

This difference in memory ordering is not described in the pseudocode.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDAPUR": + return { + "tooltip": "Load-Acquire RCpc Register (unscaled) calculates an address from a base register and an immediate offset, loads a 32-bit word or 64-bit doubleword from memory, zero-extends it, and writes it to a register.", + "html": "

Load-Acquire RCpc Register (unscaled) calculates an address from a base register and an immediate offset, loads a 32-bit word or 64-bit doubleword from memory, zero-extends it, and writes it to a register.

The instruction has memory ordering semantics as described in Load-Acquire, Load-AcquirePC, and Store-Release, except that:

This difference in memory ordering is not described in the pseudocode.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDAPURB": + return { + "tooltip": "Load-Acquire RCpc Register Byte (unscaled) calculates an address from a base register and an immediate offset, loads a byte from memory, zero-extends it, and writes it to a register.", + "html": "

Load-Acquire RCpc Register Byte (unscaled) calculates an address from a base register and an immediate offset, loads a byte from memory, zero-extends it, and writes it to a register.

The instruction has memory ordering semantics as described in Load-Acquire, Load-AcquirePC, and Store-Release, except that:

This difference in memory ordering is not described in the pseudocode.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDAPURH": + return { + "tooltip": "Load-Acquire RCpc Register Halfword (unscaled) calculates an address from a base register and an immediate offset, loads a halfword from memory, zero-extends it, and writes it to a register.", + "html": "

Load-Acquire RCpc Register Halfword (unscaled) calculates an address from a base register and an immediate offset, loads a halfword from memory, zero-extends it, and writes it to a register.

The instruction has memory ordering semantics as described in Load-Acquire, Load-AcquirePC, and Store-Release, except that:

This difference in memory ordering is not described in the pseudocode.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDAPURSB": + return { + "tooltip": "Load-Acquire RCpc Register Signed Byte (unscaled) calculates an address from a base register and an immediate offset, loads a signed byte from memory, sign-extends it, and writes it to a register.", + "html": "

Load-Acquire RCpc Register Signed Byte (unscaled) calculates an address from a base register and an immediate offset, loads a signed byte from memory, sign-extends it, and writes it to a register.

The instruction has memory ordering semantics as described in Load-Acquire, Load-AcquirePC, and Store-Release, except that:

This difference in memory ordering is not described in the pseudocode.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDAPURSH": + return { + "tooltip": "Load-Acquire RCpc Register Signed Halfword (unscaled) calculates an address from a base register and an immediate offset, loads a signed halfword from memory, sign-extends it, and writes it to a register.", + "html": "

Load-Acquire RCpc Register Signed Halfword (unscaled) calculates an address from a base register and an immediate offset, loads a signed halfword from memory, sign-extends it, and writes it to a register.

The instruction has memory ordering semantics as described in Load-Acquire, Load-AcquirePC, and Store-Release, except that:

This difference in memory ordering is not described in the pseudocode.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDAPURSW": + return { + "tooltip": "Load-Acquire RCpc Register Signed Word (unscaled) calculates an address from a base register and an immediate offset, loads a signed word from memory, sign-extends it, and writes it to a register.", + "html": "

Load-Acquire RCpc Register Signed Word (unscaled) calculates an address from a base register and an immediate offset, loads a signed word from memory, sign-extends it, and writes it to a register.

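The instruction has memory ordering semantics as described in Load-Acquire, Load-AcquirePC, and Store-Release, except that: (1) there is no ordering requirement, separate from the requirements of a Load-AcquirePC or a Store-Release, created by having a Store-Release followed by a Load-AcquirePC instruction; and (2) the reading of a value written by a Store-Release by a Load-AcquirePC instruction by the same observer does not make the write of the Store-Release globally observed.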

This difference in memory ordering is not described in the pseudocode.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDAR": + return { + "tooltip": "Load-Acquire Register derives an address from a base register value, loads a 32-bit word or 64-bit doubleword from memory, and writes it to a register. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load-Acquire Register derives an address from a base register value, loads a 32-bit word or 64-bit doubleword from memory, and writes it to a register. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.

For this instruction, if the destination is WZR/XZR, it is impossible for software to observe the presence of the acquire semantic other than its effect on the arrival at endpoints.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDARB": + return { + "tooltip": "Load-Acquire Register Byte derives an address from a base register value, loads a byte from memory, zero-extends it and writes it to a register. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load-Acquire Register Byte derives an address from a base register value, loads a byte from memory, zero-extends it and writes it to a register. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.

For this instruction, if the destination is WZR/XZR, it is impossible for software to observe the presence of the acquire semantic other than its effect on the arrival at endpoints.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDARH": + return { + "tooltip": "Load-Acquire Register Halfword derives an address from a base register value, loads a halfword from memory, zero-extends it, and writes it to a register. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load-Acquire Register Halfword derives an address from a base register value, loads a halfword from memory, zero-extends it, and writes it to a register. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.

For this instruction, if the destination is WZR/XZR, it is impossible for software to observe the presence of the acquire semantic other than its effect on the arrival at endpoints.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDAXP": + return { + "tooltip": "Load-Acquire Exclusive Pair of Registers derives an address from a base register value, loads two 32-bit words or two 64-bit doublewords from memory, and writes them to two registers. For information on single-copy atomicity and alignment requirements, see Requirements for single-copy atomicity and Alignment of data accesses. The PE marks the physical address being accessed as an exclusive access. This exclusive access mark is checked by Store Exclusive instructions. See Synchronization and semaphores. The instruction also has memory ordering semantics, as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load-Acquire Exclusive Pair of Registers derives an address from a base register value, loads two 32-bit words or two 64-bit doublewords from memory, and writes them to two registers. For information on single-copy atomicity and alignment requirements, see Requirements for single-copy atomicity and Alignment of data accesses. The PE marks the physical address being accessed as an exclusive access. This exclusive access mark is checked by Store Exclusive instructions. See Synchronization and semaphores. The instruction also has memory ordering semantics, as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDAXR": + return { + "tooltip": "Load-Acquire Exclusive Register derives an address from a base register value, loads a 32-bit word or 64-bit doubleword from memory, and writes it to a register. The memory access is atomic. The PE marks the physical address being accessed as an exclusive access. This exclusive access mark is checked by Store Exclusive instructions. See Synchronization and semaphores. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load-Acquire Exclusive Register derives an address from a base register value, loads a 32-bit word or 64-bit doubleword from memory, and writes it to a register. The memory access is atomic. The PE marks the physical address being accessed as an exclusive access. This exclusive access mark is checked by Store Exclusive instructions. See Synchronization and semaphores. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDAXRB": + return { + "tooltip": "Load-Acquire Exclusive Register Byte derives an address from a base register value, loads a byte from memory, zero-extends it and writes it to a register. The memory access is atomic. The PE marks the physical address being accessed as an exclusive access. This exclusive access mark is checked by Store Exclusive instructions. See Synchronization and semaphores. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load-Acquire Exclusive Register Byte derives an address from a base register value, loads a byte from memory, zero-extends it and writes it to a register. The memory access is atomic. The PE marks the physical address being accessed as an exclusive access. This exclusive access mark is checked by Store Exclusive instructions. See Synchronization and semaphores. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDAXRH": + return { + "tooltip": "Load-Acquire Exclusive Register Halfword derives an address from a base register value, loads a halfword from memory, zero-extends it and writes it to a register. The memory access is atomic. The PE marks the physical address being accessed as an exclusive access. This exclusive access mark is checked by Store Exclusive instructions. See Synchronization and semaphores. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load-Acquire Exclusive Register Halfword derives an address from a base register value, loads a halfword from memory, zero-extends it and writes it to a register. The memory access is atomic. The PE marks the physical address being accessed as an exclusive access. This exclusive access mark is checked by Store Exclusive instructions. See Synchronization and semaphores. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDCLR": + case "LDCLRA": + case "LDCLRAL": + case "LDCLRL": + return { + "tooltip": "Atomic bit clear on word or doubleword in memory atomically loads a 32-bit word or 64-bit doubleword from memory, performs a bitwise AND with the complement of the value held in a register on it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic bit clear on word or doubleword in memory atomically loads a 32-bit word or 64-bit doubleword from memory, performs a bitwise AND with the complement of the value held in a register on it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDCLRAB": + case "LDCLRALB": + case "LDCLRB": + case "LDCLRLB": + return { + "tooltip": "Atomic bit clear on byte in memory atomically loads an 8-bit byte from memory, performs a bitwise AND with the complement of the value held in a register on it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic bit clear on byte in memory atomically loads an 8-bit byte from memory, performs a bitwise AND with the complement of the value held in a register on it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDCLRAH": + case "LDCLRALH": + case "LDCLRH": + case "LDCLRLH": + return { + "tooltip": "Atomic bit clear on halfword in memory atomically loads a 16-bit halfword from memory, performs a bitwise AND with the complement of the value held in a register on it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic bit clear on halfword in memory atomically loads a 16-bit halfword from memory, performs a bitwise AND with the complement of the value held in a register on it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDCLRP": + case "LDCLRPA": + case "LDCLRPAL": + case "LDCLRPL": + return { + "tooltip": "Atomic bit clear on quadword in memory atomically loads a 128-bit quadword from memory, performs a bitwise AND with the complement of the value held in a pair of registers on it, and stores the result back to memory. The value initially loaded from memory is returned in the same pair of registers.", + "html": "

Atomic bit clear on quadword in memory atomically loads a 128-bit quadword from memory, performs a bitwise AND with the complement of the value held in a pair of registers on it, and stores the result back to memory. The value initially loaded from memory is returned in the same pair of registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDEOR": + case "LDEORA": + case "LDEORAL": + case "LDEORL": + return { + "tooltip": "Atomic Exclusive-OR on word or doubleword in memory atomically loads a 32-bit word or 64-bit doubleword from memory, performs an exclusive-OR with the value held in a register on it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic Exclusive-OR on word or doubleword in memory atomically loads a 32-bit word or 64-bit doubleword from memory, performs an exclusive-OR with the value held in a register on it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDEORAB": + case "LDEORALB": + case "LDEORB": + case "LDEORLB": + return { + "tooltip": "Atomic Exclusive-OR on byte in memory atomically loads an 8-bit byte from memory, performs an exclusive-OR with the value held in a register on it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic Exclusive-OR on byte in memory atomically loads an 8-bit byte from memory, performs an exclusive-OR with the value held in a register on it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDEORAH": + case "LDEORALH": + case "LDEORH": + case "LDEORLH": + return { + "tooltip": "Atomic Exclusive-OR on halfword in memory atomically loads a 16-bit halfword from memory, performs an exclusive-OR with the value held in a register on it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic Exclusive-OR on halfword in memory atomically loads a 16-bit halfword from memory, performs an exclusive-OR with the value held in a register on it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDFF1B": + return { + "tooltip": "Gather load with first-faulting behavior of unsigned bytes to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is in the range 0 to 31. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load with first-faulting behavior of unsigned bytes to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is in the range 0 to 31. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDFF1B": + return { + "tooltip": "Contiguous load with first-faulting behavior of unsigned bytes to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load with first-faulting behavior of unsigned bytes to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDFF1B": + return { + "tooltip": "Gather load with first-faulting behavior of unsigned bytes to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally sign or zero-extended from 32 to 64 bits. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load with first-faulting behavior of unsigned bytes to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally sign or zero-extended from 32 to 64 bits. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDFF1D": + return { + "tooltip": "Gather load with first-faulting behavior of doublewords to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is a multiple of 8 in the range 0 to 248. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load with first-faulting behavior of doublewords to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is a multiple of 8 in the range 0 to 248. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDFF1D": + return { + "tooltip": "Contiguous load with first-faulting behavior of doublewords to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 8 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load with first-faulting behavior of doublewords to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 8 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDFF1D": + return { + "tooltip": "Gather load with first-faulting behavior of doublewords to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 8. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load with first-faulting behavior of doublewords to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 8. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDFF1H": + return { + "tooltip": "Gather load with first-faulting behavior of unsigned halfwords to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is a multiple of 2 in the range 0 to 62. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load with first-faulting behavior of unsigned halfwords to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is a multiple of 2 in the range 0 to 62. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDFF1H": + return { + "tooltip": "Contiguous load with first-faulting behavior of unsigned halfwords to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 2 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load with first-faulting behavior of unsigned halfwords to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 2 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDFF1H": + return { + "tooltip": "Gather load with first-faulting behavior of unsigned halfwords to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 2. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load with first-faulting behavior of unsigned halfwords to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 2. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDFF1SB": + return { + "tooltip": "Gather load with first-faulting behavior of signed bytes to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is in the range 0 to 31. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load with first-faulting behavior of signed bytes to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is in the range 0 to 31. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDFF1SB": + return { + "tooltip": "Contiguous load with first-faulting behavior of signed bytes to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load with first-faulting behavior of signed bytes to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDFF1SB": + return { + "tooltip": "Gather load with first-faulting behavior of signed bytes to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally sign or zero-extended from 32 to 64 bits. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load with first-faulting behavior of signed bytes to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally sign or zero-extended from 32 to 64 bits. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDFF1SH": + return { + "tooltip": "Gather load with first-faulting behavior of signed halfwords to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is a multiple of 2 in the range 0 to 62. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load with first-faulting behavior of signed halfwords to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is a multiple of 2 in the range 0 to 62. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDFF1SH": + return { + "tooltip": "Contiguous load with first-faulting behavior of signed halfwords to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 2 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load with first-faulting behavior of signed halfwords to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 2 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDFF1SH": + return { + "tooltip": "Gather load with first-faulting behavior of signed halfwords to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 2. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load with first-faulting behavior of signed halfwords to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 2. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDFF1SW": + return { + "tooltip": "Gather load with first-faulting behavior of signed words to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is a multiple of 4 in the range 0 to 124. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load with first-faulting behavior of signed words to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is a multiple of 4 in the range 0 to 124. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDFF1SW": + return { + "tooltip": "Contiguous load with first-faulting behavior of signed words to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 4 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load with first-faulting behavior of signed words to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 4 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDFF1SW": + return { + "tooltip": "Gather load with first-faulting behavior of signed words to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 4. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load with first-faulting behavior of signed words to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 4. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDFF1W": + return { + "tooltip": "Gather load with first-faulting behavior of unsigned words to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is a multiple of 4 in the range 0 to 124. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load with first-faulting behavior of unsigned words to active elements of a vector register from memory addresses generated by a vector base plus immediate index. The index is a multiple of 4 in the range 0 to 124. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDFF1W": + return { + "tooltip": "Contiguous load with first-faulting behavior of unsigned words to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 4 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load with first-faulting behavior of unsigned words to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 4 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDFF1W": + return { + "tooltip": "Gather load with first-faulting behavior of unsigned words to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 4. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load with first-faulting behavior of unsigned words to active elements of a vector register from memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 4. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDG": + return { + "tooltip": "Load Allocation Tag loads an Allocation Tag from a memory address, generates a Logical Address Tag from the Allocation Tag and merges it into the destination register. The address used for the load is calculated from the base register and an immediate signed offset scaled by the Tag granule.", + "html": "

Load Allocation Tag loads an Allocation Tag from a memory address, generates a Logical Address Tag from the Allocation Tag and merges it into the destination register. The address used for the load is calculated from the base register and an immediate signed offset scaled by the Tag granule.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDGM": + return { + "tooltip": "Load Tag Multiple reads a naturally aligned block of N Allocation Tags, where the size of N is identified in GMID_EL1.BS, and writes the Allocation Tag read from address A to the destination register at 4*A<7:4>+3:4*A<7:4>. Bits of the destination register not written with an Allocation Tag are set to 0.", + "html": "

Load Tag Multiple reads a naturally aligned block of N Allocation Tags, where the size of N is identified in GMID_EL1.BS, and writes the Allocation Tag read from address A to the destination register at 4*A<7:4>+3:4*A<7:4>. Bits of the destination register not written with an Allocation Tag are set to 0.

This instruction is undefined at EL0.

This instruction generates an Unchecked access.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDIAPP": + return { + "tooltip": "Load-Acquire RCpc ordered Pair of registers calculates an address from a base register value and an optional offset, loads two 32-bit words or two 64-bit doublewords from memory, and writes them to two registers. For information on single-copy atomicity and alignment requirements, see Requirements for single-copy atomicity and Alignment of data accesses. The instruction also has memory ordering semantics, as described in Load-Acquire, Load-AcquirePC, and Store-Release, except that", + "html": "

Load-Acquire RCpc ordered Pair of registers calculates an address from a base register value and an optional offset, loads two 32-bit words or two 64-bit doublewords from memory, and writes them to two registers. For information on single-copy atomicity and alignment requirements, see Requirements for single-copy atomicity and Alignment of data accesses. The instruction also has memory ordering semantics, as described in Load-Acquire, Load-AcquirePC, and Store-Release, except that:

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDLAR": + return { + "tooltip": "Load LOAcquire Register loads a 32-bit word or 64-bit doubleword from memory, and writes it to a register. The instruction also has memory ordering semantics as described in Load LOAcquire, Store LORelease. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load LOAcquire Register loads a 32-bit word or 64-bit doubleword from memory, and writes it to a register. The instruction also has memory ordering semantics as described in Load LOAcquire, Store LORelease. For information about memory accesses, see Load/Store addressing modes.

For this instruction, if the destination is WZR/XZR, it is impossible for software to observe the presence of the acquire semantic other than its effect on the arrival at endpoints.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDLARB": + return { + "tooltip": "Load LOAcquire Register Byte loads a byte from memory, zero-extends it and writes it to a register. The instruction also has memory ordering semantics as described in Load LOAcquire, Store LORelease. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load LOAcquire Register Byte loads a byte from memory, zero-extends it and writes it to a register. The instruction also has memory ordering semantics as described in Load LOAcquire, Store LORelease. For information about memory accesses, see Load/Store addressing modes.

For this instruction, if the destination is WZR/XZR, it is impossible for software to observe the presence of the acquire semantic other than its effect on the arrival at endpoints.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDLARH": + return { + "tooltip": "Load LOAcquire Register Halfword loads a halfword from memory, zero-extends it, and writes it to a register. The instruction also has memory ordering semantics as described in Load LOAcquire, Store LORelease. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load LOAcquire Register Halfword loads a halfword from memory, zero-extends it, and writes it to a register. The instruction also has memory ordering semantics as described in Load LOAcquire, Store LORelease. For information about memory accesses, see Load/Store addressing modes.

For this instruction, if the destination is WZR/XZR, it is impossible for software to observe the presence of the acquire semantic other than its effect on the arrival at endpoints.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNF1B": + return { + "tooltip": "Contiguous load with non-faulting behavior of unsigned bytes to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load with non-faulting behavior of unsigned bytes to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNF1D": + return { + "tooltip": "Contiguous load with non-faulting behavior of doublewords to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load with non-faulting behavior of doublewords to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNF1H": + return { + "tooltip": "Contiguous load with non-faulting behavior of unsigned halfwords to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load with non-faulting behavior of unsigned halfwords to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNF1SB": + return { + "tooltip": "Contiguous load with non-faulting behavior of signed bytes to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load with non-faulting behavior of signed bytes to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNF1SH": + return { + "tooltip": "Contiguous load with non-faulting behavior of signed halfwords to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load with non-faulting behavior of signed halfwords to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNF1SW": + return { + "tooltip": "Contiguous load with non-faulting behavior of signed words to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load with non-faulting behavior of signed words to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNF1W": + return { + "tooltip": "Contiguous load with non-faulting behavior of unsigned words to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load with non-faulting behavior of unsigned words to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNP": + return { + "tooltip": "Load Pair of SIMD&FP registers, with Non-temporal hint. This instruction loads a pair of SIMD&FP registers from memory, issuing a hint to the memory system that the access is non-temporal. The address that is used for the load is calculated from a base register value and an optional immediate offset.", + "html": "

Load Pair of SIMD&FP registers, with Non-temporal hint. This instruction loads a pair of SIMD&FP registers from memory, issuing a hint to the memory system that the access is non-temporal. The address that is used for the load is calculated from a base register value and an optional immediate offset.

For information about non-temporal pair instructions, see Load/Store SIMD and Floating-point Non-temporal pair.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNP": + return { + "tooltip": "Load Pair of Registers, with non-temporal hint, calculates an address from a base register value and an immediate offset, loads two 32-bit words or two 64-bit doublewords from memory, and writes them to two registers.", + "html": "

Load Pair of Registers, with non-temporal hint, calculates an address from a base register value and an immediate offset, loads two 32-bit words or two 64-bit doublewords from memory, and writes them to two registers.

For information about memory accesses, see Load/Store addressing modes. For information about Non-temporal pair instructions, see Load/Store Non-temporal pair.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1B": + return { + "tooltip": "Contiguous load non-temporal of bytes to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous load non-temporal of bytes to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1B": + return { + "tooltip": "Contiguous load non-temporal of bytes to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous load non-temporal of bytes to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1B": + return { + "tooltip": "Contiguous load non-temporal of bytes to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous load non-temporal of bytes to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1B": + return { + "tooltip": "Contiguous load non-temporal of bytes to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous load non-temporal of bytes to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1B": + return { + "tooltip": "Gather load non-temporal of unsigned bytes to active elements of a vector register from memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load non-temporal of unsigned bytes to active elements of a vector register from memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1B": + return { + "tooltip": "Contiguous load non-temporal of bytes to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load non-temporal of bytes to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1B": + return { + "tooltip": "Contiguous load non-temporal of bytes to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load non-temporal of bytes to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1D": + return { + "tooltip": "Contiguous load non-temporal of doublewords to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous load non-temporal of doublewords to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1D": + return { + "tooltip": "Contiguous load non-temporal of doublewords to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous load non-temporal of doublewords to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1D": + return { + "tooltip": "Contiguous load non-temporal of doublewords to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous load non-temporal of doublewords to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1D": + return { + "tooltip": "Contiguous load non-temporal of doublewords to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous load non-temporal of doublewords to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1D": + return { + "tooltip": "Gather load non-temporal of doublewords to active elements of a vector register from memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load non-temporal of doublewords to active elements of a vector register from memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1D": + return { + "tooltip": "Contiguous load non-temporal of doublewords to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load non-temporal of doublewords to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1D": + return { + "tooltip": "Contiguous load non-temporal of doublewords to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 8 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load non-temporal of doublewords to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 8 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1H": + return { + "tooltip": "Contiguous load non-temporal of halfwords to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous load non-temporal of halfwords to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1H": + return { + "tooltip": "Contiguous load non-temporal of halfwords to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous load non-temporal of halfwords to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1H": + return { + "tooltip": "Contiguous load non-temporal of halfwords to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous load non-temporal of halfwords to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1H": + return { + "tooltip": "Contiguous load non-temporal of halfwords to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous load non-temporal of halfwords to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1H": + return { + "tooltip": "Gather load non-temporal of unsigned halfwords to active elements of a vector register from memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load non-temporal of unsigned halfwords to active elements of a vector register from memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1H": + return { + "tooltip": "Contiguous load non-temporal of halfwords to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load non-temporal of halfwords to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1H": + return { + "tooltip": "Contiguous load non-temporal of halfwords to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 2 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load non-temporal of halfwords to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 2 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1SB": + return { + "tooltip": "Gather load non-temporal of signed bytes to active elements of a vector register from memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load non-temporal of signed bytes to active elements of a vector register from memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1SH": + return { + "tooltip": "Gather load non-temporal of signed halfwords to active elements of a vector register from memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load non-temporal of signed halfwords to active elements of a vector register from memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1SW": + return { + "tooltip": "Gather load non-temporal of signed words to active elements of a vector register from memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load non-temporal of signed words to active elements of a vector register from memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1W": + return { + "tooltip": "Contiguous load non-temporal of words to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous load non-temporal of words to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1W": + return { + "tooltip": "Contiguous load non-temporal of words to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous load non-temporal of words to elements of two or four consecutive vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1W": + return { + "tooltip": "Contiguous load non-temporal of words to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous load non-temporal of words to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1W": + return { + "tooltip": "Contiguous load non-temporal of words to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous load non-temporal of words to elements of two or four strided vector registers from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1W": + return { + "tooltip": "Gather load non-temporal of unsigned words to active elements of a vector register from memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.", + "html": "

Gather load non-temporal of unsigned words to active elements of a vector register from memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements will not cause a read from Device memory or signal faults, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1W": + return { + "tooltip": "Contiguous load non-temporal of words to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load non-temporal of words to elements of a vector register from the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDNT1W": + return { + "tooltip": "Contiguous load non-temporal of words to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 4 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.", + "html": "

Contiguous load non-temporal of words to elements of a vector register from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 4 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.

A non-temporal load is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDP": + return { + "tooltip": "Load Pair of SIMD&FP registers. This instruction loads a pair of SIMD&FP registers from memory. The address that is used for the load is calculated from a base register value and an optional immediate offset.", + "html": "

Load Pair of SIMD&FP registers. This instruction loads a pair of SIMD&FP registers from memory. The address that is used for the load is calculated from a base register value and an optional immediate offset.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDP": + return { + "tooltip": "Load Pair of Registers calculates an address from a base register value and an immediate offset, loads two 32-bit words or two 64-bit doublewords from memory, and writes them to two registers. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Pair of Registers calculates an address from a base register value and an immediate offset, loads two 32-bit words or two 64-bit doublewords from memory, and writes them to two registers. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDPSW": + return { + "tooltip": "Load Pair of Registers Signed Word calculates an address from a base register value and an immediate offset, loads two 32-bit words from memory, sign-extends them, and writes them to two registers. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Pair of Registers Signed Word calculates an address from a base register value and an immediate offset, loads two 32-bit words from memory, sign-extends them, and writes them to two registers. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDR": + return { + "tooltip": "Load SIMD&FP Register (immediate offset). This instruction loads an element from memory, and writes the result as a scalar to the SIMD&FP register. The address that is used for the load is calculated from a base register value, a signed immediate offset, and an optional offset that is a multiple of the element size.", + "html": "

Load SIMD&FP Register (immediate offset). This instruction loads an element from memory, and writes the result as a scalar to the SIMD&FP register. The address that is used for the load is calculated from a base register value, a signed immediate offset, and an optional offset that is a multiple of the element size.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDR": + return { + "tooltip": "Load Register (immediate) loads a word or doubleword from memory and writes it to a register. The address that is used for the load is calculated from a base register and an immediate offset. For information about memory accesses, see Load/Store addressing modes. The Unsigned offset variant scales the immediate offset value by the size of the value accessed before adding it to the base register value.", + "html": "

Load Register (immediate) loads a word or doubleword from memory and writes it to a register. The address that is used for the load is calculated from a base register and an immediate offset. For information about memory accesses, see Load/Store addressing modes. The Unsigned offset variant scales the immediate offset value by the size of the value accessed before adding it to the base register value.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDR": + return { + "tooltip": "Load SIMD&FP Register (PC-relative literal). This instruction loads a SIMD&FP register from memory. The address that is used for the load is calculated from the PC value and an immediate offset.", + "html": "

Load SIMD&FP Register (PC-relative literal). This instruction loads a SIMD&FP register from memory. The address that is used for the load is calculated from the PC value and an immediate offset.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDR": + return { + "tooltip": "Load Register (literal) calculates an address from the PC value and an immediate offset, loads a word from memory, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Register (literal) calculates an address from the PC value and an immediate offset, loads a word from memory, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDR": + return { + "tooltip": "Load a predicate register from a memory address generated by a 64-bit scalar base, plus an immediate offset in the range -256 to 255 which is multiplied by the current predicate register size in bytes. This instruction is unpredicated.", + "html": "

Load a predicate register from a memory address generated by a 64-bit scalar base, plus an immediate offset in the range -256 to 255 which is multiplied by the current predicate register size in bytes. This instruction is unpredicated.

The load is performed as contiguous byte accesses, each containing 8 consecutive predicate bits in ascending element order, with no endian conversion and no guarantee of single-copy atomicity larger than a byte. However, if alignment is checked, then a general-purpose base register must be aligned to 2 bytes.

For programmer convenience, an assembler must also accept a predicate-as-counter register name for the destination predicate register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDR": + return { + "tooltip": "Load SIMD&FP Register (register offset). This instruction loads a SIMD&FP register from memory. The address that is used for the load is calculated from a base register value and an offset register value. The offset can be optionally shifted and extended.", + "html": "

Load SIMD&FP Register (register offset). This instruction loads a SIMD&FP register from memory. The address that is used for the load is calculated from a base register value and an offset register value. The offset can be optionally shifted and extended.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDR": + return { + "tooltip": "Load Register (register) calculates an address from a base register value and an offset register value, loads a word from memory, and writes it to a register. The offset register value can optionally be shifted and extended. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Register (register) calculates an address from a base register value and an offset register value, loads a word from memory, and writes it to a register. The offset register value can optionally be shifted and extended. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDR": + return { + "tooltip": "Load a vector register from a memory address generated by a 64-bit scalar base, plus an immediate offset in the range -256 to 255 which is multiplied by the current vector register size in bytes. This instruction is unpredicated.", + "html": "

Load a vector register from a memory address generated by a 64-bit scalar base, plus an immediate offset in the range -256 to 255 which is multiplied by the current vector register size in bytes. This instruction is unpredicated.

The load is performed as contiguous byte accesses, with no endian conversion and no guarantee of single-copy atomicity larger than a byte. However, if alignment is checked, then the base register must be aligned to 16 bytes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDR": + return { + "tooltip": "The ZA array vector is selected by the sum of the vector select register and immediate offset, modulo the number of bytes in a Streaming SVE vector. The immediate offset is in the range 0 to 15. The memory address is generated by a 64-bit scalar base, plus the same optional immediate offset multiplied by the current vector length in bytes. This instruction is unpredicated.", + "html": "

The ZA array vector is selected by the sum of the vector select register and immediate offset, modulo the number of bytes in a Streaming SVE vector. The immediate offset is in the range 0 to 15. The memory address is generated by a 64-bit scalar base, plus the same optional immediate offset multiplied by the current vector length in bytes. This instruction is unpredicated.

The load is performed as contiguous byte accesses, with no endian conversion and no guarantee of single-copy atomicity larger than a byte. However, if alignment is checked, then the base register must be aligned to 16 bytes.

This instruction does not require the PE to be in Streaming SVE mode, and it is expected that this instruction will not experience a significant slowdown due to contention with other PEs that are executing in Streaming SVE mode.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDR": + return { + "tooltip": "Load the 64-byte ZT0 register from the memory address provided in the 64-bit scalar base register. This instruction is unpredicated.", + "html": "

Load the 64-byte ZT0 register from the memory address provided in the 64-bit scalar base register. This instruction is unpredicated.

The load is performed as contiguous byte accesses, with no endian conversion and no guarantee of single-copy atomicity larger than a byte. However, if alignment is checked, then the base register must be aligned to 16 bytes.

This instruction does not require the PE to be in Streaming SVE mode, and it is expected that this instruction will not experience a significant slowdown due to contention with other PEs that are executing in Streaming SVE mode.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDRAA": + case "LDRAB": + return { + "tooltip": "Load Register, with pointer authentication. This instruction authenticates an address from a base register using a modifier of zero and the specified key, adds an immediate offset to the authenticated address, and loads a 64-bit doubleword from memory at this resulting address into a register.", + "html": "

Load Register, with pointer authentication. This instruction authenticates an address from a base register using a modifier of zero and the specified key, adds an immediate offset to the authenticated address, and loads a 64-bit doubleword from memory at this resulting address into a register.

Key A is used for LDRAA. Key B is used for LDRAB.

If the authentication passes, the PE behaves the same as for an LDR instruction. For information on behavior if the authentication fails, see Faulting on pointer authentication.

The authenticated address is not written back to the base register, unless the pre-indexed variant of the instruction is used. In this case, the address that is written back to the base register does not include the pointer authentication code.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDRB": + return { + "tooltip": "Load Register Byte (immediate) loads a byte from memory, zero-extends it, and writes the result to a register. The address that is used for the load is calculated from a base register and an immediate offset. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Register Byte (immediate) loads a byte from memory, zero-extends it, and writes the result to a register. The address that is used for the load is calculated from a base register and an immediate offset. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDRB": + return { + "tooltip": "Load Register Byte (register) calculates an address from a base register value and an offset register value, loads a byte from memory, zero-extends it, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Register Byte (register) calculates an address from a base register value and an offset register value, loads a byte from memory, zero-extends it, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDRH": + return { + "tooltip": "Load Register Halfword (immediate) loads a halfword from memory, zero-extends it, and writes the result to a register. The address that is used for the load is calculated from a base register and an immediate offset. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Register Halfword (immediate) loads a halfword from memory, zero-extends it, and writes the result to a register. The address that is used for the load is calculated from a base register and an immediate offset. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDRH": + return { + "tooltip": "Load Register Halfword (register) calculates an address from a base register value and an offset register value, loads a halfword from memory, zero-extends it, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Register Halfword (register) calculates an address from a base register value and an offset register value, loads a halfword from memory, zero-extends it, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDRSB": + return { + "tooltip": "Load Register Signed Byte (immediate) loads a byte from memory, sign-extends it to either 32 bits or 64 bits, and writes the result to a register. The address that is used for the load is calculated from a base register and an immediate offset. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Register Signed Byte (immediate) loads a byte from memory, sign-extends it to either 32 bits or 64 bits, and writes the result to a register. The address that is used for the load is calculated from a base register and an immediate offset. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDRSB": + return { + "tooltip": "Load Register Signed Byte (register) calculates an address from a base register value and an offset register value, loads a byte from memory, sign-extends it, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Register Signed Byte (register) calculates an address from a base register value and an offset register value, loads a byte from memory, sign-extends it, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDRSH": + return { + "tooltip": "Load Register Signed Halfword (immediate) loads a halfword from memory, sign-extends it to 32 bits or 64 bits, and writes the result to a register. The address that is used for the load is calculated from a base register and an immediate offset. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Register Signed Halfword (immediate) loads a halfword from memory, sign-extends it to 32 bits or 64 bits, and writes the result to a register. The address that is used for the load is calculated from a base register and an immediate offset. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDRSH": + return { + "tooltip": "Load Register Signed Halfword (register) calculates an address from a base register value and an offset register value, loads a halfword from memory, sign-extends it, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Register Signed Halfword (register) calculates an address from a base register value and an offset register value, loads a halfword from memory, sign-extends it, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDRSW": + return { + "tooltip": "Load Register Signed Word (immediate) loads a word from memory, sign-extends it to 64 bits, and writes the result to a register. The address that is used for the load is calculated from a base register and an immediate offset. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Register Signed Word (immediate) loads a word from memory, sign-extends it to 64 bits, and writes the result to a register. The address that is used for the load is calculated from a base register and an immediate offset. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDRSW": + return { + "tooltip": "Load Register Signed Word (literal) calculates an address from the PC value and an immediate offset, loads a word from memory, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Register Signed Word (literal) calculates an address from the PC value and an immediate offset, loads a word from memory, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDRSW": + return { + "tooltip": "Load Register Signed Word (register) calculates an address from a base register value and an offset register value, loads a word from memory, sign-extends it to form a 64-bit value, and writes it to a register. The offset register value can be shifted left by 0 or 2 bits. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Register Signed Word (register) calculates an address from a base register value and an offset register value, loads a word from memory, sign-extends it to form a 64-bit value, and writes it to a register. The offset register value can be shifted left by 0 or 2 bits. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDSET": + case "LDSETA": + case "LDSETAL": + case "LDSETL": + return { + "tooltip": "Atomic bit set on word or doubleword in memory atomically loads a 32-bit word or 64-bit doubleword from memory, performs a bitwise OR with the value held in a register on it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic bit set on word or doubleword in memory atomically loads a 32-bit word or 64-bit doubleword from memory, performs a bitwise OR with the value held in a register on it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics see Load-Acquire, Store-Release.

For information about memory accesses see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDSETAB": + case "LDSETALB": + case "LDSETB": + case "LDSETLB": + return { + "tooltip": "Atomic bit set on byte in memory atomically loads an 8-bit byte from memory, performs a bitwise OR with the value held in a register on it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic bit set on byte in memory atomically loads an 8-bit byte from memory, performs a bitwise OR with the value held in a register on it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDSETAH": + case "LDSETALH": + case "LDSETH": + case "LDSETLH": + return { + "tooltip": "Atomic bit set on halfword in memory atomically loads a 16-bit halfword from memory, performs a bitwise OR with the value held in a register on it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic bit set on halfword in memory atomically loads a 16-bit halfword from memory, performs a bitwise OR with the value held in a register on it, and stores the result back to memory. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDSETP": + case "LDSETPA": + case "LDSETPAL": + case "LDSETPL": + return { + "tooltip": "Atomic bit set on quadword in memory atomically loads a 128-bit quadword from memory, performs a bitwise OR with the value held in a pair of registers on it, and stores the result back to memory. The value initially loaded from memory is returned in the same pair of registers.", + "html": "

Atomic bit set on quadword in memory atomically loads a 128-bit quadword from memory, performs a bitwise OR with the value held in a pair of registers on it, and stores the result back to memory. The value initially loaded from memory is returned in the same pair of registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDSMAX": + case "LDSMAXA": + case "LDSMAXAL": + case "LDSMAXL": + return { + "tooltip": "Atomic signed maximum on word or doubleword in memory atomically loads a 32-bit word or 64-bit doubleword from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as signed numbers. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic signed maximum on word or doubleword in memory atomically loads a 32-bit word or 64-bit doubleword from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as signed numbers. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDSMAXAB": + case "LDSMAXALB": + case "LDSMAXB": + case "LDSMAXLB": + return { + "tooltip": "Atomic signed maximum on byte in memory atomically loads an 8-bit byte from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as signed numbers. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic signed maximum on byte in memory atomically loads an 8-bit byte from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as signed numbers. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDSMAXAH": + case "LDSMAXALH": + case "LDSMAXH": + case "LDSMAXLH": + return { + "tooltip": "Atomic signed maximum on halfword in memory atomically loads a 16-bit halfword from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as signed numbers. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic signed maximum on halfword in memory atomically loads a 16-bit halfword from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as signed numbers. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDSMIN": + case "LDSMINA": + case "LDSMINAL": + case "LDSMINL": + return { + "tooltip": "Atomic signed minimum on word or doubleword in memory atomically loads a 32-bit word or 64-bit doubleword from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as signed numbers. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic signed minimum on word or doubleword in memory atomically loads a 32-bit word or 64-bit doubleword from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as signed numbers. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDSMINAB": + case "LDSMINALB": + case "LDSMINB": + case "LDSMINLB": + return { + "tooltip": "Atomic signed minimum on byte in memory atomically loads an 8-bit byte from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as signed numbers. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic signed minimum on byte in memory atomically loads an 8-bit byte from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as signed numbers. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDSMINAH": + case "LDSMINALH": + case "LDSMINH": + case "LDSMINLH": + return { + "tooltip": "Atomic signed minimum on halfword in memory atomically loads a 16-bit halfword from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as signed numbers. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic signed minimum on halfword in memory atomically loads a 16-bit halfword from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as signed numbers. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDTR": + return { + "tooltip": "Load Register (unprivileged) loads a word or doubleword from memory, and writes it to a register. The address that is used for the load is calculated from a base register and an immediate offset.", + "html": "

Load Register (unprivileged) loads a word or doubleword from memory, and writes it to a register. The address that is used for the load is calculated from a base register and an immediate offset.

Memory accesses made by the instruction behave as if the instruction was executed at EL0 if the Effective value of PSTATE.UAO is 0 and either:

Otherwise, the memory access operates with the restrictions determined by the Exception level at which the instruction is executed. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDTRB": + return { + "tooltip": "Load Register Byte (unprivileged) loads a byte from memory, zero-extends it, and writes the result to a register. The address that is used for the load is calculated from a base register and an immediate offset.", + "html": "

Load Register Byte (unprivileged) loads a byte from memory, zero-extends it, and writes the result to a register. The address that is used for the load is calculated from a base register and an immediate offset.

Memory accesses made by the instruction behave as if the instruction was executed at EL0 if the Effective value of PSTATE.UAO is 0 and either:

Otherwise, the memory access operates with the restrictions determined by the Exception level at which the instruction is executed. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDTRH": + return { + "tooltip": "Load Register Halfword (unprivileged) loads a halfword from memory, zero-extends it, and writes the result to a register. The address that is used for the load is calculated from a base register and an immediate offset.", + "html": "

Load Register Halfword (unprivileged) loads a halfword from memory, zero-extends it, and writes the result to a register. The address that is used for the load is calculated from a base register and an immediate offset.

Memory accesses made by the instruction behave as if the instruction was executed at EL0 if the Effective value of PSTATE.UAO is 0 and either:

Otherwise, the memory access operates with the restrictions determined by the Exception level at which the instruction is executed. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDTRSB": + return { + "tooltip": "Load Register Signed Byte (unprivileged) loads a byte from memory, sign-extends it to 32 bits or 64 bits, and writes the result to a register. The address that is used for the load is calculated from a base register and an immediate offset.", + "html": "

Load Register Signed Byte (unprivileged) loads a byte from memory, sign-extends it to 32 bits or 64 bits, and writes the result to a register. The address that is used for the load is calculated from a base register and an immediate offset.

Memory accesses made by the instruction behave as if the instruction was executed at EL0 if the Effective value of PSTATE.UAO is 0 and either:

Otherwise, the memory access operates with the restrictions determined by the Exception level at which the instruction is executed. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDTRSH": + return { + "tooltip": "Load Register Signed Halfword (unprivileged) loads a halfword from memory, sign-extends it to 32 bits or 64 bits, and writes the result to a register. The address that is used for the load is calculated from a base register and an immediate offset.", + "html": "

Load Register Signed Halfword (unprivileged) loads a halfword from memory, sign-extends it to 32 bits or 64 bits, and writes the result to a register. The address that is used for the load is calculated from a base register and an immediate offset.

Memory accesses made by the instruction behave as if the instruction was executed at EL0 if the Effective value of PSTATE.UAO is 0 and either:

Otherwise, the memory access operates with the restrictions determined by the Exception level at which the instruction is executed. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDTRSW": + return { + "tooltip": "Load Register Signed Word (unprivileged) loads a word from memory, sign-extends it to 64 bits, and writes the result to a register. The address that is used for the load is calculated from a base register and an immediate offset.", + "html": "

Load Register Signed Word (unprivileged) loads a word from memory, sign-extends it to 64 bits, and writes the result to a register. The address that is used for the load is calculated from a base register and an immediate offset.

Memory accesses made by the instruction behave as if the instruction was executed at EL0 if the Effective value of PSTATE.UAO is 0 and either:

Otherwise, the memory access operates with the restrictions determined by the Exception level at which the instruction is executed. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDUMAX": + case "LDUMAXA": + case "LDUMAXAL": + case "LDUMAXL": + return { + "tooltip": "Atomic unsigned maximum on word or doubleword in memory atomically loads a 32-bit word or 64-bit doubleword from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as unsigned numbers. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic unsigned maximum on word or doubleword in memory atomically loads a 32-bit word or 64-bit doubleword from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as unsigned numbers. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDUMAXAB": + case "LDUMAXALB": + case "LDUMAXB": + case "LDUMAXLB": + return { + "tooltip": "Atomic unsigned maximum on byte in memory atomically loads an 8-bit byte from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as unsigned numbers. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic unsigned maximum on byte in memory atomically loads an 8-bit byte from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as unsigned numbers. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDUMAXAH": + case "LDUMAXALH": + case "LDUMAXH": + case "LDUMAXLH": + return { + "tooltip": "Atomic unsigned maximum on halfword in memory atomically loads a 16-bit halfword from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as unsigned numbers. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic unsigned maximum on halfword in memory atomically loads a 16-bit halfword from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as unsigned numbers. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDUMIN": + case "LDUMINA": + case "LDUMINAL": + case "LDUMINL": + return { + "tooltip": "Atomic unsigned minimum on word or doubleword in memory atomically loads a 32-bit word or 64-bit doubleword from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as unsigned numbers. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic unsigned minimum on word or doubleword in memory atomically loads a 32-bit word or 64-bit doubleword from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as unsigned numbers. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDUMINAB": + case "LDUMINALB": + case "LDUMINB": + case "LDUMINLB": + return { + "tooltip": "Atomic unsigned minimum on byte in memory atomically loads an 8-bit byte from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as unsigned numbers. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic unsigned minimum on byte in memory atomically loads an 8-bit byte from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as unsigned numbers. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDUMINAH": + case "LDUMINALH": + case "LDUMINH": + case "LDUMINLH": + return { + "tooltip": "Atomic unsigned minimum on halfword in memory atomically loads a 16-bit halfword from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as unsigned numbers. The value initially loaded from memory is returned in the destination register.", + "html": "

Atomic unsigned minimum on halfword in memory atomically loads a 16-bit halfword from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as unsigned numbers. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDUR": + return { + "tooltip": "Load SIMD&FP Register (unscaled offset). This instruction loads a SIMD&FP register from memory. The address that is used for the load is calculated from a base register value and an optional immediate offset.", + "html": "

Load SIMD&FP Register (unscaled offset). This instruction loads a SIMD&FP register from memory. The address that is used for the load is calculated from a base register value and an optional immediate offset.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDUR": + return { + "tooltip": "Load Register (unscaled) calculates an address from a base register and an immediate offset, loads a 32-bit word or 64-bit doubleword from memory, zero-extends it, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Register (unscaled) calculates an address from a base register and an immediate offset, loads a 32-bit word or 64-bit doubleword from memory, zero-extends it, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDURB": + return { + "tooltip": "Load Register Byte (unscaled) calculates an address from a base register and an immediate offset, loads a byte from memory, zero-extends it, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Register Byte (unscaled) calculates an address from a base register and an immediate offset, loads a byte from memory, zero-extends it, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDURH": + return { + "tooltip": "Load Register Halfword (unscaled) calculates an address from a base register and an immediate offset, loads a halfword from memory, zero-extends it, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Register Halfword (unscaled) calculates an address from a base register and an immediate offset, loads a halfword from memory, zero-extends it, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDURSB": + return { + "tooltip": "Load Register Signed Byte (unscaled) calculates an address from a base register and an immediate offset, loads a signed byte from memory, sign-extends it, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Register Signed Byte (unscaled) calculates an address from a base register and an immediate offset, loads a signed byte from memory, sign-extends it, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDURSH": + return { + "tooltip": "Load Register Signed Halfword (unscaled) calculates an address from a base register and an immediate offset, loads a signed halfword from memory, sign-extends it, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Register Signed Halfword (unscaled) calculates an address from a base register and an immediate offset, loads a signed halfword from memory, sign-extends it, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDURSW": + return { + "tooltip": "Load Register Signed Word (unscaled) calculates an address from a base register and an immediate offset, loads a signed word from memory, sign-extends it, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Register Signed Word (unscaled) calculates an address from a base register and an immediate offset, loads a signed word from memory, sign-extends it, and writes it to a register. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDXP": + return { + "tooltip": "Load Exclusive Pair of Registers derives an address from a base register value, loads two 32-bit words or two 64-bit doublewords from memory, and writes them to two registers. For information on single-copy atomicity and alignment requirements, see Requirements for single-copy atomicity and Alignment of data accesses. The PE marks the physical address being accessed as an exclusive access. This exclusive access mark is checked by Store Exclusive instructions. See Synchronization and semaphores. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Exclusive Pair of Registers derives an address from a base register value, loads two 32-bit words or two 64-bit doublewords from memory, and writes them to two registers. For information on single-copy atomicity and alignment requirements, see Requirements for single-copy atomicity and Alignment of data accesses. The PE marks the physical address being accessed as an exclusive access. This exclusive access mark is checked by Store Exclusive instructions. See Synchronization and semaphores. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDXR": + return { + "tooltip": "Load Exclusive Register derives an address from a base register value, loads a 32-bit word or a 64-bit doubleword from memory, and writes it to a register. The memory access is atomic. The PE marks the physical address being accessed as an exclusive access. This exclusive access mark is checked by Store Exclusive instructions. See Synchronization and semaphores. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Exclusive Register derives an address from a base register value, loads a 32-bit word or a 64-bit doubleword from memory, and writes it to a register. The memory access is atomic. The PE marks the physical address being accessed as an exclusive access. This exclusive access mark is checked by Store Exclusive instructions. See Synchronization and semaphores. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDXRB": + return { + "tooltip": "Load Exclusive Register Byte derives an address from a base register value, loads a byte from memory, zero-extends it and writes it to a register. The memory access is atomic. The PE marks the physical address being accessed as an exclusive access. This exclusive access mark is checked by Store Exclusive instructions. See Synchronization and semaphores. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Exclusive Register Byte derives an address from a base register value, loads a byte from memory, zero-extends it and writes it to a register. The memory access is atomic. The PE marks the physical address being accessed as an exclusive access. This exclusive access mark is checked by Store Exclusive instructions. See Synchronization and semaphores. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LDXRH": + return { + "tooltip": "Load Exclusive Register Halfword derives an address from a base register value, loads a halfword from memory, zero-extends it and writes it to a register. The memory access is atomic. The PE marks the physical address being accessed as an exclusive access. This exclusive access mark is checked by Store Exclusive instructions. See Synchronization and semaphores. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Load Exclusive Register Halfword derives an address from a base register value, loads a halfword from memory, zero-extends it and writes it to a register. The memory access is atomic. The PE marks the physical address being accessed as an exclusive access. This exclusive access mark is checked by Store Exclusive instructions. See Synchronization and semaphores. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LSL": + return { + "tooltip": "Logical Shift Left (register) shifts a register value left by a variable number of bits, shifting in zeros, and writes the result to the destination register. The remainder obtained by dividing the second source register by the data size defines the number of bits by which the first source register is left-shifted.", + "html": "

Logical Shift Left (register) shifts a register value left by a variable number of bits, shifting in zeros, and writes the result to the destination register. The remainder obtained by dividing the second source register by the data size defines the number of bits by which the first source register is left-shifted.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LSL": + return { + "tooltip": "Logical Shift Left (immediate) shifts a register value left by an immediate number of bits, shifting in zeros, and writes the result to the destination register.", + "html": "

Logical Shift Left (immediate) shifts a register value left by an immediate number of bits, shifting in zeros, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LSL": + return { + "tooltip": "Shift left by immediate each active element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate shift amount is an unsigned value in the range 0 to number of bits per element minus 1. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift left by immediate each active element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate shift amount is an unsigned value in the range 0 to number of bits per element minus 1. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LSL": + return { + "tooltip": "Shift left active elements of the first source vector by corresponding overlapping 64-bit elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. The shift amount is a vector of unsigned 64-bit doubleword elements in which all bits are significant, and not used modulo the destination element size. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift left active elements of the first source vector by corresponding overlapping 64-bit elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. The shift amount is a vector of unsigned 64-bit doubleword elements in which all bits are significant, and not used modulo the destination element size. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LSL": + return { + "tooltip": "Shift left active elements of the first source vector by corresponding elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. The shift amount operand is a vector of unsigned elements in which all bits are significant, and not used modulo the element size. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift left active elements of the first source vector by corresponding elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. The shift amount operand is a vector of unsigned elements in which all bits are significant, and not used modulo the element size. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LSL": + return { + "tooltip": "Shift left by immediate each element of the source vector, and place the results in the corresponding elements of the destination vector. The immediate shift amount is an unsigned value in the range 0 to number of bits per element minus 1. This instruction is unpredicated.", + "html": "

Shift left by immediate each element of the source vector, and place the results in the corresponding elements of the destination vector. The immediate shift amount is an unsigned value in the range 0 to number of bits per element minus 1. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LSL": + return { + "tooltip": "Shift left all elements of the first source vector by corresponding overlapping 64-bit elements of the second source vector and place the first in the corresponding elements of the destination vector. The shift amount is a vector of unsigned 64-bit doubleword elements in which all bits are significant, and not used modulo the destination element size. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift left all elements of the first source vector by corresponding overlapping 64-bit elements of the second source vector and place the first in the corresponding elements of the destination vector. The shift amount is a vector of unsigned 64-bit doubleword elements in which all bits are significant, and not used modulo the destination element size. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LSLR": + return { + "tooltip": "Reversed shift left active elements of the second source vector by corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. The shift amount operand is a vector of unsigned elements in which all bits are significant, and not used modulo the element size. Inactive elements in the destination vector register remain unmodified.", + "html": "

Reversed shift left active elements of the second source vector by corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. The shift amount operand is a vector of unsigned elements in which all bits are significant, and not used modulo the element size. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LSLV": + return { + "tooltip": "Logical Shift Left Variable shifts a register value left by a variable number of bits, shifting in zeros, and writes the result to the destination register. The remainder obtained by dividing the second source register by the data size defines the number of bits by which the first source register is left-shifted.", + "html": "

Logical Shift Left Variable shifts a register value left by a variable number of bits, shifting in zeros, and writes the result to the destination register. The remainder obtained by dividing the second source register by the data size defines the number of bits by which the first source register is left-shifted.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LSR": + return { + "tooltip": "Logical Shift Right (register) shifts a register value right by a variable number of bits, shifting in zeros, and writes the result to the destination register. The remainder obtained by dividing the second source register by the data size defines the number of bits by which the first source register is right-shifted.", + "html": "

Logical Shift Right (register) shifts a register value right by a variable number of bits, shifting in zeros, and writes the result to the destination register. The remainder obtained by dividing the second source register by the data size defines the number of bits by which the first source register is right-shifted.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LSR": + return { + "tooltip": "Logical Shift Right (immediate) shifts a register value right by an immediate number of bits, shifting in zeros, and writes the result to the destination register.", + "html": "

Logical Shift Right (immediate) shifts a register value right by an immediate number of bits, shifting in zeros, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LSR": + return { + "tooltip": "Shift right by immediate, inserting zeroes, each active element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift right by immediate, inserting zeroes, each active element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LSR": + return { + "tooltip": "Shift right, inserting zeroes, active elements of the first source vector by corresponding overlapping 64-bit elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. The shift amount is a vector of unsigned 64-bit doubleword elements in which all bits are significant, and not used modulo the destination element size. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift right, inserting zeroes, active elements of the first source vector by corresponding overlapping 64-bit elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. The shift amount is a vector of unsigned 64-bit doubleword elements in which all bits are significant, and not used modulo the destination element size. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LSR": + return { + "tooltip": "Shift right, inserting zeroes, active elements of the first source vector by corresponding elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. The shift amount operand is a vector of unsigned elements in which all bits are significant, and not used modulo the element size. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift right, inserting zeroes, active elements of the first source vector by corresponding elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. The shift amount operand is a vector of unsigned elements in which all bits are significant, and not used modulo the element size. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LSR": + return { + "tooltip": "Shift right by immediate, inserting zeroes, each element of the source vector, and place the results in the corresponding elements of the destination vector. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.", + "html": "

Shift right by immediate, inserting zeroes, each element of the source vector, and place the results in the corresponding elements of the destination vector. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LSR": + return { + "tooltip": "Shift right, inserting zeroes, all elements of the first source vector by corresponding overlapping 64-bit elements of the second source vector and place the first in the corresponding elements of the destination vector. The shift amount is a vector of unsigned 64-bit doubleword elements in which all bits are significant, and not used modulo the destination element size. This instruction is unpredicated.", + "html": "

Shift right, inserting zeroes, all elements of the first source vector by corresponding overlapping 64-bit elements of the second source vector and place the first in the corresponding elements of the destination vector. The shift amount is a vector of unsigned 64-bit doubleword elements in which all bits are significant, and not used modulo the destination element size. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LSRR": + return { + "tooltip": "Reversed shift right, inserting zeroes, active elements of the second source vector by corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. The shift amount operand is a vector of unsigned elements in which all bits are significant, and not used modulo the element size. Inactive elements in the destination vector register remain unmodified.", + "html": "

Reversed shift right, inserting zeroes, active elements of the second source vector by corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. The shift amount operand is a vector of unsigned elements in which all bits are significant, and not used modulo the element size. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LSRV": + return { + "tooltip": "Logical Shift Right Variable shifts a register value right by a variable number of bits, shifting in zeros, and writes the result to the destination register. The remainder obtained by dividing the second source register by the data size defines the number of bits by which the first source register is right-shifted.", + "html": "

Logical Shift Right Variable shifts a register value right by a variable number of bits, shifting in zeros, and writes the result to the destination register. The remainder obtained by dividing the second source register by the data size defines the number of bits by which the first source register is right-shifted.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LUTI2": + return { + "tooltip": "Copy 8-bit, 16-bit or 32-bit elements from ZT0 to two destination vectors using packed 2-bit indices from a segment of the source vector register. A segment corresponds to a portion of the source vector that is consumed in order to fill the destination vector. The segment is selected by the vector segment index modulo the total number of segments.", + "html": "

Copy 8-bit, 16-bit or 32-bit elements from ZT0 to two destination vectors using packed 2-bit indices from a segment of the source vector register. A segment corresponds to a portion of the source vector that is consumed in order to fill the destination vector. The segment is selected by the vector segment index modulo the total number of segments.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LUTI2": + return { + "tooltip": "Copy 8-bit, 16-bit or 32-bit elements from ZT0 to four destination vectors using packed 2-bit indices from a segment of the source vector register. A segment corresponds to a portion of the source vector that is consumed in order to fill the destination vector. The segment is selected by the vector segment index modulo the total number of segments.", + "html": "

Copy 8-bit, 16-bit or 32-bit elements from ZT0 to four destination vectors using packed 2-bit indices from a segment of the source vector register. A segment corresponds to a portion of the source vector that is consumed in order to fill the destination vector. The segment is selected by the vector segment index modulo the total number of segments.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LUTI2": + return { + "tooltip": "Copy 8-bit, 16-bit or 32-bit elements from ZT0 to one destination vector using packed 2-bit indices from a segment of the source vector register. A segment corresponds to a portion of the source vector that is consumed in order to fill the destination vector. The segment is selected by the vector segment index modulo the total number of segments.", + "html": "

Copy 8-bit, 16-bit or 32-bit elements from ZT0 to one destination vector using packed 2-bit indices from a segment of the source vector register. A segment corresponds to a portion of the source vector that is consumed in order to fill the destination vector. The segment is selected by the vector segment index modulo the total number of segments.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LUTI4": + return { + "tooltip": "Copy 8-bit, 16-bit or 32-bit elements from ZT0 to two destination vectors using packed 4-bit indices from a segment of the source vector register. A segment corresponds to a portion of the source vector that is consumed in order to fill the destination vector. The segment is selected by the vector segment index modulo the total number of segments.", + "html": "

Copy 8-bit, 16-bit or 32-bit elements from ZT0 to two destination vectors using packed 4-bit indices from a segment of the source vector register. A segment corresponds to a portion of the source vector that is consumed in order to fill the destination vector. The segment is selected by the vector segment index modulo the total number of segments.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LUTI4": + return { + "tooltip": "Copy 16-bit or 32-bit elements from ZT0 to four destination vectors using packed 4-bit indices from a segment of the source vector register. A segment corresponds to a portion of the source vector that is consumed in order to fill the destination vector. The segment is selected by the vector segment index modulo the total number of segments.", + "html": "

Copy 16-bit or 32-bit elements from ZT0 to four destination vectors using packed 4-bit indices from a segment of the source vector register. A segment corresponds to a portion of the source vector that is consumed in order to fill the destination vector. The segment is selected by the vector segment index modulo the total number of segments.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "LUTI4": + return { + "tooltip": "Copy 8-bit, 16-bit or 32-bit elements from ZT0 to one destination vector using packed 4-bit indices from a segment of the source vector register. A segment corresponds to a portion of the source vector that is consumed in order to fill the destination vector. The segment is selected by the vector segment index modulo the total number of segments.", + "html": "

Copy 8-bit, 16-bit or 32-bit elements from ZT0 to one destination vector using packed 4-bit indices from a segment of the source vector register. A segment corresponds to a portion of the source vector that is consumed in order to fill the destination vector. The segment is selected by the vector segment index modulo the total number of segments.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MAD": + return { + "tooltip": "Multiply the corresponding active elements of the first and second source vectors and add to elements of the third (addend) vector. Destructively place the results in the destination and first source (multiplicand) vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Multiply the corresponding active elements of the first and second source vectors and add to elements of the third (addend) vector. Destructively place the results in the destination and first source (multiplicand) vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MADD": + return { + "tooltip": "Multiply-Add multiplies two register values, adds a third register value, and writes the result to the destination register.", + "html": "

Multiply-Add multiplies two register values, adds a third register value, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MATCH": + return { + "tooltip": "This instruction compares each active 8-bit or 16-bit character in the first source vector with all of the characters in the corresponding 128-bit segment of the second source vector. Where the first source element detects any matching characters in the second segment it places true in the corresponding element of the destination predicate, otherwise false. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

This instruction compares each active 8-bit or 16-bit character in the first source vector with all of the characters in the corresponding 128-bit segment of the second source vector. Where the first source element detects any matching characters in the second segment it places true in the corresponding element of the destination predicate, otherwise false. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MLA": + return { + "tooltip": "Multiply-Add to accumulator (vector, by element). This instruction multiplies the vector elements in the first source SIMD&FP register by the specified value in the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values.", + "html": "

Multiply-Add to accumulator (vector, by element). This instruction multiplies the vector elements in the first source SIMD&FP register by the specified value in the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MLA": + return { + "tooltip": "Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.", + "html": "

Multiply-Add to accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MLA": + return { + "tooltip": "Multiply the corresponding active elements of the first and second source vectors and add to elements of the third source (addend) vector. Destructively place the results in the destination and third source (addend) vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Multiply the corresponding active elements of the first and second source vectors and add to elements of the third source (addend) vector. Destructively place the results in the destination and third source (addend) vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MLA": + return { + "tooltip": "Multiply all integer elements within each 128-bit segment of the first source vector by the specified element in the corresponding second source vector segment. The products are then destructively added to the corresponding elements of the addend and destination vector.", + "html": "

Multiply all integer elements within each 128-bit segment of the first source vector by the specified element in the corresponding second source vector segment. The products are then destructively added to the corresponding elements of the addend and destination vector.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 1 to 3 bits depending on the size of the element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MLS": + return { + "tooltip": "Multiply-Subtract from accumulator (vector, by element). This instruction multiplies the vector elements in the first source SIMD&FP register by the specified value in the second source SIMD&FP register, and subtracts the results from the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values.", + "html": "

Multiply-Subtract from accumulator (vector, by element). This instruction multiplies the vector elements in the first source SIMD&FP register by the specified value in the second source SIMD&FP register, and subtracts the results from the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MLS": + return { + "tooltip": "Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.", + "html": "

Multiply-Subtract from accumulator (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MLS": + return { + "tooltip": "Multiply the corresponding active elements of the first and second source vectors and subtract from elements of the third source (addend) vector. Destructively place the results in the destination and third source (addend) vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Multiply the corresponding active elements of the first and second source vectors and subtract from elements of the third source (addend) vector. Destructively place the results in the destination and third source (addend) vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MLS": + return { + "tooltip": "Multiply all integer elements within each 128-bit segment of the first source vector by the specified element in the corresponding second source vector segment. The products are then destructively subtracted from the corresponding elements of the addend and destination vector.", + "html": "

Multiply all integer elements within each 128-bit segment of the first source vector by the specified element in the corresponding second source vector segment. The products are then destructively subtracted from the corresponding elements of the addend and destination vector.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 1 to 3 bits depending on the size of the element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MNEG": + return { + "tooltip": "Multiply-Negate multiplies two register values, negates the product, and writes the result to the destination register.", + "html": "

Multiply-Negate multiplies two register values, negates the product, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "Read active elements from the source predicate and place in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.", + "html": "

Read active elements from the source predicate and place in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "Move a signed integer immediate to each active element in the destination vector. Inactive elements in the destination vector register are set to zero.", + "html": "

Move a signed integer immediate to each active element in the destination vector. Inactive elements in the destination vector register are set to zero.

The immediate operand is a signed value in the range -128 to +127, and for element widths of 16 bits or higher it may also be a signed multiple of 256 in the range -32768 to +32512 (excluding 0).

The immediate is encoded in 8 bits with an optional left shift by 8. The preferred disassembly when the shift option is specified is \"#<simm8>, LSL #8\". However an assembler and disassembler may also allow use of the shifted 16-bit value unless the immediate is 0 and the shift amount is 8, which must be unambiguously described as \"#0, LSL #8\".

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "Move a signed integer immediate to each active element in the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Move a signed integer immediate to each active element in the destination vector. Inactive elements in the destination vector register remain unmodified.

The immediate operand is a signed value in the range -128 to +127, and for element widths of 16 bits or higher it may also be a signed multiple of 256 in the range -32768 to +32512 (excluding 0).

The immediate is encoded in 8 bits with an optional left shift by 8. The preferred disassembly when the shift option is specified is \"#<simm8>, LSL #8\". However an assembler and disassembler may also allow use of the shifted 16-bit value unless the immediate is 0 and the shift amount is 8, which must be unambiguously described as \"#0, LSL #8\".

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "Move the general-purpose scalar source register to each active element in the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Move the general-purpose scalar source register to each active element in the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "Move the SIMD & floating-point scalar source register to each active element in the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Move the SIMD & floating-point scalar source register to each active element in the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "Move vector element to scalar. This instruction duplicates the specified vector element in the SIMD&FP source register into a scalar, and writes the result to the SIMD&FP destination register.", + "html": "

Move vector element to scalar. This instruction duplicates the specified vector element in the SIMD&FP source register into a scalar, and writes the result to the SIMD&FP destination register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "Unconditionally broadcast the signed integer immediate into each element of the destination vector. This instruction is unpredicated.", + "html": "

Unconditionally broadcast the signed integer immediate into each element of the destination vector. This instruction is unpredicated.

The immediate operand is a signed value in the range -128 to +127, and for element widths of 16 bits or higher it may also be a signed multiple of 256 in the range -32768 to +32512 (excluding 0).

The immediate is encoded in 8 bits with an optional left shift by 8. The preferred disassembly when the shift option is specified is \"#<simm8>, LSL #8\". However an assembler and disassembler may also allow use of the shifted 16-bit value unless the immediate is 0 and the shift amount is 8, which must be unambiguously described as \"#0, LSL #8\".

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "Unconditionally broadcast the general-purpose scalar source register into each element of the destination vector. This instruction is unpredicated.", + "html": "

Unconditionally broadcast the general-purpose scalar source register into each element of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "Unconditionally broadcast the SIMD&FP scalar into each element of the destination vector. This instruction is unpredicated.", + "html": "

Unconditionally broadcast the SIMD&FP scalar into each element of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "Unconditionally broadcast the logical bitmask immediate into each element of the destination vector. This instruction is unpredicated. The immediate is a 64-bit value consisting of a single run of ones or zeros repeating every 2, 4, 8, 16, 32 or 64 bits.", + "html": "

Unconditionally broadcast the logical bitmask immediate into each element of the destination vector. This instruction is unpredicated. The immediate is a 64-bit value consisting of a single run of ones or zeros repeating every 2, 4, 8, 16, 32 or 64 bits.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "Move vector element to another vector element. This instruction copies the vector element of the source SIMD&FP register to the specified vector element of the destination SIMD&FP register.", + "html": "

Move vector element to another vector element. This instruction copies the vector element of the source SIMD&FP register to the specified vector element of the destination SIMD&FP register.

This instruction can insert data into individual elements within a SIMD&FP register without clearing the remaining bits to zero.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "Move general-purpose register to a vector element. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.", + "html": "

Move general-purpose register to a vector element. This instruction copies the contents of the source general-purpose register to the specified vector element in the destination SIMD&FP register.

This instruction can insert data into individual elements within a SIMD&FP register without clearing the remaining bits to zero.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "The instruction operates on two consecutive horizontal or vertical slices within a named ZA tile of the specified element size.", + "html": "

The instruction operates on two consecutive horizontal or vertical slices within a named ZA tile of the specified element size.

The consecutive slice numbers within the tile are selected starting from the sum of the slice index register and immediate offset, modulo the number of such elements in a vector. The immediate offset is a multiple of 2 in the range 0 to the number of elements in a 128-bit vector segment minus 2.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "The instruction operates on four consecutive horizontal or vertical slices within a named ZA tile of the specified element size.", + "html": "

The instruction operates on four consecutive horizontal or vertical slices within a named ZA tile of the specified element size.

The consecutive slice numbers within the tile are selected starting from the sum of the slice index register and immediate offset, modulo the number of such elements in a vector. The immediate offset is a multiple of 4 in the range 0 to the number of elements in a 128-bit vector segment minus 4.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "The instruction operates on two ZA single-vector groups. The vector numbers forming the single-vector group within each half of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half the number of ZA array vectors.", + "html": "

The instruction operates on two ZA single-vector groups. The vector numbers forming the single-vector group within each half of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half the number of ZA array vectors.

The vector group symbol VGx2 indicates that the instruction operates on two ZA single-vector groups.

The preferred disassembly syntax uses a 64-bit element size, but an assembler should accept any element size if it is used consistently for all operands. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "The instruction operates on four ZA single-vector groups. The vector numbers forming the single-vector group within each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo quarter the number of ZA array vectors.", + "html": "

The instruction operates on four ZA single-vector groups. The vector numbers forming the single-vector group within each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo quarter the number of ZA array vectors.

The vector group symbol VGx4 indicates that the instruction operates on four ZA single-vector groups.

The preferred disassembly syntax uses a 64-bit element size, but an assembler should accept any element size if it is used consistently for all operands. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "The instruction operates on individual horizontal or vertical slices within a named ZA tile of the specified element size. The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of such elements in a vector. The immediate offset is in the range 0 to the number of elements in a 128-bit vector segment minus 1.", + "html": "

The instruction operates on individual horizontal or vertical slices within a named ZA tile of the specified element size. The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of such elements in a vector. The immediate offset is in the range 0 to the number of elements in a 128-bit vector segment minus 1.

Inactive elements in the destination vector remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "The instruction operates on two consecutive horizontal or vertical slices within a named ZA tile of the specified element size.", + "html": "

The instruction operates on two consecutive horizontal or vertical slices within a named ZA tile of the specified element size.

The consecutive slice numbers within the tile are selected starting from the sum of the slice index register and immediate offset, modulo the number of such elements in a vector. The immediate offset is a multiple of 2 in the range 0 to the number of elements in a 128-bit vector segment minus 2.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "The instruction operates on four consecutive horizontal or vertical slices within a named ZA tile of the specified element size.", + "html": "

The instruction operates on four consecutive horizontal or vertical slices within a named ZA tile of the specified element size.

The consecutive slice numbers within the tile are selected starting from the sum of the slice index register and immediate offset, modulo the number of such elements in a vector. The immediate offset is a multiple of 4 in the range 0 to the number of elements in a 128-bit vector segment minus 4.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "The instruction operates on two ZA single-vector groups. The vector numbers forming the single-vector group within each half of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half the number of ZA array vectors.", + "html": "

The instruction operates on two ZA single-vector groups. The vector numbers forming the single-vector group within each half of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half the number of ZA array vectors.

The vector group symbol VGx2 indicates that the instruction operates on two ZA single-vector groups.

The preferred disassembly syntax uses a 64-bit element size, but an assembler should accept any element size if it is used consistently for all operands. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "The instruction operates on four ZA single-vector groups. The vector numbers forming the single-vector group within each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo quarter the number of ZA array vectors.", + "html": "

The instruction operates on four ZA single-vector groups. The vector numbers forming the single-vector group within each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo quarter the number of ZA array vectors.

The vector group symbol VGx4 indicates that the instruction operates on four ZA single-vector groups.

The preferred disassembly syntax uses a 64-bit element size, but an assembler should accept any element size if it is used consistently for all operands. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "The instruction operates on individual horizontal or vertical slices within a named ZA tile of the specified element size. The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of such elements in a vector. The immediate offset is in the range 0 to the number of elements in a 128-bit vector segment minus 1.", + "html": "

The instruction operates on individual horizontal or vertical slices within a named ZA tile of the specified element size. The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of such elements in a vector. The immediate offset is in the range 0 to the number of elements in a 128-bit vector segment minus 1.

Inactive elements in the destination slice remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "Move (inverted wide immediate) moves an inverted 16-bit immediate value to a register.", + "html": "

Move (inverted wide immediate) moves an inverted 16-bit immediate value to a register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "Move (wide immediate) moves a 16-bit immediate value to a register.", + "html": "

Move (wide immediate) moves a 16-bit immediate value to a register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "Move vector. This instruction copies the vector in the source SIMD&FP register into the destination SIMD&FP register.", + "html": "

Move vector. This instruction copies the vector in the source SIMD&FP register into the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "Move (bitmask immediate) writes a bitmask immediate value to a register.", + "html": "

Move (bitmask immediate) writes a bitmask immediate value to a register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "Move (register) copies the value in a source register to the destination register.", + "html": "

Move (register) copies the value in a source register to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "Read all elements from the source predicate and place in the destination predicate. This instruction is unpredicated. Does not set the condition flags.", + "html": "

Read all elements from the source predicate and place in the destination predicate. This instruction is unpredicated. Does not set the condition flags.

For programmer convenience, an assembler must also accept predicate-as-counter register names for the source and destination predicate registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "Move vector register. This instruction is unpredicated.", + "html": "

Move vector register. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "Read active elements from the source predicate and place in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register remain unmodified. Does not set the condition flags.", + "html": "

Read active elements from the source predicate and place in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register remain unmodified. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "Move elements from the source vector to the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Move elements from the source vector to the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOV": + return { + "tooltip": "Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.", + "html": "

Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVA": + return { + "tooltip": "The instruction operates on two consecutive horizontal or vertical slices within a named ZA tile of the specified element size.", + "html": "

The instruction operates on two consecutive horizontal or vertical slices within a named ZA tile of the specified element size.

The consecutive slice numbers within the tile are selected starting from the sum of the slice index register and immediate offset, modulo the number of such elements in a vector. The immediate offset is a multiple of 2 in the range 0 to the number of elements in a 128-bit vector segment minus 2.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVA": + return { + "tooltip": "The instruction operates on four consecutive horizontal or vertical slices within a named ZA tile of the specified element size.", + "html": "

The instruction operates on four consecutive horizontal or vertical slices within a named ZA tile of the specified element size.

The consecutive slice numbers within the tile are selected starting from the sum of the slice index register and immediate offset, modulo the number of such elements in a vector. The immediate offset is a multiple of 4 in the range 0 to the number of elements in a 128-bit vector segment minus 4.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVA": + return { + "tooltip": "The instruction operates on two ZA single-vector groups. The vector numbers forming the single-vector group within each half of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half the number of ZA array vectors.", + "html": "

The instruction operates on two ZA single-vector groups. The vector numbers forming the single-vector group within each half of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half the number of ZA array vectors.

The vector group symbol VGx2 indicates that the instruction operates on two ZA single-vector groups.

The preferred disassembly syntax uses a 64-bit element size, but an assembler should accept any element size if it is used consistently for all operands. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVA": + return { + "tooltip": "The instruction operates on four ZA single-vector groups. The vector numbers forming the single-vector group within each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo quarter the number of ZA array vectors.", + "html": "

The instruction operates on four ZA single-vector groups. The vector numbers forming the single-vector group within each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo quarter the number of ZA array vectors.

The vector group symbol VGx4 indicates that the instruction operates on four ZA single-vector groups.

The preferred disassembly syntax uses a 64-bit element size, but an assembler should accept any element size if it is used consistently for all operands. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVA": + return { + "tooltip": "The instruction operates on individual horizontal or vertical slices within a named ZA tile of the specified element size. The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of such elements in a vector. The immediate offset is in the range 0 to the number of elements in a 128-bit vector segment minus 1.", + "html": "

The instruction operates on individual horizontal or vertical slices within a named ZA tile of the specified element size. The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of such elements in a vector. The immediate offset is in the range 0 to the number of elements in a 128-bit vector segment minus 1.

Inactive elements in the destination vector remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVA": + return { + "tooltip": "The instruction operates on two consecutive horizontal or vertical slices within a named ZA tile of the specified element size.", + "html": "

The instruction operates on two consecutive horizontal or vertical slices within a named ZA tile of the specified element size.

The consecutive slice numbers within the tile are selected starting from the sum of the slice index register and immediate offset, modulo the number of such elements in a vector. The immediate offset is a multiple of 2 in the range 0 to the number of elements in a 128-bit vector segment minus 2.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVA": + return { + "tooltip": "The instruction operates on four consecutive horizontal or vertical slices within a named ZA tile of the specified element size.", + "html": "

The instruction operates on four consecutive horizontal or vertical slices within a named ZA tile of the specified element size.

The consecutive slice numbers within the tile are selected starting from the sum of the slice index register and immediate offset, modulo the number of such elements in a vector. The immediate offset is a multiple of 4 in the range 0 to the number of elements in a 128-bit vector segment minus 4.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVA": + return { + "tooltip": "The instruction operates on two ZA single-vector groups. The vector numbers forming the single-vector group within each half of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half the number of ZA array vectors.", + "html": "

The instruction operates on two ZA single-vector groups. The vector numbers forming the single-vector group within each half of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half the number of ZA array vectors.

The vector group symbol VGx2 indicates that the instruction operates on two ZA single-vector groups.

The preferred disassembly syntax uses a 64-bit element size, but an assembler should accept any element size if it is used consistently for all operands. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVA": + return { + "tooltip": "The instruction operates on four ZA single-vector groups. The vector numbers forming the single-vector group within each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo quarter the number of ZA array vectors.", + "html": "

The instruction operates on four ZA single-vector groups. The vector numbers forming the single-vector group within each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo quarter the number of ZA array vectors.

The vector group symbol VGx4 indicates that the instruction operates on four ZA single-vector groups.

The preferred disassembly syntax uses a 64-bit element size, but an assembler should accept any element size if it is used consistently for all operands. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVA": + return { + "tooltip": "The instruction operates on individual horizontal or vertical slices within a named ZA tile of the specified element size. The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of such elements in a vector. The immediate offset is in the range 0 to the number of elements in a 128-bit vector segment minus 1.", + "html": "

The instruction operates on individual horizontal or vertical slices within a named ZA tile of the specified element size. The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of such elements in a vector. The immediate offset is in the range 0 to the number of elements in a 128-bit vector segment minus 1.

Inactive elements in the destination slice remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVAZ": + return { + "tooltip": "The instruction operates on two consecutive horizontal or vertical slices within a named ZA tile of the specified element size. The tile slices are zeroed after moving their contents to the destination vectors.", + "html": "

The instruction operates on two consecutive horizontal or vertical slices within a named ZA tile of the specified element size. The tile slices are zeroed after moving their contents to the destination vectors.

The consecutive slice numbers within the tile are selected starting from the sum of the slice index register and immediate offset, modulo the number of such elements in a vector. The immediate offset is a multiple of 2 in the range 0 to the number of elements in a 128-bit vector segment minus 2.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVAZ": + return { + "tooltip": "The instruction operates on four consecutive horizontal or vertical slices within a named ZA tile of the specified element size. The tile slices are zeroed after moving their contents to the destination vectors.", + "html": "

The instruction operates on four consecutive horizontal or vertical slices within a named ZA tile of the specified element size. The tile slices are zeroed after moving their contents to the destination vectors.

The consecutive slice numbers within the tile are selected starting from the sum of the slice index register and immediate offset, modulo the number of such elements in a vector. The immediate offset is a multiple of 4 in the range 0 to the number of elements in a 128-bit vector segment minus 4.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVAZ": + return { + "tooltip": "The instruction operates on two ZA single-vector groups. The ZA single-vector groups are zeroed after moving their contents to the destination vectors. The vector numbers forming the single-vector group within each half of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half the number of ZA array vectors.", + "html": "

The instruction operates on two ZA single-vector groups. The ZA single-vector groups are zeroed after moving their contents to the destination vectors. The vector numbers forming the single-vector group within each half of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half the number of ZA array vectors.

The vector group symbol VGx2 indicates that the instruction operates on two ZA single-vector groups.

The preferred disassembly syntax uses a 64-bit element size, but an assembler should accept any element size if it is used consistently for all operands. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVAZ": + return { + "tooltip": "The instruction operates on four ZA single-vector groups. The ZA single-vector groups are zeroed after moving their contents to the destination vectors. The vector numbers forming the single-vector group within each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo quarter the number of ZA array vectors.", + "html": "

The instruction operates on four ZA single-vector groups. The ZA single-vector groups are zeroed after moving their contents to the destination vectors. The vector numbers forming the single-vector group within each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo quarter the number of ZA array vectors.

The vector group symbol VGx4 indicates that the instruction operates on four ZA single-vector groups.

The preferred disassembly syntax uses a 64-bit element size, but an assembler should accept any element size if it is used consistently for all operands. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVAZ": + return { + "tooltip": "The instruction operates on a horizontal or vertical slice within a named ZA tile of the specified element size. The tile slice is zeroed after moving its contents to the destination vector.", + "html": "

The instruction operates on a horizontal or vertical slice within a named ZA tile of the specified element size. The tile slice is zeroed after moving its contents to the destination vector.

The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of such elements in a vector. The immediate offset is in the range 0 to the number of elements in a 128-bit vector segment minus 1.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVI": + return { + "tooltip": "Move Immediate (vector). This instruction places an immediate constant into every vector element of the destination SIMD&FP register.", + "html": "

Move Immediate (vector). This instruction places an immediate constant into every vector element of the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVK": + return { + "tooltip": "Move wide with keep moves an optionally-shifted 16-bit immediate value into a register, keeping other bits unchanged.", + "html": "

Move wide with keep moves an optionally-shifted 16-bit immediate value into a register, keeping other bits unchanged.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVN": + return { + "tooltip": "Move wide with NOT moves the inverse of an optionally-shifted 16-bit immediate value to a register.", + "html": "

Move wide with NOT moves the inverse of an optionally-shifted 16-bit immediate value to a register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVPRFX": + return { + "tooltip": "The predicated MOVPRFX instruction is a hint to hardware that the instruction may be combined with the destructive instruction which follows it in program order to create a single constructive operation. Since it is a hint it is also permitted to be implemented as a discrete vector copy, and the result of executing the pair of instructions with or without combining is identical. The choice of combined versus discrete operation may vary dynamically.", + "html": "

The predicated MOVPRFX instruction is a hint to hardware that the instruction may be combined with the destructive instruction which follows it in program order to create a single constructive operation. Since it is a hint it is also permitted to be implemented as a discrete vector copy, and the result of executing the pair of instructions with or without combining is identical. The choice of combined versus discrete operation may vary dynamically.

Unless the combination of a constructive operation with merging predication is specifically required, it is strongly recommended that for performance reasons software should prefer to use the zeroing form of predicated MOVPRFX or the unpredicated MOVPRFX instruction.

Although the operation of the instruction is defined as a simple predicated vector copy, it is required that the prefixed instruction at PC+4 must be an SVE destructive binary or ternary instruction encoding, or a unary operation with merging predication, but excluding other MOVPRFX instructions. The prefixed instruction must specify the same predicate register, and have the same maximum element size (ignoring a fixed 64-bit \"wide vector\" operand), and the same destination vector as the MOVPRFX instruction. The prefixed instruction must not use the destination register in any other operand position, even if they have different names but refer to the same architectural register state. Any other use is UNPREDICTABLE.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVPRFX": + return { + "tooltip": "The unpredicated MOVPRFX instruction is a hint to hardware that the instruction may be combined with the destructive instruction which follows it in program order to create a single constructive operation. Since it is a hint it is also permitted to be implemented as a discrete vector copy, and the result of executing the pair of instructions with or without combining is identical. The choice of combined versus discrete operation may vary dynamically.", + "html": "

The unpredicated MOVPRFX instruction is a hint to hardware that the instruction may be combined with the destructive instruction which follows it in program order to create a single constructive operation. Since it is a hint it is also permitted to be implemented as a discrete vector copy, and the result of executing the pair of instructions with or without combining is identical. The choice of combined versus discrete operation may vary dynamically.

Although the operation of the instruction is defined as a simple unpredicated vector copy, it is required that the prefixed instruction at PC+4 must be an SVE destructive binary or ternary instruction encoding, or a unary operation with merging predication, but excluding other MOVPRFX instructions. The prefixed instruction must specify the same destination vector as the MOVPRFX instruction. The prefixed instruction must not use the destination register in any other operand position, even if they have different names but refer to the same architectural register state. Any other use is UNPREDICTABLE.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVS": + return { + "tooltip": "Read active elements from the source predicate and place in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

Read active elements from the source predicate and place in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVS": + return { + "tooltip": "Read all elements from the source predicate and place in the destination predicate. This instruction is unpredicated. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

Read all elements from the source predicate and place in the destination predicate. This instruction is unpredicated. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVT": + return { + "tooltip": "Move 8 bytes to a general-purpose register from the ZT0 register at the byte offset specified by the immediate index. This instruction is UNDEFINED in Non-debug state.", + "html": "

Move 8 bytes to a general-purpose register from the ZT0 register at the byte offset specified by the immediate index. This instruction is UNDEFINED in Non-debug state.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVT": + return { + "tooltip": "Move 8 bytes to the ZT0 register at the byte offset specified by the immediate index from a general-purpose register. This instruction is UNDEFINED in Non-debug state.", + "html": "

Move 8 bytes to the ZT0 register at the byte offset specified by the immediate index from a general-purpose register. This instruction is UNDEFINED in Non-debug state.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MOVZ": + return { + "tooltip": "Move wide with zero moves an optionally-shifted 16-bit immediate value to a register.", + "html": "

Move wide with zero moves an optionally-shifted 16-bit immediate value to a register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MRRS": + return { + "tooltip": "Move System Register to two adjacent general-purpose registers allows the PE to read an AArch64 128-bit System register into two adjacent 64-bit general-purpose registers.", + "html": "

Move System Register to two adjacent general-purpose registers allows the PE to read an AArch64 128-bit System register into two adjacent 64-bit general-purpose registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MRS": + return { + "tooltip": "Move System Register to general-purpose register allows the PE to read an AArch64 System register into a general-purpose register.", + "html": "

Move System Register to general-purpose register allows the PE to read an AArch64 System register into a general-purpose register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MSB": + return { + "tooltip": "Multiply the corresponding active elements of the first and second source vectors and subtract from elements of the third (addend) vector. Destructively place the results in the destination and first source (multiplicand) vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Multiply the corresponding active elements of the first and second source vectors and subtract from elements of the third (addend) vector. Destructively place the results in the destination and first source (multiplicand) vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MSR": + return { + "tooltip": "Move immediate value to Special Register moves an immediate value to selected bits of the PSTATE. For more information, see Process state, PSTATE.", + "html": "

Move immediate value to Special Register moves an immediate value to selected bits of the PSTATE. For more information, see Process state, PSTATE.

The bits that can be written by this instruction are:

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MSR": + return { + "tooltip": "Move general-purpose register to System Register allows the PE to write an AArch64 System register from a general-purpose register.", + "html": "

Move general-purpose register to System Register allows the PE to write an AArch64 System register from a general-purpose register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MSRR": + return { + "tooltip": "Move two adjacent general-purpose registers to System Register allows the PE to write an AArch64 128-bit System register from two adjacent 64-bit general-purpose registers.", + "html": "

Move two adjacent general-purpose registers to System Register allows the PE to write an AArch64 128-bit System register from two adjacent 64-bit general-purpose registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MSUB": + return { + "tooltip": "Multiply-Subtract multiplies two register values, subtracts the product from a third register value, and writes the result to the destination register.", + "html": "

Multiply-Subtract multiplies two register values, subtracts the product from a third register value, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MUL": + return { + "tooltip": "Multiply (vector, by element). This instruction multiplies the vector elements in the first source SIMD&FP register by the specified value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.", + "html": "

Multiply (vector, by element). This instruction multiplies the vector elements in the first source SIMD&FP register by the specified value in the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MUL": + return { + "tooltip": "Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Multiply (vector). This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MUL": + return { + "tooltip": "Multiply active elements of the first source vector by corresponding elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Multiply active elements of the first source vector by corresponding elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MUL": + return { + "tooltip": "Multiply by an immediate each element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate is a signed 8-bit value in the range -128 to +127, inclusive. This instruction is unpredicated.", + "html": "

Multiply by an immediate each element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate is a signed 8-bit value in the range -128 to +127, inclusive. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MUL": + return { + "tooltip": "Multiply all elements of the first source vector by corresponding elements of the second source vector and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.", + "html": "

Multiply all elements of the first source vector by corresponding elements of the second source vector and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MUL": + return { + "tooltip": "Multiply all integer elements within each 128-bit segment of the first source vector by the specified element in the corresponding second source vector segment. The results are placed in the corresponding elements of the destination vector.", + "html": "

Multiply all integer elements within each 128-bit segment of the first source vector by the specified element in the corresponding second source vector segment. The results are placed in the corresponding elements of the destination vector.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 1 to 3 bits depending on the size of the element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MVN": + return { + "tooltip": "Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MVN": + return { + "tooltip": "Bitwise NOT writes the bitwise inverse of a register value to the destination register.", + "html": "

Bitwise NOT writes the bitwise inverse of a register value to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "MVNI": + return { + "tooltip": "Move inverted Immediate (vector). This instruction places the inverse of an immediate constant into every vector element of the destination SIMD&FP register.", + "html": "

Move inverted Immediate (vector). This instruction places the inverse of an immediate constant into every vector element of the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "NAND": + return { + "tooltip": "Bitwise NAND active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.", + "html": "

Bitwise NAND active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "NANDS": + return { + "tooltip": "Bitwise NAND active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

Bitwise NAND active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "NBSL": + return { + "tooltip": "Selects bits from the first source vector where the corresponding bit in the third source vector is '1', and from the second source vector where the corresponding bit in the third source vector is '0'. The inverted result is placed destructively in the destination and first source vector. This instruction is unpredicated.", + "html": "

Selects bits from the first source vector where the corresponding bit in the third source vector is '1', and from the second source vector where the corresponding bit in the third source vector is '0'. The inverted result is placed destructively in the destination and first source vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "NEG": + return { + "tooltip": "Negate (vector). This instruction reads each vector element from the source SIMD&FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Negate (vector). This instruction reads each vector element from the source SIMD&FP register, negates each value, puts the result into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "NEG": + return { + "tooltip": "Negate (shifted register) negates an optionally-shifted register value, and writes the result to the destination register.", + "html": "

Negate (shifted register) negates an optionally-shifted register value, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "NEG": + return { + "tooltip": "Negate the signed integer value in each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Negate the signed integer value in each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "NEGS": + return { + "tooltip": "Negate, setting flags, negates an optionally-shifted register value, and writes the result to the destination register. It updates the condition flags based on the result.", + "html": "

Negate, setting flags, negates an optionally-shifted register value, and writes the result to the destination register. It updates the condition flags based on the result.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "NGC": + return { + "tooltip": "Negate with Carry negates the sum of a register value and the value of NOT (Carry flag), and writes the result to the destination register.", + "html": "

Negate with Carry negates the sum of a register value and the value of NOT (Carry flag), and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "NGCS": + return { + "tooltip": "Negate with Carry, setting flags, negates the sum of a register value and the value of NOT (Carry flag), and writes the result to the destination register. It updates the condition flags based on the result.", + "html": "

Negate with Carry, setting flags, negates the sum of a register value and the value of NOT (Carry flag), and writes the result to the destination register. It updates the condition flags based on the result.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "NMATCH": + return { + "tooltip": "This instruction compares each active 8-bit or 16-bit character in the first source vector with all of the characters in the corresponding 128-bit segment of the second source vector. Where the first source element detects no matching characters in the second segment it places true in the corresponding element of the destination predicate, otherwise false. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

This instruction compares each active 8-bit or 16-bit character in the first source vector with all of the characters in the corresponding 128-bit segment of the second source vector. Where the first source element detects no matching characters in the second segment it places true in the corresponding element of the destination predicate, otherwise false. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "NOP": + return { + "tooltip": "No Operation does nothing, other than advance the value of the program counter by 4. This instruction can be used for instruction alignment purposes.", + "html": "

No Operation does nothing, other than advance the value of the program counter by 4. This instruction can be used for instruction alignment purposes.

The timing effects of including a NOP instruction in a program are not guaranteed. It can increase execution time, leave it unchanged, or even reduce it. Therefore, NOP instructions are not suitable for timing loops.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "NOR": + return { + "tooltip": "Bitwise NOR active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.", + "html": "

Bitwise NOR active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "NORS": + return { + "tooltip": "Bitwise NOR active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

Bitwise NOR active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "NOT": + return { + "tooltip": "Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Bitwise NOT (vector). This instruction reads each vector element from the source SIMD&FP register, places the inverse of each value into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "NOT": + return { + "tooltip": "Bitwise invert each active element of the source predicate, and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.", + "html": "

Bitwise invert each active element of the source predicate, and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "NOT": + return { + "tooltip": "Bitwise invert each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Bitwise invert each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "NOTS": + return { + "tooltip": "Bitwise invert each active element of the source predicate, and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

Bitwise invert each active element of the source predicate, and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ORN": + return { + "tooltip": "Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.", + "html": "

Bitwise inclusive OR NOT (vector). This instruction performs a bitwise OR NOT between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ORN": + return { + "tooltip": "Bitwise OR NOT (shifted register) performs a bitwise (inclusive) OR of a register value and the complement of an optionally-shifted register value, and writes the result to the destination register.", + "html": "

Bitwise OR NOT (shifted register) performs a bitwise (inclusive) OR of a register value and the complement of an optionally-shifted register value, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ORN": + return { + "tooltip": "Bitwise inclusive OR an inverted immediate with each 64-bit element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate is a 64-bit value consisting of a single run of ones or zeros repeating every 2, 4, 8, 16, 32 or 64 bits. This instruction is unpredicated.", + "html": "

Bitwise inclusive OR an inverted immediate with each 64-bit element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate is a 64-bit value consisting of a single run of ones or zeros repeating every 2, 4, 8, 16, 32 or 64 bits. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ORN": + return { + "tooltip": "Bitwise inclusive OR inverted active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.", + "html": "

Bitwise inclusive OR inverted active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ORNS": + return { + "tooltip": "Bitwise inclusive OR inverted active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

Bitwise inclusive OR inverted active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ORQV": + return { + "tooltip": "Bitwise inclusive OR of the same element numbers from each 128-bit source vector segment, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as all zeros.", + "html": "

Bitwise inclusive OR of the same element numbers from each 128-bit source vector segment, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as all zeros.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ORR": + return { + "tooltip": "Bitwise inclusive OR (vector, immediate). This instruction reads each vector element from the destination SIMD&FP register, performs a bitwise OR between each result and an immediate constant, places the result into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Bitwise inclusive OR (vector, immediate). This instruction reads each vector element from the destination SIMD&FP register, performs a bitwise OR between each result and an immediate constant, places the result into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ORR": + return { + "tooltip": "Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.", + "html": "

Bitwise inclusive OR (vector, register). This instruction performs a bitwise OR between the two source SIMD&FP registers, and writes the result to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ORR": + return { + "tooltip": "Bitwise OR (immediate) performs a bitwise (inclusive) OR of a register value and an immediate register value, and writes the result to the destination register.", + "html": "

Bitwise OR (immediate) performs a bitwise (inclusive) OR of a register value and an immediate register value, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ORR": + return { + "tooltip": "Bitwise OR (shifted register) performs a bitwise (inclusive) OR of a register value and an optionally-shifted register value, and writes the result to the destination register.", + "html": "

Bitwise OR (shifted register) performs a bitwise (inclusive) OR of a register value and an optionally-shifted register value, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ORR": + return { + "tooltip": "Bitwise inclusive OR active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.", + "html": "

Bitwise inclusive OR active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ORR": + return { + "tooltip": "Bitwise inclusive OR active elements of the second source vector with corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Bitwise inclusive OR active elements of the second source vector with corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ORR": + return { + "tooltip": "Bitwise inclusive OR an immediate with each 64-bit element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate is a 64-bit value consisting of a single run of ones or zeros repeating every 2, 4, 8, 16, 32 or 64 bits. This instruction is unpredicated.", + "html": "

Bitwise inclusive OR an immediate with each 64-bit element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate is a 64-bit value consisting of a single run of ones or zeros repeating every 2, 4, 8, 16, 32 or 64 bits. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ORR": + return { + "tooltip": "Bitwise inclusive OR all elements of the second source vector with corresponding elements of the first source vector and place the first in the corresponding elements of the destination vector. This instruction is unpredicated.", + "html": "

Bitwise inclusive OR all elements of the second source vector with corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ORRS": + return { + "tooltip": "Bitwise inclusive OR active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

Bitwise inclusive OR active elements of the second source predicate with corresponding elements of the first source predicate and place the results in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ORV": + return { + "tooltip": "Bitwise inclusive OR horizontally across all lanes of a vector, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as zero.", + "html": "

Bitwise inclusive OR horizontally across all lanes of a vector, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PACDA": + case "PACDZA": + return { + "tooltip": "Pointer Authentication Code for Data address, using key A. This instruction computes and inserts a pointer authentication code for a data address, using a modifier and key A.", + "html": "

Pointer Authentication Code for Data address, using key A. This instruction computes and inserts a pointer authentication code for a data address, using a modifier and key A.

The address is in the general-purpose register that is specified by <Xd>.

The modifier is:

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PACDB": + case "PACDZB": + return { + "tooltip": "Pointer Authentication Code for Data address, using key B. This instruction computes and inserts a pointer authentication code for a data address, using a modifier and key B.", + "html": "

Pointer Authentication Code for Data address, using key B. This instruction computes and inserts a pointer authentication code for a data address, using a modifier and key B.

The address is in the general-purpose register that is specified by <Xd>.

The modifier is:

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PACGA": + return { + "tooltip": "Pointer Authentication Code, using Generic key. This instruction computes the pointer authentication code for a 64-bit value in the first source register, using a modifier in the second source register, and the Generic key. The computed pointer authentication code is written to the most significant 32 bits of the destination register, and the least significant 32 bits of the destination register are set to zero.", + "html": "

Pointer Authentication Code, using Generic key. This instruction computes the pointer authentication code for a 64-bit value in the first source register, using a modifier in the second source register, and the Generic key. The computed pointer authentication code is written to the most significant 32 bits of the destination register, and the least significant 32 bits of the destination register are set to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PACIA": + case "PACIA1716": + case "PACIASP": + case "PACIAZ": + case "PACIZA": + return { + "tooltip": "Pointer Authentication Code for Instruction address, using key A. This instruction computes and inserts a pointer authentication code for an instruction address, using a modifier and key A.", + "html": "

Pointer Authentication Code for Instruction address, using key A. This instruction computes and inserts a pointer authentication code for an instruction address, using a modifier and key A.

The address is:

The modifier is:

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PACIB": + case "PACIB1716": + case "PACIBSP": + case "PACIBZ": + case "PACIZB": + return { + "tooltip": "Pointer Authentication Code for Instruction address, using key B. This instruction computes and inserts a pointer authentication code for an instruction address, using a modifier and key B.", + "html": "

Pointer Authentication Code for Instruction address, using key B. This instruction computes and inserts a pointer authentication code for an instruction address, using a modifier and key B.

The address is:

The modifier is:

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PEXT": + return { + "tooltip": "Expands the source predicate-as-counter into a four-predicate wide mask and copies one quarter of it into the destination predicate register.", + "html": "

Expands the source predicate-as-counter into a four-predicate wide mask and copies one quarter of it into the destination predicate register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PEXT": + return { + "tooltip": "Expands the source predicate-as-counter into a four-predicate wide mask and copies two quarters of it into the destination predicate registers.", + "html": "

Expands the source predicate-as-counter into a four-predicate wide mask and copies two quarters of it into the destination predicate registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PFALSE": + return { + "tooltip": "Set all elements in the destination predicate to false.", + "html": "

Set all elements in the destination predicate to false.

For programmer convenience, an assembler must also accept predicate-as-counter register name for the destination predicate register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PFIRST": + return { + "tooltip": "Sets the first active element in the destination predicate to true, otherwise elements from the source predicate are passed through unchanged. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

Sets the first active element in the destination predicate to true, otherwise elements from the source predicate are passed through unchanged. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PMOV": + return { + "tooltip": "Copy a packed bitmap, where bit value 0b1 represents TRUE and bit value 0b0 represents FALSE, from part of a source vector register to elements of a destination SVE predicate register.", + "html": "

Copy a packed bitmap, where bit value 0b1 represents TRUE and bit value 0b0 represents FALSE, from part of a source vector register to elements of a destination SVE predicate register.

Because the number of bits in an SVE predicate element scales with the vector element size, the behavior varies according to the specified element size.

The immediate index is optional, defaulting to 0 if omitted.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PMOV": + return { + "tooltip": "Copy the source SVE predicate register elements into the destination vector register as a packed bitmap with one bit per predicate element, where bit value 0b1 represents a TRUE predicate element, and bit value 0b0 represents a FALSE predicate element.", + "html": "

Copy the source SVE predicate register elements into the destination vector register as a packed bitmap with one bit per predicate element, where bit value 0b1 represents a TRUE predicate element, and bit value 0b0 represents a FALSE predicate element.

Because the number of bits in an SVE predicate element scales with the vector element size, the behavior varies according to the specified element size.

The immediate index is optional, defaulting to 0 if omitted. When the index is zero, the instruction writes zeroes to the most significant VL-(VL/esize) bits of the destination vector register. When a non-zero index is specified, the packed bitmap is inserted into the destination vector register, and the unindexed blocks remain unchanged.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PMUL": + return { + "tooltip": "Polynomial Multiply. This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Polynomial Multiply. This instruction multiplies corresponding elements in the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

For information about multiplying polynomials see Polynomial arithmetic over {0, 1}.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PMUL": + return { + "tooltip": "Polynomial multiply over [0, 1] all elements of the second source vector to corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.", + "html": "

Polynomial multiply over [0, 1] all elements of the second source vector to corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PMULL": + case "PMULL2": + return { + "tooltip": "Polynomial Multiply Long. This instruction multiplies corresponding elements in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.", + "html": "

Polynomial Multiply Long. This instruction multiplies corresponding elements in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

For information about multiplying polynomials, see Polynomial arithmetic over {0, 1}.

The PMULL instruction extracts each source vector from the lower half of each source register. The PMULL2 instruction extracts each source vector from the upper half of each source register.

The PMULL and PMULL2 variants that operate on 64-bit source elements are defined only when FEAT_PMULL is implemented.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PMULLB": + return { + "tooltip": "Polynomial multiply over [0, 1] the corresponding even-numbered elements of the first and second source vectors, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Polynomial multiply over [0, 1] the corresponding even-numbered elements of the first and second source vectors, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.

ID_AA64ZFR0_EL1.AES indicates whether the 128-bit element variant is implemented. The 128-bit element variant is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PMULLT": + return { + "tooltip": "Polynomial multiply over [0, 1] the corresponding odd-numbered elements of the first and second source vectors, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Polynomial multiply over [0, 1] the corresponding odd-numbered elements of the first and second source vectors, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.

ID_AA64ZFR0_EL1.AES indicates whether the 128-bit element variant is implemented. The 128-bit element variant is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PNEXT": + return { + "tooltip": "An instruction used to construct a loop which iterates over all true elements in the vector select predicate register. If all elements in the first source predicate register are false it determines the first true element in the vector select predicate register, otherwise it determines the next true element in the vector select predicate register that follows the last true element in the first source predicate register. All elements of the destination predicate register are set to false, except the element corresponding to the determined vector select element, if any, which is set to true. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

An instruction used to construct a loop which iterates over all true elements in the vector select predicate register. If all elements in the first source predicate register are false it determines the first true element in the vector select predicate register, otherwise it determines the next true element in the vector select predicate register that follows the last true element in the first source predicate register. All elements of the destination predicate register are set to false, except the element corresponding to the determined vector select element, if any, which is set to true. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PRFB": + return { + "tooltip": "Gather prefetch of bytes from the active memory addresses generated by a vector base plus immediate index. The index is in the range 0 to 31. Inactive addresses are not prefetched from memory.", + "html": "

Gather prefetch of bytes from the active memory addresses generated by a vector base plus immediate index. The index is in the range 0 to 31. Inactive addresses are not prefetched from memory.

The <prfop> symbol specifies the prefetch hint as a combination of three options: access type PLD for load or PST for store; target cache level L1, L2 or L3; temporality (KEEP for temporal or STRM for non-temporal).

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PRFB": + return { + "tooltip": "Contiguous prefetch of byte elements from the memory address generated by a 64-bit scalar base and immediate index in the range -32 to 31 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous prefetch of byte elements from the memory address generated by a 64-bit scalar base and immediate index in the range -32 to 31 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

The predicate may be used to suppress prefetches from unwanted addresses.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PRFB": + return { + "tooltip": "Contiguous prefetch of byte elements from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element prefetch the index value is incremented, but the index register is not updated.", + "html": "

Contiguous prefetch of byte elements from the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element prefetch the index value is incremented, but the index register is not updated.

The predicate may be used to suppress prefetches from unwanted addresses.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PRFB": + return { + "tooltip": "Gather prefetch of bytes from the active memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally sign or zero-extended from 32 to 64 bits. Inactive addresses are not prefetched from memory.", + "html": "

Gather prefetch of bytes from the active memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally sign or zero-extended from 32 to 64 bits. Inactive addresses are not prefetched from memory.

The <prfop> symbol specifies the prefetch hint as a combination of three options: access type PLD for load or PST for store; target cache level L1, L2 or L3; temporality (KEEP for temporal or STRM for non-temporal).

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PRFD": + return { + "tooltip": "Gather prefetch of doublewords from the active memory addresses generated by a vector base plus immediate index. The index is a multiple of 8 in the range 0 to 248. Inactive addresses are not prefetched from memory.", + "html": "

Gather prefetch of doublewords from the active memory addresses generated by a vector base plus immediate index. The index is a multiple of 8 in the range 0 to 248. Inactive addresses are not prefetched from memory.

The <prfop> symbol specifies the prefetch hint as a combination of three options: access type PLD for load or PST for store; target cache level L1, L2 or L3; temporality (KEEP for temporal or STRM for non-temporal).

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PRFD": + return { + "tooltip": "Contiguous prefetch of doubleword elements from the memory address generated by a 64-bit scalar base and immediate index in the range -32 to 31 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous prefetch of doubleword elements from the memory address generated by a 64-bit scalar base and immediate index in the range -32 to 31 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

The predicate may be used to suppress prefetches from unwanted addresses.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PRFD": + return { + "tooltip": "Contiguous prefetch of doubleword elements from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 8 and added to the base address. After each element prefetch the index value is incremented, but the index register is not updated.", + "html": "

Contiguous prefetch of doubleword elements from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 8 and added to the base address. After each element prefetch the index value is incremented, but the index register is not updated.

The predicate may be used to suppress prefetches from unwanted addresses.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PRFD": + return { + "tooltip": "Gather prefetch of doublewords from the active memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then multiplied by 8. Inactive addresses are not prefetched from memory.", + "html": "

Gather prefetch of doublewords from the active memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then multiplied by 8. Inactive addresses are not prefetched from memory.

The <prfop> symbol specifies the prefetch hint as a combination of three options: access type PLD for load or PST for store; target cache level L1, L2 or L3; temporality (KEEP for temporal or STRM for non-temporal).

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PRFH": + return { + "tooltip": "Gather prefetch of halfwords from the active memory addresses generated by a vector base plus immediate index. The index is a multiple of 2 in the range 0 to 62. Inactive addresses are not prefetched from memory.", + "html": "

Gather prefetch of halfwords from the active memory addresses generated by a vector base plus immediate index. The index is a multiple of 2 in the range 0 to 62. Inactive addresses are not prefetched from memory.

The <prfop> symbol specifies the prefetch hint as a combination of three options: access type PLD for load or PST for store; target cache level L1, L2 or L3; temporality (KEEP for temporal or STRM for non-temporal).

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PRFH": + return { + "tooltip": "Contiguous prefetch of halfword elements from the memory address generated by a 64-bit scalar base and immediate index in the range -32 to 31 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous prefetch of halfword elements from the memory address generated by a 64-bit scalar base and immediate index in the range -32 to 31 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

The predicate may be used to suppress prefetches from unwanted addresses.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PRFH": + return { + "tooltip": "Contiguous prefetch of halfword elements from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 2 and added to the base address. After each element prefetch the index value is incremented, but the index register is not updated.", + "html": "

Contiguous prefetch of halfword elements from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 2 and added to the base address. After each element prefetch the index value is incremented, but the index register is not updated.

The predicate may be used to suppress prefetches from unwanted addresses.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PRFH": + return { + "tooltip": "Gather prefetch of halfwords from the active memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then multiplied by 2. Inactive addresses are not prefetched from memory.", + "html": "

Gather prefetch of halfwords from the active memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then multiplied by 2. Inactive addresses are not prefetched from memory.

The <prfop> symbol specifies the prefetch hint as a combination of three options: access type PLD for load or PST for store; target cache level L1, L2 or L3; temporality (KEEP for temporal or STRM for non-temporal).

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PRFM": + return { + "tooltip": "Prefetch Memory (immediate) signals the memory system that data memory accesses from a specified address are likely to occur in the near future. The memory system can respond by taking actions that are expected to speed up the memory accesses when they do occur, such as preloading the cache line containing the specified address into one or more caches.", + "html": "

Prefetch Memory (immediate) signals the memory system that data memory accesses from a specified address are likely to occur in the near future. The memory system can respond by taking actions that are expected to speed up the memory accesses when they do occur, such as preloading the cache line containing the specified address into one or more caches.

The effect of a PRFM instruction is implementation defined. For more information, see Prefetch memory.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PRFM": + return { + "tooltip": "Prefetch Memory (literal) signals the memory system that data memory accesses from a specified address are likely to occur in the near future. The memory system can respond by taking actions that are expected to speed up the memory accesses when they do occur, such as preloading the cache line containing the specified address into one or more caches.", + "html": "

Prefetch Memory (literal) signals the memory system that data memory accesses from a specified address are likely to occur in the near future. The memory system can respond by taking actions that are expected to speed up the memory accesses when they do occur, such as preloading the cache line containing the specified address into one or more caches.

The effect of a PRFM instruction is implementation defined. For more information, see Prefetch memory.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PRFM": + return { + "tooltip": "Prefetch Memory (register) signals the memory system that data memory accesses from a specified address are likely to occur in the near future. The memory system can respond by taking actions that are expected to speed up the memory accesses when they do occur, such as preloading the cache line containing the specified address into one or more caches.", + "html": "

Prefetch Memory (register) signals the memory system that data memory accesses from a specified address are likely to occur in the near future. The memory system can respond by taking actions that are expected to speed up the memory accesses when they do occur, such as preloading the cache line containing the specified address into one or more caches.

The effect of a PRFM instruction is implementation defined. For more information, see Prefetch memory.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PRFUM": + return { + "tooltip": "Prefetch Memory (unscaled offset) signals the memory system that data memory accesses from a specified address are likely to occur in the near future. The memory system can respond by taking actions that are expected to speed up the memory accesses when they do occur, such as preloading the cache line containing the specified address into one or more caches.", + "html": "

Prefetch Memory (unscaled offset) signals the memory system that data memory accesses from a specified address are likely to occur in the near future. The memory system can respond by taking actions that are expected to speed up the memory accesses when they do occur, such as preloading the cache line containing the specified address into one or more caches.

The effect of a PRFUM instruction is implementation defined. For more information, see Prefetch memory.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PRFW": + return { + "tooltip": "Gather prefetch of words from the active memory addresses generated by a vector base plus immediate index. The index is a multiple of 4 in the range 0 to 124. Inactive addresses are not prefetched from memory.", + "html": "

Gather prefetch of words from the active memory addresses generated by a vector base plus immediate index. The index is a multiple of 4 in the range 0 to 124. Inactive addresses are not prefetched from memory.

The <prfop> symbol specifies the prefetch hint as a combination of three options: access type PLD for load or PST for store; target cache level L1, L2 or L3; temporality (KEEP for temporal or STRM for non-temporal).

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PRFW": + return { + "tooltip": "Contiguous prefetch of word elements from the memory address generated by a 64-bit scalar base and immediate index in the range -32 to 31 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous prefetch of word elements from the memory address generated by a 64-bit scalar base and immediate index in the range -32 to 31 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

The predicate may be used to suppress prefetches from unwanted addresses.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PRFW": + return { + "tooltip": "Contiguous prefetch of word elements from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 4 and added to the base address. After each element prefetch the index value is incremented, but the index register is not updated.", + "html": "

Contiguous prefetch of word elements from the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 4 and added to the base address. After each element prefetch the index value is incremented, but the index register is not updated.

The predicate may be used to suppress prefetches from unwanted addresses.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PRFW": + return { + "tooltip": "Gather prefetch of words from the active memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then multiplied by 4. Inactive addresses are not prefetched from memory.", + "html": "

Gather prefetch of words from the active memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then multiplied by 4. Inactive addresses are not prefetched from memory.

The <prfop> symbol specifies the prefetch hint as a combination of three options: access type PLD for load or PST for store; target cache level L1, L2 or L3; temporality (KEEP for temporal or STRM for non-temporal).

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PSBCSYNC": + return { + "tooltip": "Profiling Synchronization Barrier. This instruction is a barrier that ensures that all existing profiling data for the current PE has been formatted, and profiling buffer addresses have been translated such that all writes to the profiling buffer have been initiated. A following DSB instruction completes when the writes to the profiling buffer have completed.", + "html": "

Profiling Synchronization Barrier. This instruction is a barrier that ensures that all existing profiling data for the current PE has been formatted, and profiling buffer addresses have been translated such that all writes to the profiling buffer have been initiated. A following DSB instruction completes when the writes to the profiling buffer have completed.

If the Statistical Profiling Extension is not implemented, this instruction executes as a NOP.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PSEL": + return { + "tooltip": "If the indexed element of the second source predicate is true, place the contents of the first source predicate register into the destination predicate register, otherwise set the destination predicate to all-false. The indexed element is determined by the sum of a general-purpose index register and an immediate, modulo the number of elements. Does not set the condition flags.", + "html": "

If the indexed element of the second source predicate is true, place the contents of the first source predicate register into the destination predicate register, otherwise set the destination predicate to all-false. The indexed element is determined by the sum of a general-purpose index register and an immediate, modulo the number of elements. Does not set the condition flags.

For programmer convenience, an assembler must also accept predicate-as-counter register names for the destination predicate register and the first source predicate register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PSSBB": + return { + "tooltip": "Physical Speculative Store Bypass Barrier is a memory barrier that prevents speculative loads from bypassing earlier stores to the same physical address under certain conditions. For more information and details of the semantics, see Physical Speculative Store Bypass Barrier (PSSBB).", + "html": "

Physical Speculative Store Bypass Barrier is a memory barrier that prevents speculative loads from bypassing earlier stores to the same physical address under certain conditions. For more information and details of the semantics, see Physical Speculative Store Bypass Barrier (PSSBB).

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PTEST": + return { + "tooltip": "Sets the First (N), None (Z), !Last (C) condition flags based on the predicate source register, and the V flag to zero.", + "html": "

Sets the First (N), None (Z), !Last (C) condition flags based on the predicate source register, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PTRUE": + return { + "tooltip": "Set elements of the destination predicate to true if the element number satisfies the named predicate constraint, or to false otherwise. If the constraint specifies more elements than are available at the current vector length then all elements of the destination predicate are set to false.", + "html": "

Set elements of the destination predicate to true if the element number satisfies the named predicate constraint, or to false otherwise. If the constraint specifies more elements than are available at the current vector length then all elements of the destination predicate are set to false.

The named predicate constraint limits the number of active elements in a single predicate to:

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PTRUE": + return { + "tooltip": "Set the destination predicate as all-active elements, using the predicate-as-counter encoding.", + "html": "

Set the destination predicate as all-active elements, using the predicate-as-counter encoding.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PTRUES": + return { + "tooltip": "Set elements of the destination predicate to true if the element number satisfies the named predicate constraint, or to false otherwise. If the constraint specifies more elements than are available at the current vector length then all elements of the destination predicate are set to false.", + "html": "

Set elements of the destination predicate to true if the element number satisfies the named predicate constraint, or to false otherwise. If the constraint specifies more elements than are available at the current vector length then all elements of the destination predicate are set to false.

The named predicate constraint limits the number of active elements in a single predicate to:

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "PUNPKHI": + case "PUNPKLO": + return { + "tooltip": "Unpack elements from the lowest or highest half of the source predicate and place in elements of twice their size within the destination predicate. This instruction is unpredicated.", + "html": "

Unpack elements from the lowest or highest half of the source predicate and place in elements of twice their size within the destination predicate. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RADDHN": + case "RADDHN2": + return { + "tooltip": "Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.", + "html": "

Rounding Add returning High Narrow. This instruction adds each vector element in the first source SIMD&FP register to the corresponding vector element in the second source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

The results are rounded. For truncated results, see ADDHN.

The RADDHN instruction writes the vector to the lower half of the destination register and clears the upper half, while the RADDHN2 instruction writes the vector to the upper half of the destination register without affecting the other bits of the register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RADDHNB": + return { + "tooltip": "Add each vector element of the first source vector to the corresponding vector element of the second source vector, and place the most significant rounded half of the result in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero. This instruction is unpredicated.", + "html": "

Add each vector element of the first source vector to the corresponding vector element of the second source vector, and place the most significant rounded half of the result in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RADDHNT": + return { + "tooltip": "Add each vector element of the first source vector to the corresponding vector element of the second source vector, and place the most significant rounded half of the result in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. This instruction is unpredicated.", + "html": "

Add each vector element of the first source vector to the corresponding vector element of the second source vector, and place the most significant rounded half of the result in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RAX1": + return { + "tooltip": "Rotate and Exclusive-OR rotates each 64-bit element of the 128-bit vector in a source SIMD&FP register left by 1, performs a bitwise exclusive-OR of the resulting 128-bit vector and the vector in another source SIMD&FP register, and writes the result to the destination SIMD&FP register.", + "html": "

Rotate and Exclusive-OR rotates each 64-bit element of the 128-bit vector in a source SIMD&FP register left by 1, performs a bitwise exclusive-OR of the resulting 128-bit vector and the vector in another source SIMD&FP register, and writes the result to the destination SIMD&FP register.

This instruction is implemented only when FEAT_SHA3 is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RAX1": + return { + "tooltip": "Rotate each 64-bit element of the second source vector left by 1 and exclusive OR with the corresponding elements of the first source vector. The results are placed in the corresponding elements of the destination vector. This instruction is unpredicated.", + "html": "

Rotate each 64-bit element of the second source vector left by 1 and exclusive OR with the corresponding elements of the first source vector. The results are placed in the corresponding elements of the destination vector. This instruction is unpredicated.

ID_AA64ZFR0_EL1.SHA3 indicates whether this instruction is implemented.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled, or FEAT_SME2p1 is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RBIT": + return { + "tooltip": "Reverse Bit order (vector). This instruction reads each vector element from the source SIMD&FP register, reverses the bits of the element, places the results into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Reverse Bit order (vector). This instruction reads each vector element from the source SIMD&FP register, reverses the bits of the element, places the results into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RBIT": + return { + "tooltip": "Reverse Bits reverses the bit order in a register.", + "html": "

Reverse Bits reverses the bit order in a register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RBIT": + return { + "tooltip": "Reverse bits in each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Reverse bits in each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RCWCAS": + case "RCWCASA": + case "RCWCASAL": + case "RCWCASL": + return { + "tooltip": "Read Check Write Compare and Swap doubleword in memory reads a 64-bit doubleword from memory, and compares it against the value held in a register. If the comparison is equal, the value in a second register is conditionally written to memory. Storing back to memory is conditional on RCW Checks. If the write is performed, the read and the write occur atomically such that no other modification of the memory location can take place between the read and the write. This instruction updates the condition flags based on the result of the update of memory.", + "html": "

Read Check Write Compare and Swap doubleword in memory reads a 64-bit doubleword from memory, and compares it against the value held in a register. If the comparison is equal, the value in a second register is conditionally written to memory. Storing back to memory is conditional on RCW Checks. If the write is performed, the read and the write occur atomically such that no other modification of the memory location can take place between the read and the write. This instruction updates the condition flags based on the result of the update of memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RCWCASP": + case "RCWCASPA": + case "RCWCASPAL": + case "RCWCASPL": + return { + "tooltip": "Read Check Write Compare and Swap quadword in memory reads a 128-bit quadword from memory, and compares it against the value held in a pair of registers. If the comparison is equal, the value in a second pair of registers is conditionally written to memory. Storing back to memory is conditional on RCW Checks. If the write is performed, the read and the write occur atomically such that no other modification of the memory location can take place between the read and the write. This instruction updates the condition flags based on the result of the update of memory.", + "html": "

Read Check Write Compare and Swap quadword in memory reads a 128-bit quadword from memory, and compares it against the value held in a pair of registers. If the comparison is equal, the value in a second pair of registers is conditionally written to memory. Storing back to memory is conditional on RCW Checks. If the write is performed, the read and the write occur atomically such that no other modification of the memory location can take place between the read and the write. This instruction updates the condition flags based on the result of the update of memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RCWCLR": + case "RCWCLRA": + case "RCWCLRAL": + case "RCWCLRL": + return { + "tooltip": "Read Check Write atomic bit Clear on doubleword in memory atomically loads a 64-bit doubleword from memory, performs a bitwise AND with the complement of the value held in a register on it, and conditionally stores the result back to memory. Storing of the result back to memory is conditional on RCW Checks. The value initially loaded from memory is returned in the destination register. This instruction updates the condition flags based on the result of the update of memory.", + "html": "

Read Check Write atomic bit Clear on doubleword in memory atomically loads a 64-bit doubleword from memory, performs a bitwise AND with the complement of the value held in a register on it, and conditionally stores the result back to memory. Storing of the result back to memory is conditional on RCW Checks. The value initially loaded from memory is returned in the destination register. This instruction updates the condition flags based on the result of the update of memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RCWCLRP": + case "RCWCLRPA": + case "RCWCLRPAL": + case "RCWCLRPL": + return { + "tooltip": "Read Check Write atomic bit Clear on quadword in memory atomically loads a 128-bit quadword from memory, performs a bitwise AND with the complement of the value held in a pair of registers on it, and conditionally stores the result back to memory. Storing of the result back to memory is conditional on RCW Checks. The value initially loaded from memory is returned in the same pair of registers. This instruction updates the condition flags based on the result of the update of memory.", + "html": "

Read Check Write atomic bit Clear on quadword in memory atomically loads a 128-bit quadword from memory, performs a bitwise AND with the complement of the value held in a pair of registers on it, and conditionally stores the result back to memory. Storing of the result back to memory is conditional on RCW Checks. The value initially loaded from memory is returned in the same pair of registers. This instruction updates the condition flags based on the result of the update of memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RCWSCAS": + case "RCWSCASA": + case "RCWSCASAL": + case "RCWSCASL": + return { + "tooltip": "Read Check Write Software Compare and Swap doubleword in memory reads a 64-bit doubleword from memory, and compares it against the value held in a register. If the comparison is equal, the value in a second register is conditionally written to memory. Storing back to memory is conditional on RCW Checks and RCWS Checks. If the write is performed, the read and the write occur atomically such that no other modification of the memory location can take place between the read and the write. This instruction updates the condition flags based on the result of the update of memory.", + "html": "

Read Check Write Software Compare and Swap doubleword in memory reads a 64-bit doubleword from memory, and compares it against the value held in a register. If the comparison is equal, the value in a second register is conditionally written to memory. Storing back to memory is conditional on RCW Checks and RCWS Checks. If the write is performed, the read and the write occur atomically such that no other modification of the memory location can take place between the read and the write. This instruction updates the condition flags based on the result of the update of memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RCWSCASP": + case "RCWSCASPA": + case "RCWSCASPAL": + case "RCWSCASPL": + return { + "tooltip": "Read Check Write Software Compare and Swap quadword in memory reads a 128-bit quadword from memory, and compares it against the value held in a pair of registers. If the comparison is equal, the value in a second pair of registers is conditionally written to memory. Storing back to memory is conditional on RCW Checks and RCWS Checks. If the write is performed, the read and the write occur atomically such that no other modification of the memory location can take place between the read and the write. This instruction updates the condition flags based on the result of the update of memory.", + "html": "

Read Check Write Software Compare and Swap quadword in memory reads a 128-bit quadword from memory, and compares it against the value held in a pair of registers. If the comparison is equal, the value in a second pair of registers is conditionally written to memory. Storing back to memory is conditional on RCW Checks and RCWS Checks. If the write is performed, the read and the write occur atomically such that no other modification of the memory location can take place between the read and the write. This instruction updates the condition flags based on the result of the update of memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RCWSCLR": + case "RCWSCLRA": + case "RCWSCLRAL": + case "RCWSCLRL": + return { + "tooltip": "Read Check Write Software atomic bit Clear on doubleword in memory atomically loads a 64-bit doubleword from memory, performs a bitwise AND with the complement of the value held in a register on it, and conditionally stores the result back to memory. Storing of the result back to memory is conditional on RCW Checks and RCWS Checks. The value initially loaded from memory is returned in the destination register. This instruction updates the condition flags based on the result of the update of memory.", + "html": "

Read Check Write Software atomic bit Clear on doubleword in memory atomically loads a 64-bit doubleword from memory, performs a bitwise AND with the complement of the value held in a register on it, and conditionally stores the result back to memory. Storing of the result back to memory is conditional on RCW Checks and RCWS Checks. The value initially loaded from memory is returned in the destination register. This instruction updates the condition flags based on the result of the update of memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RCWSCLRP": + case "RCWSCLRPA": + case "RCWSCLRPAL": + case "RCWSCLRPL": + return { + "tooltip": "Read Check Write Software atomic bit Clear on quadword in memory atomically loads a 128-bit quadword from memory, performs a bitwise AND with the complement of the value held in a pair of registers on it, and conditionally stores the result back to memory. Storing of the result back to memory is conditional on RCW Checks and RCWS Checks. The value initially loaded from memory is returned in the same pair of registers. This instruction updates the condition flags based on the result of the update of memory.", + "html": "

Read Check Write Software atomic bit Clear on quadword in memory atomically loads a 128-bit quadword from memory, performs a bitwise AND with the complement of the value held in a pair of registers on it, and conditionally stores the result back to memory. Storing of the result back to memory is conditional on RCW Checks and RCWS Checks. The value initially loaded from memory is returned in the same pair of registers. This instruction updates the condition flags based on the result of the update of memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RCWSET": + case "RCWSETA": + case "RCWSETAL": + case "RCWSETL": + return { + "tooltip": "Read Check Write atomic bit Set on doubleword in memory atomically loads a 64-bit doubleword from memory, performs a bitwise OR with the complement of the value held in a register on it, and conditionally stores the result back to memory. Storing of the result back to memory is conditional on RCW Checks. The value initially loaded from memory is returned in the destination register. This instruction updates the condition flags based on the result of the update of memory.", + "html": "

Read Check Write atomic bit Set on doubleword in memory atomically loads a 64-bit doubleword from memory, performs a bitwise OR with the complement of the value held in a register on it, and conditionally stores the result back to memory. Storing of the result back to memory is conditional on RCW Checks. The value initially loaded from memory is returned in the destination register. This instruction updates the condition flags based on the result of the update of memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RCWSETP": + case "RCWSETPA": + case "RCWSETPAL": + case "RCWSETPL": + return { + "tooltip": "Read Check Write atomic bit Set on quadword in memory atomically loads a 128-bit quadword from memory, performs a bitwise OR with the value held in a pair of registers on it, and conditionally stores the result back to memory. Storing of the result back to memory is conditional on RCW Checks. The value initially loaded from memory is returned in the same pair of registers. This instruction updates the condition flags based on the result of the update of memory.", + "html": "

Read Check Write atomic bit Set on quadword in memory atomically loads a 128-bit quadword from memory, performs a bitwise OR with the value held in a pair of registers on it, and conditionally stores the result back to memory. Storing of the result back to memory is conditional on RCW Checks. The value initially loaded from memory is returned in the same pair of registers. This instruction updates the condition flags based on the result of the update of memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RCWSSET": + case "RCWSSETA": + case "RCWSSETAL": + case "RCWSSETL": + return { + "tooltip": "Read Check Write Software atomic bit Set on doubleword in memory atomically loads a 64-bit doubleword from memory, performs a bitwise OR with the complement of the value held in a register on it, and conditionally stores the result back to memory. Storing of the result back to memory is conditional on RCW Checks and RCWS Checks. The value initially loaded from memory is returned in the destination register. This instruction updates the condition flags based on the result of the update of memory.", + "html": "

Read Check Write Software atomic bit Set on doubleword in memory atomically loads a 64-bit doubleword from memory, performs a bitwise OR with the complement of the value held in a register on it, and conditionally stores the result back to memory. Storing of the result back to memory is conditional on RCW Checks and RCWS Checks. The value initially loaded from memory is returned in the destination register. This instruction updates the condition flags based on the result of the update of memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RCWSSETP": + case "RCWSSETPA": + case "RCWSSETPAL": + case "RCWSSETPL": + return { + "tooltip": "Read Check Write Software atomic bit Set on quadword in memory atomically loads a 128-bit quadword from memory, performs a bitwise OR with the value held in a pair of registers on it, and conditionally stores the result back to memory. Storing of the result back to memory is conditional on RCW Checks and RCWS Checks. The value initially loaded from memory is returned in the same pair of registers. This instruction updates the condition flags based on the result of the update of memory.", + "html": "

Read Check Write Software atomic bit Set on quadword in memory atomically loads a 128-bit quadword from memory, performs a bitwise OR with the value held in a pair of registers on it, and conditionally stores the result back to memory. Storing of the result back to memory is conditional on RCW Checks and RCWS Checks. The value initially loaded from memory is returned in the same pair of registers. This instruction updates the condition flags based on the result of the update of memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RCWSSWP": + case "RCWSSWPA": + case "RCWSSWPAL": + case "RCWSSWPL": + return { + "tooltip": "Read Check Write Software Swap doubleword in memory atomically loads a 64-bit doubleword from a memory location, and conditionally stores the value held in a register back to the same memory location. Storing back to memory is conditional on RCW Checks and RCWS Checks. The value initially loaded from memory is returned in the destination register. This instruction updates the condition flags based on the result of the update of memory.", + "html": "

Read Check Write Software Swap doubleword in memory atomically loads a 64-bit doubleword from a memory location, and conditionally stores the value held in a register back to the same memory location. Storing back to memory is conditional on RCW Checks and RCWS Checks. The value initially loaded from memory is returned in the destination register. This instruction updates the condition flags based on the result of the update of memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RCWSSWPP": + case "RCWSSWPPA": + case "RCWSSWPPAL": + case "RCWSSWPPL": + return { + "tooltip": "Read Check Write Software Swap quadword in memory atomically loads a 128-bit quadword from a memory location, and conditionally stores the value held in a pair of registers back to the same memory location. Storing back to memory is conditional on RCW Checks and RCWS Checks. The value initially loaded from memory is returned in the same pair of registers. This instruction updates the condition flags based on the result of the update of memory.", + "html": "

Read Check Write Software Swap quadword in memory atomically loads a 128-bit quadword from a memory location, and conditionally stores the value held in a pair of registers back to the same memory location. Storing back to memory is conditional on RCW Checks and RCWS Checks. The value initially loaded from memory is returned in the same pair of registers. This instruction updates the condition flags based on the result of the update of memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RCWSWP": + case "RCWSWPA": + case "RCWSWPAL": + case "RCWSWPL": + return { + "tooltip": "Read Check Write Swap doubleword in memory atomically loads a 64-bit doubleword from a memory location, and conditionally stores the value held in a register back to the same memory location. Storing back to memory is conditional on RCW Checks. The value initially loaded from memory is returned in the destination register. This instruction updates the condition flags based on the result of the update of memory.", + "html": "

Read Check Write Swap doubleword in memory atomically loads a 64-bit doubleword from a memory location, and conditionally stores the value held in a register back to the same memory location. Storing back to memory is conditional on RCW Checks. The value initially loaded from memory is returned in the destination register. This instruction updates the condition flags based on the result of the update of memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RCWSWPP": + case "RCWSWPPA": + case "RCWSWPPAL": + case "RCWSWPPL": + return { + "tooltip": "Read Check Write Swap quadword in memory atomically loads a 128-bit quadword from a memory location, and conditionally stores the value held in a pair of registers back to the same memory location. Storing back to memory is conditional on RCW Checks. The value initially loaded from memory is returned in the same pair of registers. This instruction updates the condition flags based on the result of the update of memory.", + "html": "

Read Check Write Swap quadword in memory atomically loads a 128-bit quadword from a memory location, and conditionally stores the value held in a pair of registers back to the same memory location. Storing back to memory is conditional on RCW Checks. The value initially loaded from memory is returned in the same pair of registers. This instruction updates the condition flags based on the result of the update of memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RDFFR": + return { + "tooltip": "Read the first-fault register (FFR) and place in the destination predicate without predication.", + "html": "

Read the first-fault register (FFR) and place in the destination predicate without predication.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RDFFR": + return { + "tooltip": "Read the first-fault register (FFR) and place active elements in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.", + "html": "

Read the first-fault register (FFR) and place active elements in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Does not set the condition flags.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RDFFRS": + return { + "tooltip": "Read the first-fault register (FFR) and place active elements in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

Read the first-fault register (FFR) and place active elements in the corresponding elements of the destination predicate. Inactive elements in the destination predicate register are set to zero. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RDSVL": + return { + "tooltip": "Multiply the Streaming SVE vector register size in bytes by an immediate in the range -32 to 31 and place the result in the 64-bit destination general-purpose register.", + "html": "

Multiply the Streaming SVE vector register size in bytes by an immediate in the range -32 to 31 and place the result in the 64-bit destination general-purpose register.

This instruction does not require the PE to be in Streaming SVE mode.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RDVL": + return { + "tooltip": "Multiply the current vector register size in bytes by an immediate in the range -32 to 31 and place the result in the 64-bit destination general-purpose register.", + "html": "

Multiply the current vector register size in bytes by an immediate in the range -32 to 31 and place the result in the 64-bit destination general-purpose register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RET": + return { + "tooltip": "Return from subroutine branches unconditionally to an address in a register, with a hint that this is a subroutine return.", + "html": "

Return from subroutine branches unconditionally to an address in a register, with a hint that this is a subroutine return.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RETAA": + case "RETAB": + return { + "tooltip": "Return from subroutine, with pointer authentication. This instruction authenticates the address that is held in LR, using SP as the modifier and the specified key, branches to the authenticated address, with a hint that this instruction is a subroutine return.", + "html": "

Return from subroutine, with pointer authentication. This instruction authenticates the address that is held in LR, using SP as the modifier and the specified key, branches to the authenticated address, with a hint that this instruction is a subroutine return.

Key A is used for RETAA. Key B is used for RETAB.

If the authentication passes, the PE continues execution at the target of the branch. For information on behavior if the authentication fails, see Faulting on pointer authentication.

The authenticated address is not written back to LR.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "REV": + return { + "tooltip": "Reverse Bytes reverses the byte order in a register.", + "html": "

Reverse Bytes reverses the byte order in a register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "REV16": + return { + "tooltip": "Reverse elements in 16-bit halfwords (vector). This instruction reverses the order of 8-bit elements in each halfword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Reverse elements in 16-bit halfwords (vector). This instruction reverses the order of 8-bit elements in each halfword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "REV16": + return { + "tooltip": "Reverse bytes in 16-bit halfwords reverses the byte order in each 16-bit halfword of a register.", + "html": "

Reverse bytes in 16-bit halfwords reverses the byte order in each 16-bit halfword of a register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "REV32": + return { + "tooltip": "Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Reverse elements in 32-bit words (vector). This instruction reverses the order of 8-bit or 16-bit elements in each word of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "REV32": + return { + "tooltip": "Reverse bytes in 32-bit words reverses the byte order in each 32-bit word of a register.", + "html": "

Reverse bytes in 32-bit words reverses the byte order in each 32-bit word of a register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "REV64": + return { + "tooltip": "Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Reverse elements in 64-bit doublewords (vector). This instruction reverses the order of 8-bit, 16-bit, or 32-bit elements in each doubleword of the vector in the source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "REV64": + return { + "tooltip": "Reverse Bytes reverses the byte order in a 64-bit general-purpose register.", + "html": "

Reverse Bytes reverses the byte order in a 64-bit general-purpose register.

When assembling for Armv8.2, an assembler must support this pseudo-instruction. It is optional whether an assembler supports this pseudo-instruction when assembling for an architecture earlier than Armv8.2.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "REV": + return { + "tooltip": "Reverse the order of all elements in the source predicate and place in the destination predicate. This instruction is unpredicated.", + "html": "

Reverse the order of all elements in the source predicate and place in the destination predicate. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "REV": + return { + "tooltip": "Reverse the order of all elements in the source vector and place in the destination vector. This instruction is unpredicated.", + "html": "

Reverse the order of all elements in the source vector and place in the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "REVB": + case "REVH": + case "REVW": + return { + "tooltip": "Reverse the order of 8-bit bytes, 16-bit halfwords or 32-bit words within each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Reverse the order of 8-bit bytes, 16-bit halfwords or 32-bit words within each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "REVD": + return { + "tooltip": "Reverse the order of 64-bit doublewords within each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Reverse the order of 64-bit doublewords within each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RMIF": + return { + "tooltip": "Performs a rotation right of a value held in a general purpose register by an immediate value, and then inserts a selection of the bottom four bits of the result of the rotation into the PSTATE flags, under the control of a second immediate mask.", + "html": "

Performs a rotation right of a value held in a general purpose register by an immediate value, and then inserts a selection of the bottom four bits of the result of the rotation into the PSTATE flags, under the control of a second immediate mask.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ROR": + return { + "tooltip": "Rotate right (immediate) provides the value of the contents of a register rotated by a variable number of bits. The bits that are rotated off the right end are inserted into the vacated bit positions on the left.", + "html": "

Rotate right (immediate) provides the value of the contents of a register rotated by a variable number of bits. The bits that are rotated off the right end are inserted into the vacated bit positions on the left.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ROR": + return { + "tooltip": "Rotate Right (register) provides the value of the contents of a register rotated by a variable number of bits. The bits that are rotated off the right end are inserted into the vacated bit positions on the left. The remainder obtained by dividing the second source register by the data size defines the number of bits by which the first source register is right-shifted.", + "html": "

Rotate Right (register) provides the value of the contents of a register rotated by a variable number of bits. The bits that are rotated off the right end are inserted into the vacated bit positions on the left. The remainder obtained by dividing the second source register by the data size defines the number of bits by which the first source register is right-shifted.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RORV": + return { + "tooltip": "Rotate Right Variable provides the value of the contents of a register rotated by a variable number of bits. The bits that are rotated off the right end are inserted into the vacated bit positions on the left. The remainder obtained by dividing the second source register by the data size defines the number of bits by which the first source register is right-shifted.", + "html": "

Rotate Right Variable provides the value of the contents of a register rotated by a variable number of bits. The bits that are rotated off the right end are inserted into the vacated bit positions on the left. The remainder obtained by dividing the second source register by the data size defines the number of bits by which the first source register is right-shifted.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RPRFM": + return { + "tooltip": "Range Prefetch Memory signals the memory system that data memory accesses from a specified range of addresses are likely to occur in the near future. The instruction may also signal the memory system about the likelihood of data reuse of the specified range of addresses. The memory system can respond by taking actions that are expected to speed up the memory accesses when they do occur, such as prefetching locations within the specified address ranges into one or more caches. The memory system may also exploit the data reuse hints to decide whether to retain the data in other caches upon eviction from the innermost caches or to discard it.", + "html": "

Range Prefetch Memory signals the memory system that data memory accesses from a specified range of addresses are likely to occur in the near future. The instruction may also signal the memory system about the likelihood of data reuse of the specified range of addresses. The memory system can respond by taking actions that are expected to speed up the memory accesses when they do occur, such as prefetching locations within the specified address ranges into one or more caches. The memory system may also exploit the data reuse hints to decide whether to retain the data in other caches upon eviction from the innermost caches or to discard it.

The effect of an RPRFM instruction is implementation defined, but because these signals are only hints, the instruction cannot cause a synchronous Data Abort exception and is guaranteed not to access Device memory. It is valid for the PE to treat this instruction as a NOP.

An RPRFM instruction specifies the type of accesses and range of addresses using the following parameters:

Software is expected to honor the parameters it provides to the RPRFM instruction, and the same PE should access all locations in the range, in the direction specified by the sign of the 'Length' and 'Stride' parameters. A range prefetch is considered active on a PE until all locations in the range have been accessed by the PE. A range prefetch might also be inactivated by the PE prior to completion, for example due to a software context switch or lack of hardware resources.

Software should not specify overlapping addresses in multiple active ranges. If a range is expected to be accessed by both load and store instructions (read-modify-write), then a single range with a 'Type' parameter of PST (prefetch for store) should be specified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RSHRN": + case "RSHRN2": + return { + "tooltip": "Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.", + "html": "

Rounding Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the vector in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SHRN.

The RSHRN instruction writes the vector to the lower half of the destination register and clears the upper half, while the RSHRN2 instruction writes the vector to the upper half of the destination register without affecting the other bits of the register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RSHRNB": + return { + "tooltip": "Shift each unsigned integer value in the source vector elements right by an immediate value, and place the rounded results in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.", + "html": "

Shift each unsigned integer value in the source vector elements right by an immediate value, and place the rounded results in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RSHRNT": + return { + "tooltip": "Shift each unsigned integer value in the source vector elements right by an immediate value, and place the rounded results in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.", + "html": "

Shift each unsigned integer value in the source vector elements right by an immediate value, and place the rounded results in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RSUBHN": + case "RSUBHN2": + return { + "tooltip": "Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.", + "html": "

Rounding Subtract returning High Narrow. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register.

The results are rounded. For truncated results, see SUBHN.

The RSUBHN instruction writes the vector to the lower half of the destination register and clears the upper half, while the RSUBHN2 instruction writes the vector to the upper half of the destination register without affecting the other bits of the register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RSUBHNB": + return { + "tooltip": "Subtract each vector element of the second source vector from the corresponding vector element in the first source vector, and place the most significant rounded half of the result in the even-numbered half-width destination elements, while setting the odd-numbered half-width destination elements to zero. This instruction is unpredicated.", + "html": "

Subtract each vector element of the second source vector from the corresponding vector element in the first source vector, and place the most significant rounded half of the result in the even-numbered half-width destination elements, while setting the odd-numbered half-width destination elements to zero. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "RSUBHNT": + return { + "tooltip": "Subtract each vector element of the second source vector from the corresponding vector element in the first source vector, and place the most significant rounded half of the result in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. This instruction is unpredicated.", + "html": "

Subtract each vector element of the second source vector from the corresponding vector element in the first source vector, and place the most significant rounded half of the result in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SABA": + return { + "tooltip": "Signed Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.", + "html": "

Signed Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SABA": + return { + "tooltip": "Compute the absolute difference between signed integer values in elements of the second source vector and corresponding elements of the first source vector, and add the difference to the corresponding elements of the destination vector. This instruction is unpredicated.", + "html": "

Compute the absolute difference between signed integer values in elements of the second source vector and corresponding elements of the first source vector, and add the difference to the corresponding elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SABAL": + case "SABAL2": + return { + "tooltip": "Signed Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.", + "html": "

Signed Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

The SABAL instruction extracts each source vector from the lower half of each source register. The SABAL2 instruction extracts each source vector from the upper half of each source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SABALB": + return { + "tooltip": "Compute the absolute difference between even-numbered signed integer values in elements of the second source vector and corresponding elements of the first source vector, and destructively add to the overlapping double-width elements of the addend vector. This instruction is unpredicated.", + "html": "

Compute the absolute difference between even-numbered signed integer values in elements of the second source vector and corresponding elements of the first source vector, and destructively add to the overlapping double-width elements of the addend vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SABALT": + return { + "tooltip": "Compute the absolute difference between odd-numbered signed elements of the second source vector and corresponding elements of the first source vector, and destructively add to the overlapping double-width elements of the addend vector. This instruction is unpredicated.", + "html": "

Compute the absolute difference between odd-numbered signed elements of the second source vector and corresponding elements of the first source vector, and destructively add to the overlapping double-width elements of the addend vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SABD": + return { + "tooltip": "Signed Absolute Difference. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Signed Absolute Difference. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SABD": + return { + "tooltip": "Compute the absolute difference between signed integer values in active elements of the second source vector and corresponding elements of the first source vector and destructively place the difference in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Compute the absolute difference between signed integer values in active elements of the second source vector and corresponding elements of the first source vector and destructively place the difference in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SABDL": + case "SABDL2": + return { + "tooltip": "Signed Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the results into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.", + "html": "

Signed Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the results into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

The SABDL instruction extracts each source vector from the lower half of each source register. The SABDL2 instruction extracts each source vector from the upper half of each source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SABDLB": + return { + "tooltip": "Compute the absolute difference between even-numbered signed integer values in elements of the second source vector and corresponding elements of the first source vector, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Compute the absolute difference between even-numbered signed integer values in elements of the second source vector and corresponding elements of the first source vector, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SABDLT": + return { + "tooltip": "Compute the absolute difference between odd-numbered signed integer values in elements of the second source vector and corresponding elements of the first source vector, and place the results in overlapping double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Compute the absolute difference between odd-numbered signed integer values in elements of the second source vector and corresponding elements of the first source vector, and place the results in overlapping double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SADALP": + return { + "tooltip": "Signed Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register and accumulates the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.", + "html": "

Signed Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register and accumulates the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SADALP": + return { + "tooltip": "Add pairs of adjacent signed integer values and accumulate the results into the overlapping double-width elements of the destination vector.", + "html": "

Add pairs of adjacent signed integer values and accumulate the results into the overlapping double-width elements of the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SADDL": + case "SADDL2": + return { + "tooltip": "Signed Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.", + "html": "

Signed Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

The SADDL instruction extracts each source vector from the lower half of each source register. The SADDL2 instruction extracts each source vector from the upper half of each source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SADDLB": + return { + "tooltip": "Add the corresponding even-numbered signed elements of the first and second source vectors, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Add the corresponding even-numbered signed elements of the first and second source vectors, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SADDLBT": + return { + "tooltip": "Add the even-numbered signed elements of the first source vector to the odd-numbered signed elements of the second source vector, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Add the even-numbered signed elements of the first source vector to the odd-numbered signed elements of the second source vector, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SADDLP": + return { + "tooltip": "Signed Add Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.", + "html": "

Signed Add Long Pairwise. This instruction adds pairs of adjacent signed integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SADDLT": + return { + "tooltip": "Add the corresponding odd-numbered signed elements of the first and second source vectors, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Add the corresponding odd-numbered signed elements of the first and second source vectors, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SADDLV": + return { + "tooltip": "Signed Add Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are signed integer values.", + "html": "

Signed Add Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are signed integer values.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SADDV": + return { + "tooltip": "Signed add horizontally across all lanes of a vector, and place the result in the SIMD&FP scalar destination register. Narrow elements are first sign-extended to 64 bits. Inactive elements in the source vector are treated as zero.", + "html": "

Signed add horizontally across all lanes of a vector, and place the result in the SIMD&FP scalar destination register. Narrow elements are first sign-extended to 64 bits. Inactive elements in the source vector are treated as zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SADDW": + case "SADDW2": + return { + "tooltip": "Signed Add Wide. This instruction adds vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the results in a vector, and writes the vector to the SIMD&FP destination register.", + "html": "

Signed Add Wide. This instruction adds vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the results in a vector, and writes the vector to the SIMD&FP destination register.

The SADDW instruction extracts the second source vector from the lower half of the second source register. The SADDW2 instruction extracts the second source vector from the upper half of the second source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SADDWB": + return { + "tooltip": "Add the even-numbered signed elements of the second source vector to the overlapping double-width elements of the first source vector and place the results in the corresponding double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Add the even-numbered signed elements of the second source vector to the overlapping double-width elements of the first source vector and place the results in the corresponding double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SADDWT": + return { + "tooltip": "Add the odd-numbered signed elements of the second source vector to the overlapping double-width elements of the first source vector and place the results in the corresponding double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Add the odd-numbered signed elements of the second source vector to the overlapping double-width elements of the first source vector and place the results in the corresponding double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SB": + return { + "tooltip": "Speculation Barrier is a barrier that controls speculation.", + "html": "

Speculation Barrier is a barrier that controls speculation.

The semantics of the Speculation Barrier are that the execution, until the barrier completes, of any instruction that appears later in the program order than the barrier:

In particular, any instruction that appears later in the program order than the barrier cannot cause a speculative allocation into any caching structure where the allocation of that entry could be indicative of any data value present in memory or in the registers.

The SB instruction:

When the prediction of the instruction stream is not informed by data taken from the register outputs of the speculative execution of instructions appearing in program order after an uncompleted SB instruction, the SB instruction has no effect on the use of prediction resources to predict the instruction stream that is being fetched.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SBC": + return { + "tooltip": "Subtract with Carry subtracts a register value and the value of NOT (Carry flag) from a register value, and writes the result to the destination register.", + "html": "

Subtract with Carry subtracts a register value and the value of NOT (Carry flag) from a register value, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SBCLB": + return { + "tooltip": "Subtract the even-numbered elements of the first source vector and the inverted 1-bit carry from the least-significant bit of the odd-numbered elements of the second source vector from the even-numbered elements of the destination and accumulator vector. The 1-bit carry output is placed in the corresponding odd-numbered element of the destination vector.", + "html": "

Subtract the even-numbered elements of the first source vector and the inverted 1-bit carry from the least-significant bit of the odd-numbered elements of the second source vector from the even-numbered elements of the destination and accumulator vector. The 1-bit carry output is placed in the corresponding odd-numbered element of the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SBCLT": + return { + "tooltip": "Subtract the odd-numbered elements of the first source vector and the inverted 1-bit carry from the least-significant bit of the odd-numbered elements of the second source vector from the even-numbered elements of the destination and accumulator vector. The 1-bit carry output is placed in the corresponding odd-numbered element of the destination vector.", + "html": "

Subtract the odd-numbered elements of the first source vector and the inverted 1-bit carry from the least-significant bit of the odd-numbered elements of the second source vector from the even-numbered elements of the destination and accumulator vector. The 1-bit carry output is placed in the corresponding odd-numbered element of the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SBCS": + return { + "tooltip": "Subtract with Carry, setting flags, subtracts a register value and the value of NOT (Carry flag) from a register value, and writes the result to the destination register. It updates the condition flags based on the result.", + "html": "

Subtract with Carry, setting flags, subtracts a register value and the value of NOT (Carry flag) from a register value, and writes the result to the destination register. It updates the condition flags based on the result.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SBFIZ": + return { + "tooltip": "Signed Bitfield Insert in Zeros copies a bitfield of <width> bits from the least significant bits of the source register to bit position <lsb> of the destination register, setting the destination bits below the bitfield to zero, and the bits above the bitfield to a copy of the most significant bit of the bitfield.", + "html": "

Signed Bitfield Insert in Zeros copies a bitfield of <width> bits from the least significant bits of the source register to bit position <lsb> of the destination register, setting the destination bits below the bitfield to zero, and the bits above the bitfield to a copy of the most significant bit of the bitfield.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SBFM": + return { + "tooltip": "Signed Bitfield Move is usually accessed via one of its aliases, which are always preferred for disassembly.", + "html": "

Signed Bitfield Move is usually accessed via one of its aliases, which are always preferred for disassembly.

If <imms> is greater than or equal to <immr>, this copies a bitfield of (<imms>-<immr>+1) bits starting from bit position <immr> in the source register to the least significant bits of the destination register.

If <imms> is less than <immr>, this copies a bitfield of (<imms>+1) bits from the least significant bits of the source register to bit position (regsize-<immr>) of the destination register, where regsize is the destination register size of 32 or 64 bits.

In both cases the destination bits below the bitfield are set to zero, and the bits above the bitfield are set to a copy of the most significant bit of the bitfield.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SBFX": + return { + "tooltip": "Signed Bitfield Extract copies a bitfield of <width> bits starting from bit position <lsb> in the source register to the least significant bits of the destination register, and sets destination bits above the bitfield to a copy of the most significant bit of the bitfield.", + "html": "

Signed Bitfield Extract copies a bitfield of <width> bits starting from bit position <lsb> in the source register to the least significant bits of the destination register, and sets destination bits above the bitfield to a copy of the most significant bit of the bitfield.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SCLAMP": + return { + "tooltip": "Clamp each signed element in the two or four destination vectors to between the signed minimum value in the corresponding element of the first source vector and the signed maximum value in the corresponding element of the second source vector and destructively place the clamped results in the corresponding elements of the two or four destination vectors.", + "html": "

Clamp each signed element in the two or four destination vectors to between the signed minimum value in the corresponding element of the first source vector and the signed maximum value in the corresponding element of the second source vector and destructively place the clamped results in the corresponding elements of the two or four destination vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SCLAMP": + return { + "tooltip": "Clamp each signed element in the destination vector to between the signed minimum value in the corresponding element of the first source vector and the signed maximum value in the corresponding element of the second source vector and destructively write the results in the corresponding elements of the destination vector. This instruction is unpredicated.", + "html": "

Clamp each signed element in the destination vector to between the signed minimum value in the corresponding element of the first source vector and the signed maximum value in the corresponding element of the second source vector and destructively write the results in the corresponding elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SCVTF": + return { + "tooltip": "Signed fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.", + "html": "

Signed fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the Security state and Exception level in which the instruction is executed, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SCVTF": + return { + "tooltip": "Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.", + "html": "

Signed integer Convert to Floating-point (vector). This instruction converts each element in a vector from signed integer to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the Security state and Exception level in which the instruction is executed, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SCVTF": + return { + "tooltip": "Signed fixed-point Convert to Floating-point (scalar). This instruction converts the signed value in the 32-bit or 64-bit general-purpose source register to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.", + "html": "

Signed fixed-point Convert to Floating-point (scalar). This instruction converts the signed value in the 32-bit or 64-bit general-purpose source register to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the Security state and Exception level in which the instruction is executed, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SCVTF": + return { + "tooltip": "Signed integer Convert to Floating-point (scalar). This instruction converts the signed integer value in the general-purpose source register to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.", + "html": "

Signed integer Convert to Floating-point (scalar). This instruction converts the signed integer value in the general-purpose source register to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SCVTF": + return { + "tooltip": "Convert to single-precision from signed 32-bit integer, each element of the two or four source vectors, and place the results in the corresponding elements of the two or four destination vectors.", + "html": "

Convert to single-precision from signed 32-bit integer, each element of the two or four source vectors, and place the results in the corresponding elements of the two or four destination vectors.

This instruction follows SME2 floating-point numerical behaviors corresponding to instructions that place their results in one or more SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SCVTF": + return { + "tooltip": "Convert to floating-point from the signed integer in each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Convert to floating-point from the signed integer in each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

If the input and result types have a different size the smaller type is held unpacked in the least significant bits of elements of the larger size. When the input is the smaller type the upper bits of each source element are ignored. When the result is the smaller type the results are zero-extended to fill each destination element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SDIV": + return { + "tooltip": "Signed Divide divides a signed integer register value by another signed integer register value, and writes the result to the destination register. The condition flags are not affected.", + "html": "

Signed Divide divides a signed integer register value by another signed integer register value, and writes the result to the destination register. The condition flags are not affected.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SDIV": + return { + "tooltip": "Signed divide active elements of the first source vector by corresponding elements of the second source vector and destructively place the quotient in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Signed divide active elements of the first source vector by corresponding elements of the second source vector and destructively place the quotient in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SDIVR": + return { + "tooltip": "Signed reversed divide active elements of the second source vector by corresponding elements of the first source vector and destructively place the quotient in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Signed reversed divide active elements of the second source vector by corresponding elements of the first source vector and destructively place the quotient in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SDOT": + return { + "tooltip": "Dot Product signed arithmetic (vector, by element). This instruction performs the dot product of the four 8-bit elements in each 32-bit element of the first source register with the four 8-bit elements of an indexed 32-bit element in the second source register, accumulating the result into the corresponding 32-bit element of the destination register.", + "html": "

Dot Product signed arithmetic (vector, by element). This instruction performs the dot product of the four 8-bit elements in each 32-bit element of the first source register with the four 8-bit elements of an indexed 32-bit element in the second source register, accumulating the result into the corresponding 32-bit element of the destination register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

In Armv8.2 and Armv8.3, this is an optional instruction. From Armv8.4 it is mandatory for all implementations to support it.

ID_AA64ISAR0_EL1.DP indicates whether this instruction is supported.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SDOT": + return { + "tooltip": "Dot Product signed arithmetic (vector). This instruction performs the dot product of the four signed 8-bit elements in each 32-bit element of the first source register with the four signed 8-bit elements of the corresponding 32-bit element in the second source register, accumulating the result into the corresponding 32-bit element of the destination register.", + "html": "

Dot Product signed arithmetic (vector). This instruction performs the dot product of the four signed 8-bit elements in each 32-bit element of the first source register with the four signed 8-bit elements of the corresponding 32-bit element in the second source register, accumulating the result into the corresponding 32-bit element of the destination register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

In Armv8.2 and Armv8.3, this is an optional instruction. From Armv8.4 it is mandatory for all implementations to support it.

ID_AA64ISAR0_EL1.DP indicates whether this instruction is supported.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SDOT": + return { + "tooltip": "The signed integer dot product instruction computes the dot product of a group of two signed 16-bit integer values held in each 32-bit element of the first source vector multiplied by a group of two signed 16-bit integer values in the corresponding 32-bit element of the second source vector, and then destructively adds the widened dot product to the corresponding 32-bit element of the destination vector.", + "html": "

The signed integer dot product instruction computes the dot product of a group of two signed 16-bit integer values held in each 32-bit element of the first source vector multiplied by a group of two signed 16-bit integer values in the corresponding 32-bit element of the second source vector, and then destructively adds the widened dot product to the corresponding 32-bit element of the destination vector.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SDOT": + return { + "tooltip": "The signed integer indexed dot product instruction computes the dot product of a group of two signed 16-bit integer values held in each 32-bit element of the first source vector multiplied by a group of two signed 16-bit integer values in an indexed 32-bit element of the second source vector, and then destructively adds the widened dot product to the corresponding 32-bit element of the destination vector.", + "html": "

The signed integer indexed dot product instruction computes the dot product of a group of two signed 16-bit integer values held in each 32-bit element of the first source vector multiplied by a group of two signed 16-bit integer values in an indexed 32-bit element of the second source vector, and then destructively adds the widened dot product to the corresponding 32-bit element of the destination vector.

The groups within the second source vector are specified using an immediate index which selects the same group position within each 128-bit vector segment. The index range is from 0 to 3. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SDOT": + return { + "tooltip": "The signed integer dot product instruction computes the dot product of a group of four signed 8-bit or 16-bit integer values held in each 32-bit or 64-bit element of the first source vector multiplied by a group of four signed 8-bit or 16-bit integer values in the corresponding 32-bit or 64-bit element of the second source vector, and then destructively adds the widened dot product to the corresponding 32-bit or 64-bit element of the destination vector.", + "html": "

The signed integer dot product instruction computes the dot product of a group of four signed 8-bit or 16-bit integer values held in each 32-bit or 64-bit element of the first source vector multiplied by a group of four signed 8-bit or 16-bit integer values in the corresponding 32-bit or 64-bit element of the second source vector, and then destructively adds the widened dot product to the corresponding 32-bit or 64-bit element of the destination vector.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SDOT": + return { + "tooltip": "The signed integer indexed dot product instruction computes the dot product of a group of four signed 8-bit or 16-bit integer values held in each 32-bit or 64-bit element of the first source vector multiplied by a group of four signed 8-bit or 16-bit integer values in an indexed 32-bit or 64-bit element of the second source vector, and then destructively adds the widened dot product to the corresponding 32-bit or 64-bit element of the destination vector.", + "html": "

The signed integer indexed dot product instruction computes the dot product of a group of four signed 8-bit or 16-bit integer values held in each 32-bit or 64-bit element of the first source vector multiplied by a group of four signed 8-bit or 16-bit integer values in an indexed 32-bit or 64-bit element of the second source vector, and then destructively adds the widened dot product to the corresponding 32-bit or 64-bit element of the destination vector.

The groups within the second source vector are specified using an immediate index which selects the same group position within each 128-bit vector segment. The index range is from 0 to one less than the number of groups per 128-bit segment, encoded in 1 to 2 bits depending on the size of the group. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SDOT": + return { + "tooltip": "The signed integer dot product instruction computes the dot product of two signed 16-bit integer values held in each 32-bit element of the two or four first source vectors and two signed 16-bit integer values in the corresponding indexed 32-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups.", + "html": "

The signed integer dot product instruction computes the dot product of two signed 16-bit integer values held in each 32-bit element of the two or four first source vectors and two signed 16-bit integer values in the corresponding indexed 32-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups.

The groups within the second source vector are specified using an immediate element index which selects the same group position within each 128-bit vector segment. The index range is from 0 to 3, encoded in 2 bits. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SDOT": + return { + "tooltip": "The signed integer dot product instruction computes the dot product of two signed 16-bit integer values held in each 32-bit element of the two or four first source vectors and two signed 16-bit integer values in the corresponding 32-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

The signed integer dot product instruction computes the dot product of two signed 16-bit integer values held in each 32-bit element of the two or four first source vectors and two signed 16-bit integer values in the corresponding 32-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SDOT": + return { + "tooltip": "The signed integer dot product instruction computes the dot product of two signed 16-bit integer values held in each 32-bit element of the two or four first source vectors and two signed 16-bit integer values in the corresponding 32-bit element of the two or four second source vectors. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

The signed integer dot product instruction computes the dot product of two signed 16-bit integer values held in each 32-bit element of the two or four first source vectors and two signed 16-bit integer values in the corresponding 32-bit element of the two or four second source vectors. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SDOT": + return { + "tooltip": "The signed integer dot product instruction computes the dot product of four signed 8-bit or 16-bit integer values held in each 32-bit or 64-bit element of the two or four first source vectors and four signed 8-bit or 16-bit integer values in the corresponding indexed 32-bit or 64-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit or 64-bit element of the ZA single-vector groups.", + "html": "

The signed integer dot product instruction computes the dot product of four signed 8-bit or 16-bit integer values held in each 32-bit or 64-bit element of the two or four first source vectors and four signed 8-bit or 16-bit integer values in the corresponding indexed 32-bit or 64-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit or 64-bit element of the ZA single-vector groups.

The groups within the second source vector are specified using an immediate element index which selects the same group position within each 128-bit vector segment. The index range is from 0 to one less than the number of groups per 128-bit segment, encoded in 1 to 2 bits depending on the size of the group. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 16-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SDOT": + return { + "tooltip": "The signed integer dot product instruction computes the dot product of four signed 8-bit or 16-bit integer values held in each 32-bit or 64-bit element of the two or four first source vectors and four signed 8-bit or 16-bit integer values in the corresponding 32-bit or 64-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit or 64-bit element of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

The signed integer dot product instruction computes the dot product of four signed 8-bit or 16-bit integer values held in each 32-bit or 64-bit element of the two or four first source vectors and four signed 8-bit or 16-bit integer values in the corresponding 32-bit or 64-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit or 64-bit element of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 16-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SDOT": + return { + "tooltip": "The signed integer dot product instruction computes the dot product of four signed 8-bit or 16-bit integer values held in each 32-bit or 64-bit element of the two or four first source vectors and four signed 8-bit or 16-bit integer values in the corresponding 32-bit or 64-bit element of the two or four second source vectors. The widened dot product result is destructively added to the corresponding 32-bit or 64-bit element of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

The signed integer dot product instruction computes the dot product of four signed 8-bit or 16-bit integer values held in each 32-bit or 64-bit element of the two or four first source vectors and four signed 8-bit or 16-bit integer values in the corresponding 32-bit or 64-bit element of the two or four second source vectors. The widened dot product result is destructively added to the corresponding 32-bit or 64-bit element of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 16-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SEL": + return { + "tooltip": "Read active elements from the two or four first source vectors and inactive elements from the two or four second source vectors and place in the corresponding elements of the two or four destination vectors.", + "html": "

Read active elements from the two or four first source vectors and inactive elements from the two or four second source vectors and place in the corresponding elements of the two or four destination vectors.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SEL": + return { + "tooltip": "Read active elements from the first source predicate and inactive elements from the second source predicate and place in the corresponding elements of the destination predicate. Does not set the condition flags.", + "html": "

Read active elements from the first source predicate and inactive elements from the second source predicate and place in the corresponding elements of the destination predicate. Does not set the condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SEL": + return { + "tooltip": "Select elements from the first source vector where the corresponding vector select predicate element is true, and from the second source vector where the predicate element is false, placing them in the corresponding elements of the destination vector.", + "html": "

Select elements from the first source vector where the corresponding vector select predicate element is true, and from the second source vector where the predicate element is false, placing them in the corresponding elements of the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SETF16": + case "SETF8": + return { + "tooltip": "Set the PSTATE.NZV flags based on the value in the specified general-purpose register. SETF8 treats the value as an 8 bit value, and SETF16 treats the value as an 16 bit value.", + "html": "

Set the PSTATE.NZV flags based on the value in the specified general-purpose register. SETF8 treats the value as an 8 bit value, and SETF16 treats the value as an 16 bit value.

The PSTATE.C flag is not affected by these instructions.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SETFFR": + return { + "tooltip": "Initialise the first-fault register (FFR) to all true prior to a sequence of first-fault or non-fault loads. This instruction is unpredicated.", + "html": "

Initialise the first-fault register (FFR) to all true prior to a sequence of first-fault or non-fault loads. This instruction is unpredicated.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SETGE": + case "SETGM": + case "SETGP": + return { + "tooltip": "Memory Set with tag setting. These instructions perform a memory set using the value in the bottom byte of the source register and store an Allocation Tag to memory for each Tag Granule written. The Allocation Tag is calculated from the Logical Address Tag in the register which holds the first address that the set is made to. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: SETGP, then SETGM, and then SETGE.", + "html": "

Memory Set with tag setting. These instructions perform a memory set using the value in the bottom byte of the source register and store an Allocation Tag to memory for each Tag Granule written. The Allocation Tag is calculated from the Logical Address Tag in the register which holds the first address that the set is made to. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: SETGP, then SETGM, and then SETGE.

SETGP performs some preconditioning of the arguments suitable for using the SETGM instruction, and performs an implementation defined amount of the memory set. SETGM performs an implementation defined amount of the memory set. SETGE performs the last part of the memory set.

The inclusion of implementation defined amounts of memory set allows some optimization of the size that can be performed.

The architecture supports two algorithms for the memory set: option A and option B. Which algorithm is used is implementation defined.

Portable software should not assume that the choice of algorithm is constant.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SETGEN": + case "SETGMN": + case "SETGPN": + return { + "tooltip": "Memory Set with tag setting, non-temporal. These instructions perform a memory set using the value in the bottom byte of the source register and store an Allocation Tag to memory for each Tag Granule written. The Allocation Tag is calculated from the Logical Address Tag in the register which holds the first address that the set is made to. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: SETGPN, then SETGMN, and then SETGEN.", + "html": "

Memory Set with tag setting, non-temporal. These instructions perform a memory set using the value in the bottom byte of the source register and store an Allocation Tag to memory for each Tag Granule written. The Allocation Tag is calculated from the Logical Address Tag in the register which holds the first address that the set is made to. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: SETGPN, then SETGMN, and then SETGEN.

SETGPN performs some preconditioning of the arguments suitable for using the SETGMN instruction, and performs an implementation defined amount of the memory set. SETGMN performs an implementation defined amount of the memory set. SETGEN performs the last part of the memory set.

The inclusion of implementation defined amounts of memory set allows some optimization of the size that can be performed.

The architecture supports two algorithms for the memory set: option A and option B. Which algorithm is used is implementation defined.

Portable software should not assume that the choice of algorithm is constant.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SETGET": + case "SETGMT": + case "SETGPT": + return { + "tooltip": "Memory Set with tag setting, unprivileged. These instructions perform a memory set using the value in the bottom byte of the source register and store an Allocation Tag to memory for each Tag Granule written. The Allocation Tag is calculated from the Logical Address Tag in the register which holds the first address that the set is made to. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: SETGPT, then SETGMT, and then SETGET.", + "html": "

Memory Set with tag setting, unprivileged. These instructions perform a memory set using the value in the bottom byte of the source register and store an Allocation Tag to memory for each Tag Granule written. The Allocation Tag is calculated from the Logical Address Tag in the register which holds the first address that the set is made to. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: SETGPT, then SETGMT, and then SETGET.

SETGPT performs some preconditioning of the arguments suitable for using the SETGMT instruction, and performs an implementation defined amount of the memory set. SETGMT performs an implementation defined amount of the memory set. SETGET performs the last part of the memory set.

The inclusion of implementation defined amounts of memory set allows some optimization of the size that can be performed.

The architecture supports two algorithms for the memory set: option A and option B. Which algorithm is used is implementation defined.

Portable software should not assume that the choice of algorithm is constant.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SETGETN": + case "SETGMTN": + case "SETGPTN": + return { + "tooltip": "Memory Set with tag setting, unprivileged and non-temporal. These instructions perform a memory set using the value in the bottom byte of the source register and store an Allocation Tag to memory for each Tag Granule written. The Allocation Tag is calculated from the Logical Address Tag in the register which holds the first address that the set is made to. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: SETGPTN, then SETGMTN, and then SETGETN.", + "html": "

Memory Set with tag setting, unprivileged and non-temporal. These instructions perform a memory set using the value in the bottom byte of the source register and store an Allocation Tag to memory for each Tag Granule written. The Allocation Tag is calculated from the Logical Address Tag in the register which holds the first address that the set is made to. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: SETGPTN, then SETGMTN, and then SETGETN.

SETGPTN performs some preconditioning of the arguments suitable for using the SETGMTN instruction, and performs an implementation defined amount of the memory set. SETGMTN performs an implementation defined amount of the memory set. SETGETN performs the last part of the memory set.

The inclusion of implementation defined amounts of memory set allows some optimization of the size that can be performed.

The architecture supports two algorithms for the memory set: option A and option B. Which algorithm is used is implementation defined.

Portable software should not assume that the choice of algorithm is constant.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SETE": + case "SETM": + case "SETP": + return { + "tooltip": "Memory Set. These instructions perform a memory set using the value in the bottom byte of the source register. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: SETP, then SETM, and then SETE.", + "html": "

Memory Set. These instructions perform a memory set using the value in the bottom byte of the source register. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: SETP, then SETM, and then SETE.

SETP performs some preconditioning of the arguments suitable for using the SETM instruction, and performs an implementation defined amount of the memory set. SETM performs an implementation defined amount of the memory set. SETE performs the last part of the memory set.

The inclusion of implementation defined amounts of memory set allows some optimization of the size that can be performed.

The architecture supports two algorithms for the memory set: option A and option B. Which algorithm is used is implementation defined.

Portable software should not assume that the choice of algorithm is constant.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SETEN": + case "SETMN": + case "SETPN": + return { + "tooltip": "Memory Set, non-temporal. These instructions perform a memory set using the value in the bottom byte of the source register. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: SETPN, then SETMN, and then SETEN.", + "html": "

Memory Set, non-temporal. These instructions perform a memory set using the value in the bottom byte of the source register. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: SETPN, then SETMN, and then SETEN.

SETPN performs some preconditioning of the arguments suitable for using the SETMN instruction, and performs an implementation defined amount of the memory set. SETMN performs an implementation defined amount of the memory set. SETEN performs the last part of the memory set.

The inclusion of implementation defined amounts of memory set allows some optimization of the size that can be performed.

The architecture supports two algorithms for the memory set: option A and option B. Which algorithm is used is implementation defined.

Portable software should not assume that the choice of algorithm is constant.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SETET": + case "SETMT": + case "SETPT": + return { + "tooltip": "Memory Set, unprivileged. These instructions perform a memory set using the value in the bottom byte of the source register. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: SETPT, then SETMT, and then SETET.", + "html": "

Memory Set, unprivileged. These instructions perform a memory set using the value in the bottom byte of the source register. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: SETPT, then SETMT, and then SETET.

SETPT performs some preconditioning of the arguments suitable for using the SETMT instruction, and performs an implementation defined amount of the memory set. SETMT performs an implementation defined amount of the memory set. SETET performs the last part of the memory set.

The inclusion of implementation defined amounts of memory set allows some optimization of the size that can be performed.

The architecture supports two algorithms for the memory set: option A and option B. Which algorithm is used is implementation defined.

Portable software should not assume that the choice of algorithm is constant.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SETETN": + case "SETMTN": + case "SETPTN": + return { + "tooltip": "Memory Set, unprivileged and non-temporal. These instructions perform a memory set using the value in the bottom byte of the source register. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: SETPTN, then SETMTN, and then SETETN.", + "html": "

Memory Set, unprivileged and non-temporal. These instructions perform a memory set using the value in the bottom byte of the source register. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: SETPTN, then SETMTN, and then SETETN.

SETPTN performs some preconditioning of the arguments suitable for using the SETMTN instruction, and performs an implementation defined amount of the memory set. SETMTN performs an implementation defined amount of the memory set. SETETN performs the last part of the memory set.

The inclusion of implementation defined amounts of memory set allows some optimization of the size that can be performed.

The architecture supports two algorithms for the memory set: option A and option B. Which algorithm is used is implementation defined.

Portable software should not assume that the choice of algorithm is constant.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SEV": + return { + "tooltip": "Send Event is a hint instruction. It causes an event to be signaled to all PEs in the multiprocessor system. For more information, see Wait for Event mechanism and Send event.", + "html": "

Send Event is a hint instruction. It causes an event to be signaled to all PEs in the multiprocessor system. For more information, see Wait for Event mechanism and Send event.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SEVL": + return { + "tooltip": "Send Event Local is a hint instruction that causes an event to be signaled locally without requiring the event to be signaled to other PEs in the multiprocessor system. It can prime a wait-loop which starts with a WFE instruction.", + "html": "

Send Event Local is a hint instruction that causes an event to be signaled locally without requiring the event to be signaled to other PEs in the multiprocessor system. It can prime a wait-loop which starts with a WFE instruction.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHA1C": + return { + "tooltip": "SHA1 hash update (choose).", + "html": "

SHA1 hash update (choose).

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHA1H": + return { + "tooltip": "SHA1 fixed rotate.", + "html": "

SHA1 fixed rotate.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHA1M": + return { + "tooltip": "SHA1 hash update (majority).", + "html": "

SHA1 hash update (majority).

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHA1P": + return { + "tooltip": "SHA1 hash update (parity).", + "html": "

SHA1 hash update (parity).

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHA1SU0": + return { + "tooltip": "SHA1 schedule update 0.", + "html": "

SHA1 schedule update 0.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHA1SU1": + return { + "tooltip": "SHA1 schedule update 1.", + "html": "

SHA1 schedule update 1.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHA256H2": + return { + "tooltip": "SHA256 hash update (part 2).", + "html": "

SHA256 hash update (part 2).

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHA256H": + return { + "tooltip": "SHA256 hash update (part 1).", + "html": "

SHA256 hash update (part 1).

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHA256SU0": + return { + "tooltip": "SHA256 schedule update 0.", + "html": "

SHA256 schedule update 0.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHA256SU1": + return { + "tooltip": "SHA256 schedule update 1.", + "html": "

SHA256 schedule update 1.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHA512H2": + return { + "tooltip": "SHA512 Hash update part 2 takes the values from the three 128-bit source SIMD&FP registers and produces a 128-bit output value that combines the sigma0 and majority functions of two iterations of the SHA512 computation. It returns this value to the destination SIMD&FP register.", + "html": "

SHA512 Hash update part 2 takes the values from the three 128-bit source SIMD&FP registers and produces a 128-bit output value that combines the sigma0 and majority functions of two iterations of the SHA512 computation. It returns this value to the destination SIMD&FP register.

This instruction is implemented only when FEAT_SHA512 is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHA512H": + return { + "tooltip": "SHA512 Hash update part 1 takes the values from the three 128-bit source SIMD&FP registers and produces a 128-bit output value that combines the sigma1 and chi functions of two iterations of the SHA512 computation. It returns this value to the destination SIMD&FP register.", + "html": "

SHA512 Hash update part 1 takes the values from the three 128-bit source SIMD&FP registers and produces a 128-bit output value that combines the sigma1 and chi functions of two iterations of the SHA512 computation. It returns this value to the destination SIMD&FP register.

This instruction is implemented only when FEAT_SHA512 is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHA512SU0": + return { + "tooltip": "SHA512 Schedule Update 0 takes the values from the two 128-bit source SIMD&FP registers and produces a 128-bit output value that combines the gamma0 functions of two iterations of the SHA512 schedule update that are performed after the first 16 iterations within a block. It returns this value to the destination SIMD&FP register.", + "html": "

SHA512 Schedule Update 0 takes the values from the two 128-bit source SIMD&FP registers and produces a 128-bit output value that combines the gamma0 functions of two iterations of the SHA512 schedule update that are performed after the first 16 iterations within a block. It returns this value to the destination SIMD&FP register.

This instruction is implemented only when FEAT_SHA512 is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHA512SU1": + return { + "tooltip": "SHA512 Schedule Update 1 takes the values from the three source SIMD&FP registers and produces a 128-bit output value that combines the gamma1 functions of two iterations of the SHA512 schedule update that are performed after the first 16 iterations within a block. It returns this value to the destination SIMD&FP register.", + "html": "

SHA512 Schedule Update 1 takes the values from the three source SIMD&FP registers and produces a 128-bit output value that combines the gamma1 functions of two iterations of the SHA512 schedule update that are performed after the first 16 iterations within a block. It returns this value to the destination SIMD&FP register.

This instruction is implemented only when FEAT_SHA512 is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHADD": + return { + "tooltip": "Signed Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Signed Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

The results are truncated. For rounded results, see SRHADD.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHADD": + return { + "tooltip": "Add active signed elements of the first source vector to corresponding signed elements of the second source vector, shift right one bit, and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Add active signed elements of the first source vector to corresponding signed elements of the second source vector, shift right one bit, and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHL": + return { + "tooltip": "Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Shift Left (immediate). This instruction reads each value from a vector, left shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHLL": + case "SHLL2": + return { + "tooltip": "Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.", + "html": "

Shift Left Long (by element size). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, left shifts each result by the element size, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

The SHLL instruction extracts vector elements from the lower half of the source register. The SHLL2 instruction extracts vector elements from the upper half of the source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHRN": + case "SHRN2": + return { + "tooltip": "Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.", + "html": "

Shift Right Narrow (immediate). This instruction reads each unsigned integer value from the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. The results are truncated. For rounded results, see RSHRN.

The SHRN instruction writes the vector to the lower half of the destination register and clears the upper half, while the SHRN2 instruction writes the vector to the upper half of the destination register without affecting the other bits of the register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHRNB": + return { + "tooltip": "Shift each unsigned integer value in the source vector elements right by an immediate value, and place the truncated results in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.", + "html": "

Shift each unsigned integer value in the source vector elements right by an immediate value, and place the truncated results in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHRNT": + return { + "tooltip": "Shift each unsigned integer value in the source vector elements right by an immediate value, and place the truncated results in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.", + "html": "

Shift each unsigned integer value in the source vector elements right by an immediate value, and place the truncated results in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHSUB": + return { + "tooltip": "Signed Halving Subtract. This instruction subtracts the elements in the vector in the second source SIMD&FP register from the corresponding elements in the vector in the first source SIMD&FP register, shifts each result right one bit, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Signed Halving Subtract. This instruction subtracts the elements in the vector in the second source SIMD&FP register from the corresponding elements in the vector in the first source SIMD&FP register, shifts each result right one bit, places each result into elements of a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHSUB": + return { + "tooltip": "Subtract active signed elements of the second source vector from corresponding signed elements of the first source vector, shift right one bit, and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Subtract active signed elements of the second source vector from corresponding signed elements of the first source vector, shift right one bit, and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SHSUBR": + return { + "tooltip": "Subtract active signed elements of the first source vector from corresponding signed elements of the second source vector, shift right one bit, and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Subtract active signed elements of the first source vector from corresponding signed elements of the second source vector, shift right one bit, and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SLI": + return { + "tooltip": "Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.", + "html": "

Shift Left and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, left shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the left of each vector element in the source register are lost.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SLI": + return { + "tooltip": "Shift each source vector element left by an immediate value, and insert the result into the corresponding vector element in the destination vector register, merging the shifted bits from each source element with existing bits in each destination vector element. The immediate shift amount is an unsigned value in the range 0 to number of bits per element minus 1. This instruction is unpredicated.", + "html": "

Shift each source vector element left by an immediate value, and insert the result into the corresponding vector element in the destination vector register, merging the shifted bits from each source element with existing bits in each destination vector element. The immediate shift amount is an unsigned value in the range 0 to number of bits per element minus 1. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SM3PARTW1": + return { + "tooltip": "SM3PARTW1 takes three 128-bit vectors from the three source SIMD&FP registers and returns a 128-bit result in the destination SIMD&FP register. The result is obtained by a three-way exclusive-OR of the elements within the input vectors with some fixed rotations, see the Operation pseudocode for more information.", + "html": "

SM3PARTW1 takes three 128-bit vectors from the three source SIMD&FP registers and returns a 128-bit result in the destination SIMD&FP register. The result is obtained by a three-way exclusive-OR of the elements within the input vectors with some fixed rotations, see the Operation pseudocode for more information.

This instruction is implemented only when FEAT_SM3 is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SM3PARTW2": + return { + "tooltip": "SM3PARTW2 takes three 128-bit vectors from three source SIMD&FP registers and returns a 128-bit result in the destination SIMD&FP register. The result is obtained by a three-way exclusive-OR of the elements within the input vectors with some fixed rotations, see the Operation pseudocode for more information.", + "html": "

SM3PARTW2 takes three 128-bit vectors from three source SIMD&FP registers and returns a 128-bit result in the destination SIMD&FP register. The result is obtained by a three-way exclusive-OR of the elements within the input vectors with some fixed rotations, see the Operation pseudocode for more information.

This instruction is implemented only when FEAT_SM3 is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SM3SS1": + return { + "tooltip": "SM3SS1 rotates the top 32 bits of the 128-bit vector in the first source SIMD&FP register by 12, and adds that 32-bit value to the two other 32-bit values held in the top 32 bits of each of the 128-bit vectors in the second and third source SIMD&FP registers, rotating this result left by 7 and writing the final result into the top 32 bits of the vector in the destination SIMD&FP register, with the bottom 96 bits of the vector being written to 0.", + "html": "

SM3SS1 rotates the top 32 bits of the 128-bit vector in the first source SIMD&FP register by 12, and adds that 32-bit value to the two other 32-bit values held in the top 32 bits of each of the 128-bit vectors in the second and third source SIMD&FP registers, rotating this result left by 7 and writing the final result into the top 32 bits of the vector in the destination SIMD&FP register, with the bottom 96 bits of the vector being written to 0.

This instruction is implemented only when FEAT_SM3 is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SM3TT1A": + return { + "tooltip": "SM3TT1A takes three 128-bit vectors from three source SIMD&FP registers and a 2-bit immediate index value, and returns a 128-bit result in the destination SIMD&FP register. It performs a three-way exclusive-OR of the three 32-bit fields held in the upper three elements of the first source vector, and adds the resulting 32-bit value and the following three other 32-bit values", + "html": "

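SM3TT1A takes three 128-bit vectors from three source SIMD&FP registers and a 2-bit immediate index value, and returns a 128-bit result in the destination SIMD&FP register. It performs a three-way exclusive-OR of the three 32-bit fields held in the upper three elements of the first source vector, and adds the resulting 32-bit value and the following three other 32-bit values: the bottom 32-bit element of the first source vector; the exclusive-OR of the top 32-bit element of the second source vector with the top 32-bit element of the first source vector rotated left by 12; and a 32-bit element indexed out of the third source vector.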

The result of this addition is returned as the top element of the result. The other elements of the result are taken from elements of the first source vector, with the element returned in bits<63:32> being rotated left by 9.

This instruction is implemented only when FEAT_SM3 is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SM3TT1B": + return { + "tooltip": "SM3TT1B takes three 128-bit vectors from three source SIMD&FP registers and a 2-bit immediate index value, and returns a 128-bit result in the destination SIMD&FP register. It performs a 32-bit majority function between the three 32-bit fields held in the upper three elements of the first source vector, and adds the resulting 32-bit value and the following three other 32-bit values", + "html": "

SM3TT1B takes three 128-bit vectors from three source SIMD&FP registers and a 2-bit immediate index value, and returns a 128-bit result in the destination SIMD&FP register. It performs a 32-bit majority function between the three 32-bit fields held in the upper three elements of the first source vector, and adds the resulting 32-bit value and the following three other 32-bit values:

The result of this addition is returned as the top element of the result. The other elements of the result are taken from elements of the first source vector, with the element returned in bits<63:32> being rotated left by 9.

This instruction is implemented only when FEAT_SM3 is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SM3TT2A": + return { + "tooltip": "SM3TT2A takes three 128-bit vectors from three source SIMD&FP register and a 2-bit immediate index value, and returns a 128-bit result in the destination SIMD&FP register. It performs a three-way exclusive-OR of the three 32-bit fields held in the upper three elements of the first source vector, and adds the resulting 32-bit value and the following three other 32-bit values", + "html": "

SM3TT2A takes three 128-bit vectors from three source SIMD&FP register and a 2-bit immediate index value, and returns a 128-bit result in the destination SIMD&FP register. It performs a three-way exclusive-OR of the three 32-bit fields held in the upper three elements of the first source vector, and adds the resulting 32-bit value and the following three other 32-bit values:

A three-way exclusive-OR is performed of the result of this addition, the result of the addition rotated left by 9, and the result of the addition rotated left by 17. The result of this exclusive-OR is returned as the top element of the returned result. The other elements of this result are taken from elements of the first source vector, with the element returned in bits<63:32> being rotated left by 19.

This instruction is implemented only when FEAT_SM3 is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SM3TT2B": + return { + "tooltip": "SM3TT2B takes three 128-bit vectors from three source SIMD&FP registers, and a 2-bit immediate index value, and returns a 128-bit result in the destination SIMD&FP register. It performs a 32-bit majority function between the three 32-bit fields held in the upper three elements of the first source vector, and adds the resulting 32-bit value and the following three other 32-bit values", + "html": "

SM3TT2B takes three 128-bit vectors from three source SIMD&FP registers, and a 2-bit immediate index value, and returns a 128-bit result in the destination SIMD&FP register. It performs a 32-bit majority function between the three 32-bit fields held in the upper three elements of the first source vector, and adds the resulting 32-bit value and the following three other 32-bit values:

A three-way exclusive-OR is performed of the result of this addition, the result of the addition rotated left by 9, and the result of the addition rotated left by 17. The result of this exclusive-OR is returned as the top element of the returned result. The other elements of this result are taken from elements of the first source vector, with the element returned in bits<63:32> being rotated left by 19.

This instruction is implemented only when FEAT_SM3 is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SM4E": + return { + "tooltip": "SM4 Encode takes input data as a 128-bit vector from the first source SIMD&FP register, and four iterations of the round key held as the elements of the 128-bit vector in the second source SIMD&FP register. It encrypts the data by four rounds, in accordance with the SM4 standard, returning the 128-bit result to the destination SIMD&FP register.", + "html": "

SM4 Encode takes input data as a 128-bit vector from the first source SIMD&FP register, and four iterations of the round key held as the elements of the 128-bit vector in the second source SIMD&FP register. It encrypts the data by four rounds, in accordance with the SM4 standard, returning the 128-bit result to the destination SIMD&FP register.

This instruction is implemented only when FEAT_SM4 is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SM4E": + return { + "tooltip": "The SM4E instruction reads 16 bytes of input data from each 128-bit segment of the first source vector, together with four iterations of 32-bit round keys from the corresponding 128-bit segments of the second source vector. Each block of data is encrypted by four rounds in accordance with the SM4 standard, and destructively placed in the corresponding segments of the first source vector. This instruction is unpredicated.", + "html": "

The SM4E instruction reads 16 bytes of input data from each 128-bit segment of the first source vector, together with four iterations of 32-bit round keys from the corresponding 128-bit segments of the second source vector. Each block of data is encrypted by four rounds in accordance with the SM4 standard, and destructively placed in the corresponding segments of the first source vector. This instruction is unpredicated.

ID_AA64ZFR0_EL1.SM4 indicates whether this instruction is implemented.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SM4EKEY": + return { + "tooltip": "SM4 Key takes an input as a 128-bit vector from the first source SIMD&FP register and a 128-bit constant from the second SIMD&FP register. It derives four iterations of the output key, in accordance with the SM4 standard, returning the 128-bit result to the destination SIMD&FP register.", + "html": "

SM4 Key takes an input as a 128-bit vector from the first source SIMD&FP register and a 128-bit constant from the second SIMD&FP register. It derives four iterations of the output key, in accordance with the SM4 standard, returning the 128-bit result to the destination SIMD&FP register.

This instruction is implemented only when FEAT_SM4 is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SM4EKEY": + return { + "tooltip": "The SM4EKEY instruction reads four rounds of 32-bit input key values from each 128-bit segment of the first source vector, along with four rounds of 32-bit constants from the corresponding 128-bit segment of the second source vector. The four rounds of output key values are derived in accordance with the SM4 standard, and placed in the corresponding segments of the destination vector. This instruction is unpredicated.", + "html": "

The SM4EKEY instruction reads four rounds of 32-bit input key values from each 128-bit segment of the first source vector, along with four rounds of 32-bit constants from the corresponding 128-bit segment of the second source vector. The four rounds of output key values are derived in accordance with the SM4 standard, and placed in the corresponding segments of the destination vector. This instruction is unpredicated.

ID_AA64ZFR0_EL1.SM4 indicates whether this instruction is implemented.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMADDL": + return { + "tooltip": "Signed Multiply-Add Long multiplies two 32-bit register values, adds a 64-bit register value, and writes the result to the 64-bit destination register.", + "html": "

Signed Multiply-Add Long multiplies two 32-bit register values, adds a 64-bit register value, and writes the result to the 64-bit destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMAX": + return { + "tooltip": "Signed Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Signed Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMAX": + return { + "tooltip": "Signed Maximum (immediate) determines the signed maximum of the source register value and immediate, and writes the result to the destination register.", + "html": "

Signed Maximum (immediate) determines the signed maximum of the source register value and immediate, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMAX": + return { + "tooltip": "Determine the signed maximum of elements of the second source vector and the corresponding elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the signed maximum of elements of the second source vector and the corresponding elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMAX": + return { + "tooltip": "Determine the signed maximum of elements of the two or four second source vectors and the corresponding elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the signed maximum of elements of the two or four second source vectors and the corresponding elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMAX": + return { + "tooltip": "Signed Maximum (register) determines the signed maximum of the two source register values and writes the result to the destination register.", + "html": "

Signed Maximum (register) determines the signed maximum of the two source register values and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMAX": + return { + "tooltip": "Determine the signed maximum of active elements of the second source vector and corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Determine the signed maximum of active elements of the second source vector and corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMAX": + return { + "tooltip": "Determine the signed maximum of an immediate and each element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate is a signed 8-bit value in the range -128 to +127, inclusive. This instruction is unpredicated.", + "html": "

Determine the signed maximum of an immediate and each element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate is a signed 8-bit value in the range -128 to +127, inclusive. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMAXP": + return { + "tooltip": "Signed Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Signed Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMAXP": + return { + "tooltip": "Compute the maximum value of each pair of adjacent signed integer elements within each source vector, and interleave the results from corresponding lanes. The interleaved result values are destructively placed in the first source vector.", + "html": "

Compute the maximum value of each pair of adjacent signed integer elements within each source vector, and interleave the results from corresponding lanes. The interleaved result values are destructively placed in the first source vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMAXQV": + return { + "tooltip": "Signed maximum of the same element numbers from each 128-bit source vector segment, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as the minimum signed integer for the element size.", + "html": "

Signed maximum of the same element numbers from each 128-bit source vector segment, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as the minimum signed integer for the element size.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMAXV": + return { + "tooltip": "Signed Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.", + "html": "

Signed Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMAXV": + return { + "tooltip": "Signed maximum horizontally across all lanes of a vector, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as the minimum signed integer for the element size.", + "html": "

Signed maximum horizontally across all lanes of a vector, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as the minimum signed integer for the element size.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMC": + return { + "tooltip": "Secure Monitor Call causes an exception to EL3.", + "html": "

Secure Monitor Call causes an exception to EL3.

SMC is available only for software executing at EL1 or higher. It is undefined in EL0.

If the values of HCR_EL2.TSC and SCR_EL3.SMD are both 0, execution of an SMC instruction at EL1 or higher generates a Secure Monitor Call exception, recording it in ESR_ELx, using the EC value 0x17, that is taken to EL3.

If the value of HCR_EL2.TSC is 1 and EL2 is enabled in the current Security state, execution of an SMC instruction at EL1 generates an exception that is taken to EL2, regardless of the value of SCR_EL3.SMD.

If the value of HCR_EL2.TSC is 0 and the value of SCR_EL3.SMD is 1, the SMC instruction is undefined.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMIN": + return { + "tooltip": "Signed Minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two signed integer values into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Signed Minimum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the smaller of each of the two signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMIN": + return { + "tooltip": "Signed Minimum (immediate) determines the signed minimum of the source register value and immediate, and writes the result to the destination register.", + "html": "

Signed Minimum (immediate) determines the signed minimum of the source register value and immediate, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMIN": + return { + "tooltip": "Determine the signed minimum of elements of the second source vector and the corresponding elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the signed minimum of elements of the second source vector and the corresponding elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMIN": + return { + "tooltip": "Determine the signed minimum of elements of the two or four second source vectors and the corresponding elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the signed minimum of elements of the two or four second source vectors and the corresponding elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMIN": + return { + "tooltip": "Signed Minimum (register) determines the signed minimum of the two source register values and writes the result to the destination register.", + "html": "

Signed Minimum (register) determines the signed minimum of the two source register values and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMIN": + return { + "tooltip": "Determine the signed minimum of active elements of the second source vector and corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Determine the signed minimum of active elements of the second source vector and corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMIN": + return { + "tooltip": "Determine the signed minimum of an immediate and each element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate is a signed 8-bit value in the range -128 to +127, inclusive. This instruction is unpredicated.", + "html": "

Determine the signed minimum of an immediate and each element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate is a signed 8-bit value in the range -128 to +127, inclusive. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMINP": + return { + "tooltip": "Signed Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Signed Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of signed integer values into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMINP": + return { + "tooltip": "Compute the minimum value of each pair of adjacent signed integer elements within each source vector, and interleave the results from corresponding lanes. The interleaved result values are destructively placed in the first source vector.", + "html": "

Compute the minimum value of each pair of adjacent signed integer elements within each source vector, and interleave the results from corresponding lanes. The interleaved result values are destructively placed in the first source vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMINQV": + return { + "tooltip": "Signed minimum of the same element numbers from each 128-bit source vector segment, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as the maximum signed integer for the element size.", + "html": "

Signed minimum of the same element numbers from each 128-bit source vector segment, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as the maximum signed integer for the element size.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMINV": + return { + "tooltip": "Signed Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.", + "html": "

Signed Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are signed integer values.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMINV": + return { + "tooltip": "Signed minimum horizontally across all lanes of a vector, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as the maximum signed integer for the element size.", + "html": "

Signed minimum horizontally across all lanes of a vector, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as the maximum signed integer for the element size.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLAL": + case "SMLAL2": + return { + "tooltip": "Signed Multiply-Add Long (vector, by element). This instruction multiplies each vector element in the lower or upper half of the first source SIMD&FP register by the specified vector element in the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are signed integer values.", + "html": "

Signed Multiply-Add Long (vector, by element). This instruction multiplies each vector element in the lower or upper half of the first source SIMD&FP register by the specified vector element in the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are signed integer values.

The SMLAL instruction extracts vector elements from the lower half of the first source register. The SMLAL2 instruction extracts vector elements from the upper half of the first source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLAL": + case "SMLAL2": + return { + "tooltip": "Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.", + "html": "

Signed Multiply-Add Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

The SMLAL instruction extracts each source vector from the lower half of each source register. The SMLAL2 instruction extracts each source vector from the upper half of each source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLAL": + return { + "tooltip": "This signed integer multiply-add long instruction multiplies each signed 16-bit element in the one, two, or four first source vectors with each signed 16-bit indexed element of the second source vector, widens each product to 32-bits and destructively adds these values to the corresponding 32-bit elements of the ZA double-vector groups.", + "html": "

This signed integer multiply-add long instruction multiplies each signed 16-bit element in the one, two, or four first source vectors with each signed 16-bit indexed element of the second source vector, widens each product to 32-bits and destructively adds these values to the corresponding 32-bit elements of the ZA double-vector groups.

The elements within the second source vector are specified using an immediate element index which selects the same element position within each 128-bit vector segment. The index range is from 0 to 7, encoded in 3 bits. The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLAL": + return { + "tooltip": "This signed integer multiply-add long instruction multiplies each signed 16-bit element in the one, two, or four first source vectors with each signed 16-bit element in the second source vector, widens each product to 32-bits and destructively adds these values to the corresponding 32-bit elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.", + "html": "

This signed integer multiply-add long instruction multiplies each signed 16-bit element in the one, two, or four first source vectors with each signed 16-bit element in the second source vector, widens each product to 32-bits and destructively adds these values to the corresponding 32-bit elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLAL": + return { + "tooltip": "This signed integer multiply-add long instruction multiplies each signed 16-bit element in the two or four first source vectors with each signed 16-bit element in the two or four second source vectors, widens each product to 32-bits and destructively adds these values to the corresponding 32-bit elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

This signed integer multiply-add long instruction multiplies each signed 16-bit element in the two or four first source vectors with each signed 16-bit element in the two or four second source vectors, widens each product to 32-bits and destructively adds these values to the corresponding 32-bit elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLALB": + return { + "tooltip": "Multiply the corresponding even-numbered signed elements of the first and second source vectors and destructively add to the overlapping double-width elements of the addend vector. This instruction is unpredicated.", + "html": "

Multiply the corresponding even-numbered signed elements of the first and second source vectors and destructively add to the overlapping double-width elements of the addend vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLALB": + return { + "tooltip": "Multiply the even-numbered signed elements within each 128-bit segment of the first source vector by the specified signed element in the corresponding second source vector segment and destructively add to the overlapping double-width elements of the addend vector.", + "html": "

Multiply the even-numbered signed elements within each 128-bit segment of the first source vector by the specified signed element in the corresponding second source vector segment and destructively add to the overlapping double-width elements of the addend vector.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 2 or 3 bits depending on the size of the element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLALL": + return { + "tooltip": "This signed integer multiply-add long-long instruction multiplies each signed 8-bit or 16-bit element in the one, two, or four first source vectors with each signed 8-bit or 16-bit indexed element of second source vector, widens each product to 32-bits or 64-bits and destructively adds these values to the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups.", + "html": "

This signed integer multiply-add long-long instruction multiplies each signed 8-bit or 16-bit element in the one, two, or four first source vectors with each signed 8-bit or 16-bit indexed element of second source vector, widens each product to 32-bits or 64-bits and destructively adds these values to the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups.

The elements within the second source vector are specified using an immediate element index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 3 to 4 bits depending on the size of the element. The lowest of the four consecutive vector numbers forming the quad-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA quad-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 16-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLALL": + return { + "tooltip": "This signed integer multiply-add long-long instruction multiplies each signed 8-bit or 16-bit element in the one, two, or four first source vectors with each signed 8-bit or 16-bit element in the second source vector, widens each product to 32-bits or 64-bits and destructively adds these values to the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.", + "html": "

This signed integer multiply-add long-long instruction multiplies each signed 8-bit or 16-bit element in the one, two, or four first source vectors with each signed 8-bit or 16-bit element in the second source vector, widens each product to 32-bits or 64-bits and destructively adds these values to the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA quad-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 16-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLALL": + return { + "tooltip": "This signed integer multiply-add long-long instruction multiplies each signed 8-bit or 16-bit element in the two or four first source vectors with each signed 8-bit or 16-bit element in the one, two, or four second source vectors, widens each product to 32-bits or 64-bits and destructively adds these values to the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

This signed integer multiply-add long-long instruction multiplies each signed 8-bit or 16-bit element in the two or four first source vectors with each signed 8-bit or 16-bit element in the one, two, or four second source vectors, widens each product to 32-bits or 64-bits and destructively adds these values to the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA quad-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 16-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLALT": + return { + "tooltip": "Multiply the corresponding odd-numbered signed elements of the first and second source vectors and destructively add to the overlapping double-width elements of the addend vector. This instruction is unpredicated.", + "html": "

Multiply the corresponding odd-numbered signed elements of the first and second source vectors and destructively add to the overlapping double-width elements of the addend vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLALT": + return { + "tooltip": "Multiply the odd-numbered signed elements within each 128-bit segment of the first source vector by the specified signed element in the corresponding second source vector segment and destructively add to the overlapping double-width elements of the addend vector.", + "html": "

Multiply the odd-numbered signed elements within each 128-bit segment of the first source vector by the specified signed element in the corresponding second source vector segment and destructively add to the overlapping double-width elements of the addend vector.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 2 or 3 bits depending on the size of the element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLSL": + case "SMLSL2": + return { + "tooltip": "Signed Multiply-Subtract Long (vector, by element). This instruction multiplies each vector element in the lower or upper half of the first source SIMD&FP register by the specified vector element of the second source SIMD&FP register and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.", + "html": "

Signed Multiply-Subtract Long (vector, by element). This instruction multiplies each vector element in the lower or upper half of the first source SIMD&FP register by the specified vector element of the second source SIMD&FP register and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

The SMLSL instruction extracts vector elements from the lower half of the first source register. The SMLSL2 instruction extracts vector elements from the upper half of the first source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLSL": + case "SMLSL2": + return { + "tooltip": "Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.", + "html": "

Signed Multiply-Subtract Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

The SMLSL instruction extracts each source vector from the lower half of each source register. The SMLSL2 instruction extracts each source vector from the upper half of each source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLSL": + return { + "tooltip": "This signed integer multiply-subtract long instruction multiplies each signed 16-bit element in the one, two, or four first source vectors with each signed 16-bit indexed element of the second source vector, widens each product to 32-bits and destructively subtracts these values from the corresponding 32-bit elements of the ZA double-vector groups.", + "html": "

This signed integer multiply-subtract long instruction multiplies each signed 16-bit element in the one, two, or four first source vectors with each signed 16-bit indexed element of the second source vector, widens each product to 32-bits and destructively subtracts these values from the corresponding 32-bit elements of the ZA double-vector groups.

The elements within the second source vector are specified using an immediate element index which selects the same element position within each 128-bit vector segment. The index range is from 0 to 7, encoded in 3 bits. The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLSL": + return { + "tooltip": "This signed integer multiply-subtract long instruction multiplies each signed 16-bit element in the one, two, or four first source vectors with each signed 16-bit element in the second source vector, widens each product to 32-bits and destructively subtracts these values from the corresponding 32-bit elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.", + "html": "

This signed integer multiply-subtract long instruction multiplies each signed 16-bit element in the one, two, or four first source vectors with each signed 16-bit element in the second source vector, widens each product to 32-bits and destructively subtracts these values from the corresponding 32-bit elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLSL": + return { + "tooltip": "This signed integer multiply-subtract long instruction multiplies each signed 16-bit element in the two or four first source vectors with each signed 16-bit element in the two or four second source vectors, widens each product to 32-bits and destructively subtracts these values from the corresponding 32-bit elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

This signed integer multiply-subtract long instruction multiplies each signed 16-bit element in the two or four first source vectors with each signed 16-bit element in the two or four second source vectors, widens each product to 32-bits and destructively subtracts these values from the corresponding 32-bit elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLSLB": + return { + "tooltip": "Multiply the corresponding even-numbered signed elements of the first and second source vectors and destructively subtract from the overlapping double-width elements of the addend vector. This instruction is unpredicated.", + "html": "

Multiply the corresponding even-numbered signed elements of the first and second source vectors and destructively subtract from the overlapping double-width elements of the addend vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLSLB": + return { + "tooltip": "Multiply the even-numbered signed elements within each 128-bit segment of the first source vector by the specified signed element in the corresponding second source vector segment and destructively subtract from the overlapping double-width elements of the addend vector.", + "html": "

Multiply the even-numbered signed elements within each 128-bit segment of the first source vector by the specified signed element in the corresponding second source vector segment and destructively subtract from the overlapping double-width elements of the addend vector.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 2 or 3 bits depending on the size of the element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLSLL": + return { + "tooltip": "This signed integer multiply-subtract long-long instruction multiplies each signed 8-bit or 16-bit element in the one, two, or four first source vectors with each signed 8-bit or 16-bit indexed element of second source vector, widens each product to 32-bits or 64-bits and destructively subtracts these values from the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups.", + "html": "

This signed integer multiply-subtract long-long instruction multiplies each signed 8-bit or 16-bit element in the one, two, or four first source vectors with each signed 8-bit or 16-bit indexed element of second source vector, widens each product to 32-bits or 64-bits and destructively subtracts these values from the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups.

The elements within the second source vector are specified using an immediate element index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 3 to 4 bits depending on the size of the element. The lowest of the four consecutive vector numbers forming the quad-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA quad-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 16-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLSLL": + return { + "tooltip": "This signed integer multiply-subtract long-long instruction multiplies each signed 8-bit or 16-bit element in the one, two, or four first source vectors with each signed 8-bit or 16-bit element in the second source vector, widens each product to 32-bits or 64-bits and destructively subtracts these values from the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.", + "html": "

This signed integer multiply-subtract long-long instruction multiplies each signed 8-bit or 16-bit element in the one, two, or four first source vectors with each signed 8-bit or 16-bit element in the second source vector, widens each product to 32-bits or 64-bits and destructively subtracts these values from the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA quad-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 16-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLSLL": + return { + "tooltip": "This signed integer multiply-subtract long-long instruction multiplies each signed 8-bit or 16-bit element in the two or four first source vectors with each signed 8-bit or 16-bit element in the one, two, or four second source vectors, widens each product to 32-bits or 64-bits and destructively subtracts these values from the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

This signed integer multiply-subtract long-long instruction multiplies each signed 8-bit or 16-bit element in the two or four first source vectors with each signed 8-bit or 16-bit element in the one, two, or four second source vectors, widens each product to 32-bits or 64-bits and destructively subtracts these values from the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA quad-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 16-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLSLT": + return { + "tooltip": "Multiply the corresponding odd-numbered signed elements of the first and second source vectors and destructively subtract from the overlapping double-width elements of the addend vector. This instruction is unpredicated.", + "html": "

Multiply the corresponding odd-numbered signed elements of the first and second source vectors and destructively subtract from the overlapping double-width elements of the addend vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMLSLT": + return { + "tooltip": "Multiply the odd-numbered signed elements within each 128-bit segment of the first source vector by the specified signed element in the corresponding second source vector segment and destructively subtract from the overlapping double-width elements of the addend vector.", + "html": "

Multiply the odd-numbered signed elements within each 128-bit segment of the first source vector by the specified signed element in the corresponding second source vector segment and destructively subtract from the overlapping double-width elements of the addend vector.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 2 or 3 bits depending on the size of the element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMMLA": + return { + "tooltip": "Signed 8-bit integer matrix multiply-accumulate. This instruction multiplies the 2x8 matrix of signed 8-bit integer values in the first source vector by the 8x2 matrix of signed 8-bit integer values in the second source vector. The resulting 2x2 32-bit integer matrix product is destructively added to the 32-bit integer matrix accumulator in the destination vector. This is equivalent to performing an 8-way dot product per destination element.", + "html": "

Signed 8-bit integer matrix multiply-accumulate. This instruction multiplies the 2x8 matrix of signed 8-bit integer values in the first source vector by the 8x2 matrix of signed 8-bit integer values in the second source vector. The resulting 2x2 32-bit integer matrix product is destructively added to the 32-bit integer matrix accumulator in the destination vector. This is equivalent to performing an 8-way dot product per destination element.

From Armv8.2 to Armv8.5, this is an optional instruction. From Armv8.6 it is mandatory for implementations that include Advanced SIMD to support it. ID_AA64ISAR1_EL1.I8MM indicates whether this instruction is supported.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMMLA": + return { + "tooltip": "The signed integer matrix multiply-accumulate instruction multiplies the 2\u00d78 matrix of signed 8-bit integer values held in each 128-bit segment of the first source vector by the 8\u00d72 matrix of signed 8-bit integer values in the corresponding segment of the second source vector. The resulting 2\u00d72 widened 32-bit integer matrix product is then destructively added to the 32-bit integer matrix accumulator held in the corresponding segment of the addend and destination vector. This is equivalent to performing an 8-way dot product per destination element.", + "html": "

The signed integer matrix multiply-accumulate instruction multiplies the 2\u00d78 matrix of signed 8-bit integer values held in each 128-bit segment of the first source vector by the 8\u00d72 matrix of signed 8-bit integer values in the corresponding segment of the second source vector. The resulting 2\u00d72 widened 32-bit integer matrix product is then destructively added to the 32-bit integer matrix accumulator held in the corresponding segment of the addend and destination vector. This is equivalent to performing an 8-way dot product per destination element.

This instruction is unpredicated.

ID_AA64ZFR0_EL1.I8MM indicates whether this instruction is implemented.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMNEGL": + return { + "tooltip": "Signed Multiply-Negate Long multiplies two 32-bit register values, negates the product, and writes the result to the 64-bit destination register.", + "html": "

Signed Multiply-Negate Long multiplies two 32-bit register values, negates the product, and writes the result to the 64-bit destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMOPA": + return { + "tooltip": "This instruction works with a 32-bit element ZA tile.", + "html": "

This instruction works with a 32-bit element ZA tile.

The signed integer sum of outer products and accumulate instructions multiply the sub-matrix in the first source vector by the sub-matrix in the second source vector. The first source holds SVLS\u00d72 sub-matrix of signed 16-bit integer values, and the second source holds 2\u00d7SVLS sub-matrix of signed 16-bit integer values.

Each source vector is independently predicated by a corresponding governing predicate. When a 16-bit source element is inactive, it is treated as having the value 0.

The resulting SVLS\u00d7SVLS widened 32-bit integer sum of outer products is then destructively added to the 32-bit integer destination tile. This is equivalent to performing a 2-way dot product and accumulate to each of the destination tile elements.

Each 32-bit container of the first source vector holds 2 consecutive column elements of each row of a SVLS\u00d72 sub-matrix, and each 32-bit container of the second source vector holds 2 consecutive row elements of each column of a 2\u00d7SVLS sub-matrix.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMOPA": + return { + "tooltip": "The 8-bit integer variant works with a 32-bit element ZA tile.", + "html": "

The 8-bit integer variant works with a 32-bit element ZA tile.

The 16-bit integer variant works with a 64-bit element ZA tile.

The signed integer sum of outer products and accumulate instructions multiply the sub-matrix in the first source vector by the sub-matrix in the second source vector. In case of the 8-bit integer variant, the first source holds SVLS\u00d74 sub-matrix of signed 8-bit integer values, and the second source holds 4\u00d7SVLS sub-matrix of signed 8-bit integer values. In case of the 16-bit integer variant, the first source holds SVLD\u00d74 sub-matrix of signed 16-bit integer values, and the second source holds 4\u00d7SVLD sub-matrix of signed 16-bit integer values.

Each source vector is independently predicated by a corresponding governing predicate. When an 8-bit source element in case of 8-bit integer variant or a 16-bit source element in case of 16-bit integer variant is Inactive, it is treated as having the value 0.

The resulting SVLS\u00d7SVLS widened 32-bit integer or SVLD\u00d7SVLD widened 64-bit integer sum of outer products is then destructively added to the 32-bit integer or 64-bit integer destination tile, respectively for 8-bit integer and 16-bit integer instruction variants. This is equivalent to performing a 4-way dot product and accumulate to each of the destination tile elements.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMOPS": + return { + "tooltip": "This instruction works with a 32-bit element ZA tile.", + "html": "

This instruction works with a 32-bit element ZA tile.

The signed integer sum of outer products and subtract instructions multiply the sub-matrix in the first source vector by the sub-matrix in the second source vector. The first source holds SVLS\u00d72 sub-matrix of signed 16-bit integer values, and the second source holds 2\u00d7SVLS sub-matrix of signed 16-bit integer values.

Each source vector is independently predicated by a corresponding governing predicate. When a 16-bit source element is inactive, it is treated as having the value 0.

The resulting SVLS\u00d7SVLS widened 32-bit integer sum of outer products is then destructively subtracted from the 32-bit integer destination tile. This is equivalent to performing a 2-way dot product and subtract from each of the destination tile elements.

Each 32-bit container of the first source vector holds 2 consecutive column elements of each row of a SVLS\u00d72 sub-matrix, and each 32-bit container of the second source vector holds 2 consecutive row elements of each column of a 2\u00d7SVLS sub-matrix.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMOPS": + return { + "tooltip": "The 8-bit integer variant works with a 32-bit element ZA tile.", + "html": "

The 8-bit integer variant works with a 32-bit element ZA tile.

The 16-bit integer variant works with a 64-bit element ZA tile.

The signed integer sum of outer products and subtract instructions multiply the sub-matrix in the first source vector by the sub-matrix in the second source vector. In case of the 8-bit integer variant, the first source holds SVLS\u00d74 sub-matrix of signed 8-bit integer values, and the second source holds 4\u00d7SVLS sub-matrix of signed 8-bit integer values. In case of the 16-bit integer variant, the first source holds SVLD\u00d74 sub-matrix of signed 16-bit integer values, and the second source holds 4\u00d7SVLD sub-matrix of signed 16-bit integer values.

Each source vector is independently predicated by a corresponding governing predicate. When an 8-bit source element in case of 8-bit integer variant or a 16-bit source element in case of 16-bit integer variant is Inactive, it is treated as having the value 0.

The resulting SVLS\u00d7SVLS widened 32-bit integer or SVLD\u00d7SVLD widened 64-bit integer sum of outer products is then destructively subtracted from the 32-bit integer or 64-bit integer destination tile, respectively for 8-bit integer and 16-bit integer instruction variants. This is equivalent to performing a 4-way dot product and subtract from each of the destination tile elements.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMOV": + return { + "tooltip": "Signed Move vector element to general-purpose register. This instruction reads the signed integer from the source SIMD&FP register, sign-extends it to form a 32-bit or 64-bit value, and writes the result to destination general-purpose register.", + "html": "

Signed Move vector element to general-purpose register. This instruction reads the signed integer from the source SIMD&FP register, sign-extends it to form a 32-bit or 64-bit value, and writes the result to destination general-purpose register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMSTART": + return { + "tooltip": "Enables access to Streaming SVE mode and SME architectural state.", + "html": "

Enables access to Streaming SVE mode and SME architectural state.

SMSTART enters Streaming SVE mode, and enables the SME ZA storage.

SMSTART SM enters Streaming SVE mode, but does not enable the SME ZA storage.

SMSTART ZA enables the SME ZA storage, but does not cause an entry to Streaming SVE mode.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMSTOP": + return { + "tooltip": "Disables access to Streaming SVE mode and SME architectural state.", + "html": "

Disables access to Streaming SVE mode and SME architectural state.

SMSTOP exits Streaming SVE mode, and disables the SME ZA storage.

SMSTOP SM exits Streaming SVE mode, but does not disable the SME ZA storage.

SMSTOP ZA disables the SME ZA storage, but does not cause an exit from Streaming SVE mode.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMSUBL": + return { + "tooltip": "Signed Multiply-Subtract Long multiplies two 32-bit register values, subtracts the product from a 64-bit register value, and writes the result to the 64-bit destination register.", + "html": "

Signed Multiply-Subtract Long multiplies two 32-bit register values, subtracts the product from a 64-bit register value, and writes the result to the 64-bit destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMULH": + return { + "tooltip": "Signed Multiply High multiplies two 64-bit register values, and writes bits[127:64] of the 128-bit result to the 64-bit destination register.", + "html": "

Signed Multiply High multiplies two 64-bit register values, and writes bits[127:64] of the 128-bit result to the 64-bit destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMULH": + return { + "tooltip": "Widening multiply signed integer values in active elements of the first source vector by corresponding elements of the second source vector and destructively place the high half of the result in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Widening multiply signed integer values in active elements of the first source vector by corresponding elements of the second source vector and destructively place the high half of the result in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMULH": + return { + "tooltip": "Widening multiply signed integer values of all elements of the first source vector by corresponding elements of the second source vector and place the high half of the result in the corresponding elements of the destination vector. This instruction is unpredicated.", + "html": "

Widening multiply signed integer values of all elements of the first source vector by corresponding elements of the second source vector and place the high half of the result in the corresponding elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMULL": + case "SMULL2": + return { + "tooltip": "Signed Multiply Long (vector, by element). This instruction multiplies each vector element in the lower or upper half of the first source SIMD&FP register by the specified vector element of the second source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.", + "html": "

Signed Multiply Long (vector, by element). This instruction multiplies each vector element in the lower or upper half of the first source SIMD&FP register by the specified vector element of the second source SIMD&FP register, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

The SMULL instruction extracts vector elements from the lower half of the first source register. The SMULL2 instruction extracts vector elements from the upper half of the first source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMULL": + case "SMULL2": + return { + "tooltip": "Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Signed Multiply Long (vector). This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, places the results in a vector, and writes the vector to the destination SIMD&FP register.

The destination vector elements are twice as long as the elements that are multiplied.

The SMULL instruction extracts each source vector from the lower half of each source register. The SMULL2 instruction extracts each source vector from the upper half of each source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMULL": + return { + "tooltip": "Signed Multiply Long multiplies two 32-bit register values, and writes the result to the 64-bit destination register.", + "html": "

Signed Multiply Long multiplies two 32-bit register values, and writes the result to the 64-bit destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMULLB": + return { + "tooltip": "Multiply the corresponding even-numbered signed elements of the first and second source vectors, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Multiply the corresponding even-numbered signed elements of the first and second source vectors, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMULLB": + return { + "tooltip": "Multiply the even-numbered signed elements within each 128-bit segment of the first source vector by the specified signed element in the corresponding second source vector segment, and place the results in the overlapping double-width elements of the destination vector register.", + "html": "

Multiply the even-numbered signed elements within each 128-bit segment of the first source vector by the specified signed element in the corresponding second source vector segment, and place the results in the overlapping double-width elements of the destination vector register.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 2 or 3 bits depending on the size of the element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMULLT": + return { + "tooltip": "Multiply the corresponding odd-numbered signed elements of the first and second source vectors, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Multiply the corresponding odd-numbered signed elements of the first and second source vectors, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SMULLT": + return { + "tooltip": "Multiply the odd-numbered signed elements within each 128-bit segment of the first source vector by the specified element in the corresponding second source vector segment, and place the results in the overlapping double-width elements of the destination vector register.", + "html": "

Multiply the odd-numbered signed elements within each 128-bit segment of the first source vector by the specified element in the corresponding second source vector segment, and place the results in the overlapping double-width elements of the destination vector register.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 2 or 3 bits depending on the size of the element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SPLICE": + return { + "tooltip": "Select a region from the first source vector and copy it to the lowest-numbered elements of the result. Then set any remaining elements of the result to a copy of the lowest-numbered elements from the second source vector. The region is selected using the first and last true elements in the vector select predicate register. The result is placed destructively in the destination and first source vector, or constructively in the destination vector.", + "html": "

Select a region from the first source vector and copy it to the lowest-numbered elements of the result. Then set any remaining elements of the result to a copy of the lowest-numbered elements from the second source vector. The region is selected using the first and last true elements in the vector select predicate register. The result is placed destructively in the destination and first source vector, or constructively in the destination vector.

The Destructive encoding of this instruction might be immediately preceded in program order by a MOVPRFX instruction. The MOVPRFX instruction must conform to all of the following requirements, otherwise the behavior of the MOVPRFX and this instruction is UNPREDICTABLE: The MOVPRFX instruction must be unpredicated. The MOVPRFX instruction must specify the same destination register as this instruction. The destination register must not refer to architectural register state referenced by any other source operand register of this instruction.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQABS": + return { + "tooltip": "Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.", + "html": "

Signed saturating Absolute value. This instruction reads each vector element from the source SIMD&FP register, puts the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQABS": + return { + "tooltip": "Compute the absolute value of the signed integer in each active element of the source vector, and place the results in the corresponding elements of the destination vector. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. Inactive elements in the destination vector register remain unmodified.", + "html": "

Compute the absolute value of the signed integer in each active element of the source vector, and place the results in the corresponding elements of the destination vector. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQADD": + return { + "tooltip": "Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Signed saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQADD": + return { + "tooltip": "Add active signed elements of the first source vector to corresponding signed elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. Inactive elements in the destination vector register remain unmodified.", + "html": "

Add active signed elements of the first source vector to corresponding signed elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQADD": + return { + "tooltip": "Signed saturating add of an unsigned immediate to each element of the source vector, and destructively place the results in the corresponding elements of the source vector. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.", + "html": "

Signed saturating add of an unsigned immediate to each element of the source vector, and destructively place the results in the corresponding elements of the source vector. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.

The immediate is an unsigned value in the range 0 to 255, and for element widths of 16 bits or higher it may also be a positive multiple of 256 in the range 256 to 65280.

The immediate is encoded in 8 bits with an optional left shift by 8. The preferred disassembly when the shift option is specified is \"#<uimm8>, LSL #8\". However an assembler and disassembler may also allow use of the shifted 16-bit value unless the immediate is 0 and the shift amount is 8, which must be unambiguously described as \"#0, LSL #8\".

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQADD": + return { + "tooltip": "Signed saturating add all elements of the second source vector to corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.", + "html": "

Signed saturating add all elements of the second source vector to corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQCADD": + return { + "tooltip": "Add the real and imaginary components of the integral complex numbers from the first source vector to the complex numbers from the second source vector which have first been rotated by 90 or 270 degrees in the direction from the positive real axis towards the positive imaginary axis, when considered in polar representation, equivalent to multiplying the complex numbers in the second source vector by \u00b1j beforehand. Destructively place the results in the corresponding elements of the first source vector. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.", + "html": "

Add the real and imaginary components of the integral complex numbers from the first source vector to the complex numbers from the second source vector which have first been rotated by 90 or 270 degrees in the direction from the positive real axis towards the positive imaginary axis, when considered in polar representation, equivalent to multiplying the complex numbers in the second source vector by \u00b1j beforehand. Destructively place the results in the corresponding elements of the first source vector. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.

Each complex number is represented in a vector register as an even/odd pair of elements with the real part in the even-numbered element and the imaginary part in the odd-numbered element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQCVT": + return { + "tooltip": "Saturate the signed integer value in each element of the two source vectors to half the original source element width, and place the results in the half-width destination elements.", + "html": "

Saturate the signed integer value in each element of the two source vectors to half the original source element width, and place the results in the half-width destination elements.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQCVT": + return { + "tooltip": "Saturate the signed integer value in each element of the four source vectors to quarter the original source element width, and place the results in the quarter-width destination elements.", + "html": "

Saturate the signed integer value in each element of the four source vectors to quarter the original source element width, and place the results in the quarter-width destination elements.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQCVTN": + return { + "tooltip": "Saturate the signed integer value in each element of the group of two source vectors to half the original source element width, and place the two-way interleaved results in the half-width destination elements.", + "html": "

Saturate the signed integer value in each element of the group of two source vectors to half the original source element width, and place the two-way interleaved results in the half-width destination elements.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQCVTN": + return { + "tooltip": "Saturate the signed integer value in each element of the four source vectors to quarter the original source element width, and place the four-way interleaved results in the quarter-width destination elements.", + "html": "

Saturate the signed integer value in each element of the four source vectors to quarter the original source element width, and place the four-way interleaved results in the quarter-width destination elements.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQCVTU": + return { + "tooltip": "Saturate the signed integer value in each element of the two source vectors to unsigned integer value that is half the original source element width, and place the results in the half-width destination elements.", + "html": "

Saturate the signed integer value in each element of the two source vectors to unsigned integer value that is half the original source element width, and place the results in the half-width destination elements.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQCVTU": + return { + "tooltip": "Saturate the signed integer value in each element of the four source vectors to unsigned integer value that is quarter the original source element width, and place the results in the quarter-width destination elements.", + "html": "

Saturate the signed integer value in each element of the four source vectors to unsigned integer value that is quarter the original source element width, and place the results in the quarter-width destination elements.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQCVTUN": + return { + "tooltip": "Saturate the signed integer value in each element of the group of two source vectors to unsigned integer value that is half the original source element width, and place the two-way interleaved results in the half-width destination elements.", + "html": "

Saturate the signed integer value in each element of the group of two source vectors to unsigned integer value that is half the original source element width, and place the two-way interleaved results in the half-width destination elements.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQCVTUN": + return { + "tooltip": "Saturate the signed integer value in each element of the four source vectors to unsigned integer value that is quarter the original source element width, and place the four-way interleaved results in the quarter-width destination elements.", + "html": "

Saturate the signed integer value in each element of the four source vectors to unsigned integer value that is quarter the original source element width, and place the four-way interleaved results in the quarter-width destination elements.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDECB": + return { + "tooltip": "Determines the number of active 8-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement the scalar destination. The result is saturated to the source general-purpose register's signed integer range. A 32-bit saturated result is then sign-extended to 64 bits.", + "html": "

Determines the number of active 8-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement the scalar destination. The result is saturated to the source general-purpose register's signed integer range. A 32-bit saturated result is then sign-extended to 64 bits.

The named predicate constraint limits the number of active elements in a single predicate to:

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDECD": + return { + "tooltip": "Determines the number of active 64-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement the scalar destination. The result is saturated to the source general-purpose register's signed integer range. A 32-bit saturated result is then sign-extended to 64 bits.", + "html": "

Determines the number of active 64-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement the scalar destination. The result is saturated to the source general-purpose register's signed integer range. A 32-bit saturated result is then sign-extended to 64 bits.

The named predicate constraint limits the number of active elements in a single predicate to:

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDECD": + return { + "tooltip": "Determines the number of active 64-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement all destination vector elements. The results are saturated to the 64-bit signed integer range.", + "html": "

Determines the number of active 64-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement all destination vector elements. The results are saturated to the 64-bit signed integer range.

The named predicate constraint limits the number of active elements in a single predicate to:

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDECH": + return { + "tooltip": "Determines the number of active 16-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement the scalar destination. The result is saturated to the source general-purpose register's signed integer range. A 32-bit saturated result is then sign-extended to 64 bits.", + "html": "

Determines the number of active 16-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement the scalar destination. The result is saturated to the source general-purpose register's signed integer range. A 32-bit saturated result is then sign-extended to 64 bits.

The named predicate constraint limits the number of active elements in a single predicate to:

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDECH": + return { + "tooltip": "Determines the number of active 16-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement all destination vector elements. The results are saturated to the 16-bit signed integer range.", + "html": "

Determines the number of active 16-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement all destination vector elements. The results are saturated to the 16-bit signed integer range.

The named predicate constraint limits the number of active elements in a single predicate to:

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDECP": + return { + "tooltip": "Counts the number of true elements in the source predicate and then uses the result to decrement the scalar destination. The result is saturated to the source general-purpose register's signed integer range. A 32-bit saturated result is then sign-extended to 64 bits.", + "html": "

Counts the number of true elements in the source predicate and then uses the result to decrement the scalar destination. The result is saturated to the source general-purpose register's signed integer range. A 32-bit saturated result is then sign-extended to 64 bits.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDECP": + return { + "tooltip": "Counts the number of true elements in the source predicate and then uses the result to decrement all destination vector elements. The results are saturated to the element signed integer range.", + "html": "

Counts the number of true elements in the source predicate and then uses the result to decrement all destination vector elements. The results are saturated to the element signed integer range.

The predicate size specifier may be omitted in assembler source code, but this is deprecated and will be prohibited in a future release of the architecture.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDECW": + return { + "tooltip": "Determines the number of active 32-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement the scalar destination. The result is saturated to the source general-purpose register's signed integer range. A 32-bit saturated result is then sign-extended to 64 bits.", + "html": "

Determines the number of active 32-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement the scalar destination. The result is saturated to the source general-purpose register's signed integer range. A 32-bit saturated result is then sign-extended to 64 bits.

The named predicate constraint limits the number of active elements in a single predicate to:

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDECW": + return { + "tooltip": "Determines the number of active 32-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement all destination vector elements. The results are saturated to the 32-bit signed integer range.", + "html": "

Determines the number of active 32-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement all destination vector elements. The results are saturated to the 32-bit signed integer range.

The named predicate constraint limits the number of active elements in a single predicate to:

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMLAL": + case "SQDMLAL2": + return { + "tooltip": "Signed saturating Doubling Multiply-Add Long (by element). This instruction multiplies each vector element in the lower or upper half of the first source SIMD&FP register by the specified vector element of the second source SIMD&FP register, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.", + "html": "

Signed saturating Doubling Multiply-Add Long (by element). This instruction multiplies each vector element in the lower or upper half of the first source SIMD&FP register by the specified vector element of the second source SIMD&FP register, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

The SQDMLAL instruction extracts vector elements from the lower half of the first source register. The SQDMLAL2 instruction extracts vector elements from the upper half of the first source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMLAL": + case "SQDMLAL2": + return { + "tooltip": "Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.", + "html": "

Signed saturating Doubling Multiply-Add Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and accumulates the final results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

The SQDMLAL instruction extracts each source vector from the lower half of each source register. The SQDMLAL2 instruction extracts each source vector from the upper half of each source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMLALB": + return { + "tooltip": "Multiply then double the corresponding even-numbered signed elements of the first and second source vectors. Each intermediate value is saturated to the double-width N-bit value's signed integer range -2(N-1) to (2(N-1) )-1. Then destructively add to the overlapping double-width elements of the addend and destination vector. Each destination element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.", + "html": "

Multiply then double the corresponding even-numbered signed elements of the first and second source vectors. Each intermediate value is saturated to the double-width N-bit value's signed integer range -2(N-1) to (2(N-1) )-1. Then destructively add to the overlapping double-width elements of the addend and destination vector. Each destination element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMLALB": + return { + "tooltip": "Multiply then double the even-numbered signed elements within each 128-bit segment of the first source vector and specified signed element in the corresponding second source vector segment. Each intermediate value is saturated to the double-width N-bit value's signed integer range -2(N-1) to (2(N-1) )-1. Then destructively add to the overlapping double-width elements of the addend and destination vector. Each destination element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1.", + "html": "

Multiply then double the even-numbered signed elements within each 128-bit segment of the first source vector and specified signed element in the corresponding second source vector segment. Each intermediate value is saturated to the double-width N-bit value's signed integer range -2(N-1) to (2(N-1) )-1. Then destructively add to the overlapping double-width elements of the addend and destination vector. Each destination element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 2 or 3 bits depending on the size of the element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMLALBT": + return { + "tooltip": "Multiply then double the corresponding even-numbered signed elements of the first and odd-numbered signed elements of the second source vector. Each intermediate value is saturated to the double-width N-bit value's signed integer range -2(N-1) to (2(N-1) )-1. Then destructively add to the overlapping double-width elements of the addend and destination vector. Each destination element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.", + "html": "

Multiply then double the corresponding even-numbered signed elements of the first and odd-numbered signed elements of the second source vector. Each intermediate value is saturated to the double-width N-bit value's signed integer range -2(N-1) to (2(N-1) )-1. Then destructively add to the overlapping double-width elements of the addend and destination vector. Each destination element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMLALT": + return { + "tooltip": "Multiply then double the corresponding odd-numbered signed elements of the first and second source vectors. Each intermediate value is saturated to the double-width N-bit value's signed integer range -2(N-1) to (2(N-1) )-1. Then destructively add to the overlapping double-width elements of the addend and destination vector. Each destination element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.", + "html": "

Multiply then double the corresponding odd-numbered signed elements of the first and second source vectors. Each intermediate value is saturated to the double-width N-bit value's signed integer range -2(N-1) to (2(N-1) )-1. Then destructively add to the overlapping double-width elements of the addend and destination vector. Each destination element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMLALT": + return { + "tooltip": "Multiply then double the odd-numbered signed elements within each 128-bit segment of the first source vector and the specified signed element in the corresponding second source vector segment. Each intermediate value is saturated to the double-width N-bit value's signed integer range -2(N-1) to (2(N-1) )-1. Then destructively add to the overlapping double-width elements of the addend and destination vector. Each destination element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1.", + "html": "

Multiply then double the odd-numbered signed elements within each 128-bit segment of the first source vector and the specified signed element in the corresponding second source vector segment. Each intermediate value is saturated to the double-width N-bit value's signed integer range -2(N-1) to (2(N-1) )-1. Then destructively add to the overlapping double-width elements of the addend and destination vector. Each destination element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 2 or 3 bits depending on the size of the element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMLSL": + case "SQDMLSL2": + return { + "tooltip": "Signed saturating Doubling Multiply-Subtract Long (by element). This instruction multiplies each vector element in the lower or upper half of the first source SIMD&FP register by the specified vector element of the second source SIMD&FP register, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are signed integer values.", + "html": "

Signed saturating Doubling Multiply-Subtract Long (by element). This instruction multiplies each vector element in the lower or upper half of the first source SIMD&FP register by the specified vector element of the second source SIMD&FP register, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are signed integer values.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

The SQDMLSL instruction extracts vector elements from the lower half of the first source register. The SQDMLSL2 instruction extracts vector elements from the upper half of the first source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMLSL": + case "SQDMLSL2": + return { + "tooltip": "Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.", + "html": "

Signed saturating Doubling Multiply-Subtract Long. This instruction multiplies corresponding signed integer values in the lower or upper half of the vectors of the two source SIMD&FP registers, doubles the results, and subtracts the final results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

The SQDMLSL instruction extracts each source vector from the lower half of each source register. The SQDMLSL2 instruction extracts each source vector from the upper half of each source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMLSLB": + return { + "tooltip": "Multiply then double the corresponding even-numbered signed elements of the first and second source vectors. Each intermediate value is saturated to the double-width N-bit value's signed integer range -2(N-1) to (2(N-1) )-1. Then destructively subtract from the overlapping double-width elements of the addend and destination vector. Each destination element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.", + "html": "

Multiply then double the corresponding even-numbered signed elements of the first and second source vectors. Each intermediate value is saturated to the double-width N-bit value's signed integer range -2(N-1) to (2(N-1) )-1. Then destructively subtract from the overlapping double-width elements of the addend and destination vector. Each destination element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMLSLB": + return { + "tooltip": "Multiply then double the even-numbered signed elements within each 128-bit segment of the first source vector and the specified signed element in the corresponding second source vector segment. Each intermediate value is saturated to the double-width N-bit value's signed integer range -2(N-1) to (2(N-1) )-1. Then destructively subtract from the overlapping double-width elements of the addend and destination vector. Each destination element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1.", + "html": "

Multiply then double the even-numbered signed elements within each 128-bit segment of the first source vector and the specified signed element in the corresponding second source vector segment. Each intermediate value is saturated to the double-width N-bit value's signed integer range -2(N-1) to (2(N-1) )-1. Then destructively subtract from the overlapping double-width elements of the addend and destination vector. Each destination element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 2 or 3 bits depending on the size of the element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMLSLBT": + return { + "tooltip": "Multiply then double the corresponding even-numbered signed elements of the first and odd-numbered signed elements of the second source vector. Each intermediate value is saturated to the double-width N-bit value's signed integer range -2(N-1) to (2(N-1) )-1. Then destructively subtract from the overlapping double-width elements of the addend and destination vector. Each destination element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.", + "html": "

Multiply then double the corresponding even-numbered signed elements of the first and odd-numbered signed elements of the second source vector. Each intermediate value is saturated to the double-width N-bit value's signed integer range -2(N-1) to (2(N-1) )-1. Then destructively subtract from the overlapping double-width elements of the addend and destination vector. Each destination element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMLSLT": + return { + "tooltip": "Multiply then double the corresponding odd-numbered signed elements of the first and second source vectors. Each intermediate value is saturated to the double-width N-bit value's signed integer range -2(N-1) to (2(N-1) )-1. Then destructively subtract from the overlapping double-width elements of the addend and destination vector. Each destination element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.", + "html": "

Multiply then double the corresponding odd-numbered signed elements of the first and second source vectors. Each intermediate value is saturated to the double-width N-bit value's signed integer range -2(N-1) to (2(N-1) )-1. Then destructively subtract from the overlapping double-width elements of the addend and destination vector. Each destination element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMLSLT": + return { + "tooltip": "Multiply then double the odd-numbered signed elements within each 128-bit segment of the first source vector and the specified signed element in the corresponding second source vector segment. Each intermediate value is saturated to the double-width N-bit value's signed integer range -2(N-1) to (2(N-1) )-1. Then destructively subtract from the overlapping double-width elements of the addend and destination vector. Each destination element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1.", + "html": "

Multiply then double the odd-numbered signed elements within each 128-bit segment of the first source vector and the specified signed element in the corresponding second source vector segment. Each intermediate value is saturated to the double-width N-bit value's signed integer range -2(N-1) to (2(N-1) )-1. Then destructively subtract from the overlapping double-width elements of the addend and destination vector. Each destination element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 2 or 3 bits depending on the size of the element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMULH": + return { + "tooltip": "Signed saturating Doubling Multiply returning High half (by element). This instruction multiplies each vector element in the first source SIMD&FP register by the specified vector element of the second source SIMD&FP register, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Signed saturating Doubling Multiply returning High half (by element). This instruction multiplies each vector element in the first source SIMD&FP register by the specified vector element of the second source SIMD&FP register, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

The results are truncated. For rounded results, see SQRDMULH.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMULH": + return { + "tooltip": "Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Signed saturating Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

The results are truncated. For rounded results, see SQRDMULH.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMULH": + return { + "tooltip": "Multiply then double the corresponding signed elements of the two or four first source vectors and the signed elements of the second source vector, and destructively place the most significant half of the result in the corresponding elements of the two or four first source vectors. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1))-1.", + "html": "

Multiply then double the corresponding signed elements of the two or four first source vectors and the signed elements of the second source vector, and destructively place the most significant half of the result in the corresponding elements of the two or four first source vectors. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1))-1.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMULH": + return { + "tooltip": "Multiply then double the corresponding signed elements of the two or four first and second source vectors, and destructively place the most significant half of the result in the corresponding elements of the two or four first source vectors. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1))-1.", + "html": "

Multiply then double the corresponding signed elements of the two or four first and second source vectors, and destructively place the most significant half of the result in the corresponding elements of the two or four first source vectors. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1))-1.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMULH": + return { + "tooltip": "Multiply then double the corresponding signed elements of the first and second source vectors, and place the most significant half of the results in the corresponding elements of the destination vector register. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.", + "html": "

Multiply then double the corresponding signed elements of the first and second source vectors, and place the most significant half of the results in the corresponding elements of the destination vector register. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMULH": + return { + "tooltip": "Multiply all signed elements within each 128-bit segment of the first source vector by the specified signed element in the corresponding second source vector segment, double and place the most significant half of the result in the corresponding elements of the destination vector register. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1.", + "html": "

Multiply all signed elements within each 128-bit segment of the first source vector by the specified signed element in the corresponding second source vector segment, double and place the most significant half of the result in the corresponding elements of the destination vector register. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 1 to 3 bits depending on the size of the element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMULL": + case "SQDMULL2": + return { + "tooltip": "Signed saturating Doubling Multiply Long (by element). This instruction multiplies each vector element in the lower or upper half of the first source SIMD&FP register by the specified vector element of the second source SIMD&FP register, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.", + "html": "

Signed saturating Doubling Multiply Long (by element). This instruction multiplies each vector element in the lower or upper half of the first source SIMD&FP register by the specified vector element of the second source SIMD&FP register, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

The SQDMULL instruction extracts the first source vector from the lower half of the first source register. The SQDMULL2 instruction extracts the first source vector from the upper half of the first source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMULL": + case "SQDMULL2": + return { + "tooltip": "Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Signed saturating Doubling Multiply Long. This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, doubles the results, places the final results in a vector, and writes the vector to the destination SIMD&FP register.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

The SQDMULL instruction extracts each source vector from the lower half of each source register. The SQDMULL2 instruction extracts each source vector from the upper half of each source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMULLB": + return { + "tooltip": "Multiply the corresponding even-numbered signed elements of the first and second source vectors, double and place the results in the overlapping double-width elements of the destination vector. Each result element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.", + "html": "

Multiply the corresponding even-numbered signed elements of the first and second source vectors, double and place the results in the overlapping double-width elements of the destination vector. Each result element is saturated to the double-width N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMULLB": + return { + "tooltip": "Multiply then double the even-numbered signed elements within each 128-bit segment of the first source vector and the specified element in the corresponding second source vector segment, and place the results in overlapping double-width elements of the destination vector register. Each result element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1.", + "html": "

Multiply then double the even-numbered signed elements within each 128-bit segment of the first source vector and the specified element in the corresponding second source vector segment, and place the results in overlapping double-width elements of the destination vector register. Each result element is saturated to the double-width N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 2 or 3 bits depending on the size of the element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMULLT": + return { + "tooltip": "Multiply the corresponding odd-numbered signed elements of the first and second source vectors, double and place the results in the overlapping double-width elements of the destination vector. Each result element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.", + "html": "

Multiply the corresponding odd-numbered signed elements of the first and second source vectors, double and place the results in the overlapping double-width elements of the destination vector. Each result element is saturated to the double-width N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQDMULLT": + return { + "tooltip": "Multiply then double the odd-numbered signed elements within each 128-bit segment of the first source vector and the specified element in the corresponding second source vector segment, and place the results in overlapping double-width elements of the destination vector register. Each result element is saturated to the double-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1.", + "html": "

Multiply then double the odd-numbered signed elements within each 128-bit segment of the first source vector and the specified element in the corresponding second source vector segment, and place the results in overlapping double-width elements of the destination vector register. Each result element is saturated to the double-width N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 2 or 3 bits depending on the size of the element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQINCB": + return { + "tooltip": "Determines the number of active 8-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment the scalar destination. The result is saturated to the source general-purpose register's signed integer range. A 32-bit saturated result is then sign-extended to 64 bits.", + "html": "

Determines the number of active 8-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment the scalar destination. The result is saturated to the source general-purpose register's signed integer range. A 32-bit saturated result is then sign-extended to 64 bits.

The named predicate constraint limits the number of active elements in a single predicate to:

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQINCD": + return { + "tooltip": "Determines the number of active 64-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment the scalar destination. The result is saturated to the source general-purpose register's signed integer range. A 32-bit saturated result is then sign-extended to 64 bits.", + "html": "

Determines the number of active 64-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment the scalar destination. The result is saturated to the source general-purpose register's signed integer range. A 32-bit saturated result is then sign-extended to 64 bits.

The named predicate constraint limits the number of active elements in a single predicate to:

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQINCD": + return { + "tooltip": "Determines the number of active 64-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment all destination vector elements. The results are saturated to the 64-bit signed integer range.", + "html": "

Determines the number of active 64-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment all destination vector elements. The results are saturated to the 64-bit signed integer range.

The named predicate constraint limits the number of active elements in a single predicate to:

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQINCH": + return { + "tooltip": "Determines the number of active 16-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment the scalar destination. The result is saturated to the source general-purpose register's signed integer range. A 32-bit saturated result is then sign-extended to 64 bits.", + "html": "

Determines the number of active 16-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment the scalar destination. The result is saturated to the source general-purpose register's signed integer range. A 32-bit saturated result is then sign-extended to 64 bits.

The named predicate constraint limits the number of active elements in a single predicate to:

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQINCH": + return { + "tooltip": "Determines the number of active 16-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment all destination vector elements. The results are saturated to the 16-bit signed integer range.", + "html": "

Determines the number of active 16-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment all destination vector elements. The results are saturated to the 16-bit signed integer range.

The named predicate constraint limits the number of active elements in a single predicate to:

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQINCP": + return { + "tooltip": "Counts the number of true elements in the source predicate and then uses the result to increment the scalar destination. The result is saturated to the source general-purpose register's signed integer range. A 32-bit saturated result is then sign-extended to 64 bits.", + "html": "

Counts the number of true elements in the source predicate and then uses the result to increment the scalar destination. The result is saturated to the source general-purpose register's signed integer range. A 32-bit saturated result is then sign-extended to 64 bits.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQINCP": + return { + "tooltip": "Counts the number of true elements in the source predicate and then uses the result to increment all destination vector elements. The results are saturated to the element signed integer range.", + "html": "

Counts the number of true elements in the source predicate and then uses the result to increment all destination vector elements. The results are saturated to the element signed integer range.

The predicate size specifier may be omitted in assembler source code, but this is deprecated and will be prohibited in a future release of the architecture.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQINCW": + return { + "tooltip": "Determines the number of active 32-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment the scalar destination. The result is saturated to the source general-purpose register's signed integer range. A 32-bit saturated result is then sign-extended to 64 bits.", + "html": "

Determines the number of active 32-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment the scalar destination. The result is saturated to the source general-purpose register's signed integer range. A 32-bit saturated result is then sign-extended to 64 bits.

The named predicate constraint limits the number of active elements in a single predicate to:

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQINCW": + return { + "tooltip": "Determines the number of active 32-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment all destination vector elements. The results are saturated to the 32-bit signed integer range.", + "html": "

Determines the number of active 32-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment all destination vector elements. The results are saturated to the 32-bit signed integer range.

The named predicate constraint limits the number of active elements in a single predicate to:

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQNEG": + return { + "tooltip": "Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.", + "html": "

Signed saturating Negate. This instruction reads each vector element from the source SIMD&FP register, negates each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQNEG": + return { + "tooltip": "Negate the signed integer value in each active element of the source vector, and place the results in the corresponding elements of the destination vector. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. Inactive elements in the destination vector register remain unmodified.", + "html": "

Negate the signed integer value in each active element of the source vector, and place the results in the corresponding elements of the destination vector. Each result element is saturated to the N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRDCMLAH": + return { + "tooltip": "Multiply without saturation the duplicated real components for rotations 0 and 180, or imaginary components for rotations 90 and 270, of the integral numbers in the first source vector by the corresponding complex number in the second source vector rotated by 0, 90, 180 or 270 degrees in the direction from the positive real axis towards the positive imaginary axis, when considered in polar representation.", + "html": "

Multiply without saturation the duplicated real components for rotations 0 and 180, or imaginary components for rotations 90 and 270, of the integral numbers in the first source vector by the corresponding complex number in the second source vector rotated by 0, 90, 180 or 270 degrees in the direction from the positive real axis towards the positive imaginary axis, when considered in polar representation.

Then double and add the products to the corresponding components of the complex numbers in the addend vector. Destructively place the most significant rounded half of the results in the corresponding elements of the addend vector. Each result element is saturated to the N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1. This instruction is unpredicated.

These transformations permit the creation of a variety of multiply-add and multiply-subtract operations on complex numbers by combining two of these instructions with the same vector operands but with rotations that are 90 degrees apart.

Each complex number is represented in a vector register as an even/odd pair of elements with the real part in the even-numbered element and the imaginary part in the odd-numbered element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRDCMLAH": + return { + "tooltip": "Multiply without saturation the duplicated real components for rotations 0 and 180, or imaginary components for rotations 90 and 270, of the integral numbers in each 128-bit segment of the first source vector by the specified complex number in the corresponding the second source vector segment rotated by 0, 90, 180 or 270 degrees in the direction from the positive real axis towards the positive imaginary axis, when considered in polar representation.", + "html": "

Multiply without saturation the duplicated real components for rotations 0 and 180, or imaginary components for rotations 90 and 270, of the integral numbers in each 128-bit segment of the first source vector by the specified complex number in the corresponding second source vector segment rotated by 0, 90, 180 or 270 degrees in the direction from the positive real axis towards the positive imaginary axis, when considered in polar representation.

Then double and add the products to the corresponding components of the complex numbers in the addend vector. Destructively place the most significant rounded half of the results in the corresponding elements of the addend vector. Each result element is saturated to the N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1. This instruction is unpredicated.

These transformations permit the creation of a variety of multiply-add and multiply-subtract operations on complex numbers by combining two of these instructions with the same vector operands but with rotations that are 90 degrees apart.

Each complex number is represented in a vector register as an even/odd pair of elements with the real part in the even-numbered element and the imaginary part in the odd-numbered element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRDMLAH": + return { + "tooltip": "Signed Saturating Rounding Doubling Multiply Accumulate returning High Half (by element). This instruction multiplies the vector elements of the first source SIMD&FP register with the value of a vector element of the second source SIMD&FP register without saturating the multiply results, doubles the results, and accumulates the most significant half of the final results with the vector elements of the destination SIMD&FP register. The results are rounded.", + "html": "

Signed Saturating Rounding Doubling Multiply Accumulate returning High Half (by element). This instruction multiplies the vector elements of the first source SIMD&FP register with the value of a vector element of the second source SIMD&FP register without saturating the multiply results, doubles the results, and accumulates the most significant half of the final results with the vector elements of the destination SIMD&FP register. The results are rounded.

If any of the results overflow, they are saturated. The cumulative saturation bit, FPSR.QC, is set if saturation occurs.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRDMLAH": + return { + "tooltip": "Signed Saturating Rounding Doubling Multiply Accumulate returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and accumulates the most significant half of the final results with the vector elements of the destination SIMD&FP register. The results are rounded.", + "html": "

Signed Saturating Rounding Doubling Multiply Accumulate returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and accumulates the most significant half of the final results with the vector elements of the destination SIMD&FP register. The results are rounded.

If any of the results overflow, they are saturated. The cumulative saturation bit, FPSR.QC, is set if saturation occurs.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRDMLAH": + return { + "tooltip": "Multiply then double the corresponding signed elements of the first and second source vectors, and destructively add the rounded high half of each result to the corresponding elements of the addend and destination vector. Each destination element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.", + "html": "

Multiply then double the corresponding signed elements of the first and second source vectors, and destructively add the rounded high half of each result to the corresponding elements of the addend and destination vector. Each destination element is saturated to the N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRDMLAH": + return { + "tooltip": "Multiply then double all signed elements within each 128-bit segment of the first source vector and the specified signed element of the corresponding second source vector segment, and destructively add the rounded high half of each result to the corresponding elements of the addend and destination vector. Each destination element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1.", + "html": "

Multiply then double all signed elements within each 128-bit segment of the first source vector and the specified signed element of the corresponding second source vector segment, and destructively add the rounded high half of each result to the corresponding elements of the addend and destination vector. Each destination element is saturated to the N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 1 to 3 bits depending on the size of the element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRDMLSH": + return { + "tooltip": "Signed Saturating Rounding Doubling Multiply Subtract returning High Half (by element). This instruction multiplies the vector elements of the first source SIMD&FP register with the value of a vector element of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.", + "html": "

Signed Saturating Rounding Doubling Multiply Subtract returning High Half (by element). This instruction multiplies the vector elements of the first source SIMD&FP register with the value of a vector element of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.

If any of the results overflow, they are saturated. The cumulative saturation bit, FPSR.QC, is set if saturation occurs.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRDMLSH": + return { + "tooltip": "Signed Saturating Rounding Doubling Multiply Subtract returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.", + "html": "

Signed Saturating Rounding Doubling Multiply Subtract returning High Half (vector). This instruction multiplies the vector elements of the first source SIMD&FP register with the corresponding vector elements of the second source SIMD&FP register without saturating the multiply results, doubles the results, and subtracts the most significant half of the final results from the vector elements of the destination SIMD&FP register. The results are rounded.

If any of the results overflow, they are saturated. The cumulative saturation bit, FPSR.QC, is set if saturation occurs.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRDMLSH": + return { + "tooltip": "Multiply then double the corresponding signed elements of the first and second source vectors, and destructively subtract the rounded high half of each result from the corresponding elements of the addend and destination vector. Each destination element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.", + "html": "

Multiply then double the corresponding signed elements of the first and second source vectors, and destructively subtract the rounded high half of each result from the corresponding elements of the addend and destination vector. Each destination element is saturated to the N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRDMLSH": + return { + "tooltip": "Multiply then double all signed elements within each 128-bit segment of the first source vector and the specified signed element of the corresponding second source vector segment, and destructively subtract the rounded high half of each result to the corresponding elements of the addend and destination vector. Each destination element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1.", + "html": "

Multiply then double all signed elements within each 128-bit segment of the first source vector and the specified signed element of the corresponding second source vector segment, and destructively subtract the rounded high half of each result from the corresponding elements of the addend and destination vector. Each destination element is saturated to the N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 1 to 3 bits depending on the size of the element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRDMULH": + return { + "tooltip": "Signed saturating Rounding Doubling Multiply returning High half (by element). This instruction multiplies each vector element in the first source SIMD&FP register by the specified vector element of the second source SIMD&FP register, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Signed saturating Rounding Doubling Multiply returning High half (by element). This instruction multiplies each vector element in the first source SIMD&FP register by the specified vector element of the second source SIMD&FP register, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

The results are rounded. For truncated results, see SQDMULH.

If any of the results overflow, they are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRDMULH": + return { + "tooltip": "Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Signed saturating Rounding Doubling Multiply returning High half. This instruction multiplies the values of corresponding elements of the two source SIMD&FP registers, doubles the results, places the most significant half of the final results into a vector, and writes the vector to the destination SIMD&FP register.

The results are rounded. For truncated results, see SQDMULH.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRDMULH": + return { + "tooltip": "Multiply then double the corresponding signed elements of the first and second source vectors, and place the most significant rounded half of the result in the corresponding elements of the destination vector. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.", + "html": "

Multiply then double the corresponding signed elements of the first and second source vectors, and place the most significant rounded half of the result in the corresponding elements of the destination vector. Each result element is saturated to the N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRDMULH": + return { + "tooltip": "Multiply all signed elements within each 128-bit segment of the first source vector by the specified signed element in the corresponding second source vector segment, double and place the most significant rounded half of the result in the corresponding elements of the destination vector register. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1.", + "html": "

Multiply all signed elements within each 128-bit segment of the first source vector by the specified signed element in the corresponding second source vector segment, double and place the most significant rounded half of the result in the corresponding elements of the destination vector register. Each result element is saturated to the N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 1 to 3 bits depending on the size of the element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRSHL": + return { + "tooltip": "Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Signed saturating Rounding Shift Left (register). This instruction takes each vector element in the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

If the shift value is positive, the operation is a left shift. Otherwise, it is a right shift. The results are rounded. For truncated results, see SQSHL.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRSHL": + return { + "tooltip": "Shift active signed elements of the first source vector by corresponding elements of the second source vector and destructively place the rounded results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift active signed elements of the first source vector by corresponding elements of the second source vector and destructively place the rounded results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Each result element is saturated to the N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRSHLR": + return { + "tooltip": "Shift active signed elements of the second source vector by corresponding elements of the first source vector and destructively place the rounded results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift active signed elements of the second source vector by corresponding elements of the first source vector and destructively place the rounded results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Each result element is saturated to the N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRSHR": + return { + "tooltip": "Shift right by an immediate value, the signed integer value in each element of the two source vectors and place the rounded results in the half-width destination elements. Each result element is saturated to the half-width N-bit element's signed integer range -2(N-1) to (2(N-1))-1. The immediate shift amount is an unsigned value in the range 1 to 16.", + "html": "

Shift right by an immediate value, the signed integer value in each element of the two source vectors and place the rounded results in the half-width destination elements. Each result element is saturated to the half-width N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1. The immediate shift amount is an unsigned value in the range 1 to 16.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRSHR": + return { + "tooltip": "Shift right by an immediate value, the signed integer value in each element of the four source vectors and place the rounded results in the quarter-width destination elements. Each result element is saturated to the quarter-width N-bit element's signed integer range -2(N-1) to (2(N-1))-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per source element.", + "html": "

Shift right by an immediate value, the signed integer value in each element of the four source vectors and place the rounded results in the quarter-width destination elements. Each result element is saturated to the quarter-width N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per source element.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRSHRN": + case "SQRSHRN2": + return { + "tooltip": "Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SQSHRN.", + "html": "

Signed saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. The results are rounded. For truncated results, see SQSHRN.

The SQRSHRN instruction writes the vector to the lower half of the destination register and clears the upper half, while the SQRSHRN2 instruction writes the vector to the upper half of the destination register without affecting the other bits of the register.

If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRSHRN": + return { + "tooltip": "Shift right by an immediate value, the signed integer value in each element of the group of two source vectors and place the two-way interleaved rounded results in the half-width destination elements. Each result element is saturated to the half-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. The immediate shift amount is an unsigned value in the range 1 to 16.", + "html": "

Shift right by an immediate value, the signed integer value in each element of the group of two source vectors and place the two-way interleaved rounded results in the half-width destination elements. Each result element is saturated to the half-width N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1. The immediate shift amount is an unsigned value in the range 1 to 16.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRSHRN": + return { + "tooltip": "Shift right by an immediate value, the signed integer value in each element of the four source vectors and place the four-way interleaved rounded results in the quarter-width destination elements. Each result element is saturated to the quarter-width N-bit element's signed integer range -2(N-1) to (2(N-1))-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per source element.", + "html": "

Shift right by an immediate value, the signed integer value in each element of the four source vectors and place the four-way interleaved rounded results in the quarter-width destination elements. Each result element is saturated to the quarter-width N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per source element.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRSHRNB": + return { + "tooltip": "Shift each signed integer value in the source vector elements right by an immediate value, and place the rounded results in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero. Each result element is saturated to the half-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.", + "html": "

Shift each signed integer value in the source vector elements right by an immediate value, and place the rounded results in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero. Each result element is saturated to the half-width N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRSHRNT": + return { + "tooltip": "Shift each signed integer value in the source vector elements right by an immediate value, and place the rounded results in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. Each result element is saturated to the half-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.", + "html": "

Shift each signed integer value in the source vector elements right by an immediate value, and place the rounded results in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. Each result element is saturated to the half-width N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRSHRU": + return { + "tooltip": "Shift right by an immediate value, the signed integer value in each element of the two source vectors and place the rounded results in the half-width destination elements. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2N)-1. The immediate shift amount is an unsigned value in the range 1 to 16.", + "html": "

Shift right by an immediate value, the signed integer value in each element of the two source vectors and place the rounded results in the half-width destination elements. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2^N)-1. The immediate shift amount is an unsigned value in the range 1 to 16.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRSHRU": + return { + "tooltip": "Shift right by an immediate value, the signed integer value in each element of the four source vectors and place the rounded results in the quarter-width destination elements. Each result element is saturated to the quarter-width N-bit element's unsigned integer range 0 to (2N)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per source element.", + "html": "

Shift right by an immediate value, the signed integer value in each element of the four source vectors and place the rounded results in the quarter-width destination elements. Each result element is saturated to the quarter-width N-bit element's unsigned integer range 0 to (2^N)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per source element.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRSHRUN": + case "SQRSHRUN2": + return { + "tooltip": "Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are rounded. For truncated results, see SQSHRUN.", + "html": "

Signed saturating Rounded Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are rounded. For truncated results, see SQSHRUN.

The SQRSHRUN instruction writes the vector to the lower half of the destination register and clears the upper half, while the SQRSHRUN2 instruction writes the vector to the upper half of the destination register without affecting the other bits of the register.

If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRSHRUN": + return { + "tooltip": "Shift right by an immediate value, the signed integer value in each element of the group of two source vectors and place the two-way interleaved rounded results in the half-width destination elements. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2N)-1. The immediate shift amount is an unsigned value in the range 1 to 16.", + "html": "

Shift right by an immediate value, the signed integer value in each element of the group of two source vectors and place the two-way interleaved rounded results in the half-width destination elements. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2^N)-1. The immediate shift amount is an unsigned value in the range 1 to 16.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRSHRUN": + return { + "tooltip": "Shift right by an immediate value, the signed integer value in each element of the four source vectors and place the four-way interleaved rounded results in the quarter-width destination elements. Each result element is saturated to the quarter-width N-bit element's unsigned integer range 0 to (2N)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per source element.", + "html": "

Shift right by an immediate value, the signed integer value in each element of the four source vectors and place the four-way interleaved rounded results in the quarter-width destination elements. Each result element is saturated to the quarter-width N-bit element's unsigned integer range 0 to (2^N)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per source element.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRSHRUNB": + return { + "tooltip": "Shift each signed integer value in the source vector elements right by an immediate value, and place the rounded results in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2N)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.", + "html": "

Shift each signed integer value in the source vector elements right by an immediate value, and place the rounded results in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2^N)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQRSHRUNT": + return { + "tooltip": "Shift each signed integer value in the source vector elements right by an immediate value, and place the rounded results in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2N)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.", + "html": "

Shift each signed integer value in the source vector elements right by an immediate value, and place the rounded results in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2^N)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQSHL": + return { + "tooltip": "Signed saturating Shift Left (immediate). This instruction reads each vector element in the source SIMD&FP register, shifts each result by an immediate value, places the final result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.", + "html": "

Signed saturating Shift Left (immediate). This instruction reads each vector element in the source SIMD&FP register, shifts each result by an immediate value, places the final result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQSHL": + return { + "tooltip": "Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Signed saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

If the shift value is positive, the operation is a left shift. Otherwise, it is a right shift. The results are truncated. For rounded results, see SQRSHL.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQSHL": + return { + "tooltip": "Shift left by immediate each active signed element of the source vector, and destructively place the results in the corresponding elements of the source vector. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. The immediate shift amount is an unsigned value in the range 0 to number of bits per element minus 1. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift left by immediate each active signed element of the source vector, and destructively place the results in the corresponding elements of the source vector. Each result element is saturated to the N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1. The immediate shift amount is an unsigned value in the range 0 to number of bits per element minus 1. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQSHL": + return { + "tooltip": "Shift active signed elements of the first source vector by corresponding elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift active signed elements of the first source vector by corresponding elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Each result element is saturated to the N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQSHLR": + return { + "tooltip": "Shift active signed elements of the second source vector by corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift active signed elements of the second source vector by corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Each result element is saturated to the N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQSHLU": + return { + "tooltip": "Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.", + "html": "

Signed saturating Shift Left Unsigned (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, shifts each value by an immediate value, saturates the shifted result to an unsigned integer value, places the result in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQSHLU": + return { + "tooltip": "Shift left by immediate each active signed element of the source vector, and destructively place the results in the corresponding elements of the source vector. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2N)-1. The immediate shift amount is an unsigned value in the range 0 to number of bits per element minus 1. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift left by immediate each active signed element of the source vector, and destructively place the results in the corresponding elements of the source vector. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2^N)-1. The immediate shift amount is an unsigned value in the range 0 to number of bits per element minus 1. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQSHRN": + case "SQSHRN2": + return { + "tooltip": "Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see SQRSHRN.", + "html": "

Signed saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts and truncates each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are half as long as the source vector elements. For rounded results, see SQRSHRN.

The SQSHRN instruction writes the vector to the lower half of the destination register and clears the upper half, while the SQSHRN2 instruction writes the vector to the upper half of the destination register without affecting the other bits of the register.

If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQSHRNB": + return { + "tooltip": "Shift each signed integer value in the source vector elements right by an immediate value, and place the truncated results in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero. Each result element is saturated to the half-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.", + "html": "

Shift each signed integer value in the source vector elements right by an immediate value, and place the truncated results in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero. Each result element is saturated to the half-width N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQSHRNT": + return { + "tooltip": "Shift each signed integer value in the source vector elements right by an immediate value, and place the truncated results in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. Each result element is saturated to the half-width N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.", + "html": "

Shift each signed integer value in the source vector elements right by an immediate value, and place the truncated results in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. Each result element is saturated to the half-width N-bit element's signed integer range -2<sup>(N-1)</sup> to (2<sup>(N-1)</sup>)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQSHRUN": + case "SQSHRUN2": + return { + "tooltip": "Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see SQRSHRUN.", + "html": "

Signed saturating Shift Right Unsigned Narrow (immediate). This instruction reads each signed integer value in the vector of the source SIMD&FP register, right shifts each value by an immediate value, saturates the result to an unsigned integer value that is half the original width, places the final result into a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see SQRSHRUN.

The SQSHRUN instruction writes the vector to the lower half of the destination register and clears the upper half, while the SQSHRUN2 instruction writes the vector to the upper half of the destination register without affecting the other bits of the register.

If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQSHRUNB": + return { + "tooltip": "Shift each signed integer value in the source vector elements right by an immediate value, and place the truncated results in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2N)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.", + "html": "

Shift each signed integer value in the source vector elements right by an immediate value, and place the truncated results in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2<sup>N</sup>)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQSHRUNT": + return { + "tooltip": "Shift each signed integer value in the source vector elements right by an immediate value, and place the truncated results in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2N)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.", + "html": "

Shift each signed integer value in the source vector elements right by an immediate value, and place the truncated results in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2<sup>N</sup>)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQSUB": + return { + "tooltip": "Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Signed saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQSUB": + return { + "tooltip": "Subtract active signed elements of the second source vector from corresponding signed elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. Inactive elements in the destination vector register remain unmodified.", + "html": "

Subtract active signed elements of the second source vector from corresponding signed elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Each result element is saturated to the N-bit element's signed integer range -2<sup>(N-1)</sup> to (2<sup>(N-1)</sup>)-1. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQSUB": + return { + "tooltip": "Signed saturating subtract of an unsigned immediate from each element of the source vector, and destructively place the results in the corresponding elements of the source vector. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.", + "html": "

Signed saturating subtract of an unsigned immediate from each element of the source vector, and destructively place the results in the corresponding elements of the source vector. Each result element is saturated to the N-bit element's signed integer range -2<sup>(N-1)</sup> to (2<sup>(N-1)</sup>)-1. This instruction is unpredicated.

The immediate is an unsigned value in the range 0 to 255, and for element widths of 16 bits or higher it may also be a positive multiple of 256 in the range 256 to 65280.

The immediate is encoded in 8 bits with an optional left shift by 8. The preferred disassembly when the shift option is specified is \"#&lt;uimm8&gt;, LSL #8\". However an assembler and disassembler may also allow use of the shifted 16-bit value unless the immediate is 0 and the shift amount is 8, which must be unambiguously described as \"#0, LSL #8\".

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQSUB": + return { + "tooltip": "Signed saturating subtract all elements of the second source vector from corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. This instruction is unpredicated.", + "html": "

Signed saturating subtract all elements of the second source vector from corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. Each result element is saturated to the N-bit element's signed integer range -2<sup>(N-1)</sup> to (2<sup>(N-1)</sup>)-1. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQSUBR": + return { + "tooltip": "Subtract active signed elements of the first source vector from corresponding signed elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. Inactive elements in the destination vector register remain unmodified.", + "html": "

Subtract active signed elements of the first source vector from corresponding signed elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. Each result element is saturated to the N-bit element's signed integer range -2<sup>(N-1)</sup> to (2<sup>(N-1)</sup>)-1. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQXTN": + case "SQXTN2": + return { + "tooltip": "Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.", + "html": "

Signed saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates the value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements. All the values in this instruction are signed integer values.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

The SQXTN instruction writes the vector to the lower half of the destination register and clears the upper half, while the SQXTN2 instruction writes the vector to the upper half of the destination register without affecting the other bits of the register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQXTNB": + return { + "tooltip": "Saturate the signed integer value in each source element to half the original source element width, and place the results in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero.", + "html": "

Saturate the signed integer value in each source element to half the original source element width, and place the results in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQXTNT": + return { + "tooltip": "Saturate the signed integer value in each source element to half the original source element width, and place the results in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged.", + "html": "

Saturate the signed integer value in each source element to half the original source element width, and place the results in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQXTUN": + case "SQXTUN2": + return { + "tooltip": "Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.", + "html": "

Signed saturating extract Unsigned Narrow. This instruction reads each signed integer value in the vector of the source SIMD&FP register, saturates the value to an unsigned integer value that is half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

If saturation occurs, the cumulative saturation bit FPSR.QC is set.

The SQXTUN instruction writes the vector to the lower half of the destination register and clears the upper half, while the SQXTUN2 instruction writes the vector to the upper half of the destination register without affecting the other bits of the register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQXTUNB": + return { + "tooltip": "Saturate the signed integer value in each source element to an unsigned integer value that is half the original source element width, and place the results in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero.", + "html": "

Saturate the signed integer value in each source element to an unsigned integer value that is half the original source element width, and place the results in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SQXTUNT": + return { + "tooltip": "Saturate the signed integer value in each source element to an unsigned integer value that is half the original source element width, and place the results in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged.", + "html": "

Saturate the signed integer value in each source element to an unsigned integer value that is half the original source element width, and place the results in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SRHADD": + return { + "tooltip": "Signed Rounding Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Signed Rounding Halving Add. This instruction adds corresponding signed integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

The results are rounded. For truncated results, see SHADD.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SRHADD": + return { + "tooltip": "Add active signed elements of the first source vector to corresponding signed elements of the second source vector, shift right one bit, and destructively place the rounded results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Add active signed elements of the first source vector to corresponding signed elements of the second source vector, shift right one bit, and destructively place the rounded results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SRI": + return { + "tooltip": "Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.", + "html": "

Shift Right and Insert (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each vector element by an immediate value, and inserts the result into the corresponding vector element in the destination SIMD&FP register such that the new zero bits created by the shift are not inserted but retain their existing value. Bits shifted out of the right of each vector element of the source register are lost.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SRI": + return { + "tooltip": "Shift each source vector element right by an immediate value, and insert the result into the corresponding vector element in the destination vector register, merging the shifted bits from each source element with existing bits in each destination vector element. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.", + "html": "

Shift each source vector element right by an immediate value, and insert the result into the corresponding vector element in the destination vector register, merging the shifted bits from each source element with existing bits in each destination vector element. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SRSHL": + return { + "tooltip": "Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Signed Rounding Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts it by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

If the shift value is positive, the operation is a left shift. If the shift value is negative, it is a rounding right shift. For a truncating shift, see SSHL.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SRSHL": + return { + "tooltip": "Shift the signed elements of the two or four first source vectors by corresponding elements of the second source vector and destructively place the rounded results in the corresponding elements of the two or four first source vectors. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed.", + "html": "

Shift the signed elements of the two or four first source vectors by corresponding elements of the second source vector and destructively place the rounded results in the corresponding elements of the two or four first source vectors. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SRSHL": + return { + "tooltip": "Shift the signed elements of the two or four first source vectors by corresponding elements of the two or four second source vectors and destructively place the rounded results in the corresponding elements of the two or four first source vectors. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed.", + "html": "

Shift the signed elements of the two or four first source vectors by corresponding elements of the two or four second source vectors and destructively place the rounded results in the corresponding elements of the two or four first source vectors. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SRSHL": + return { + "tooltip": "Shift active signed elements of the first source vector by corresponding elements of the second source vector and destructively place the rounded results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift active signed elements of the first source vector by corresponding elements of the second source vector and destructively place the rounded results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SRSHLR": + return { + "tooltip": "Shift active signed elements of the second source vector by corresponding elements of the first source vector and destructively place the rounded results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift active signed elements of the second source vector by corresponding elements of the first source vector and destructively place the rounded results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SRSHR": + return { + "tooltip": "Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSHR.", + "html": "

Signed Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSHR.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SRSHR": + return { + "tooltip": "Shift right by immediate each active signed element of the source vector, and destructively place the rounded results in the corresponding elements of the source vector. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift right by immediate each active signed element of the source vector, and destructively place the rounded results in the corresponding elements of the source vector. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SRSRA": + return { + "tooltip": "Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSRA.", + "html": "

Signed Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are rounded. For truncated results, see SSRA.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SRSRA": + return { + "tooltip": "Shift right by immediate each signed element of the source vector, preserving the sign bit, and add the rounded intermediate result destructively to the corresponding elements of the addend vector. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.", + "html": "

Shift right by immediate each signed element of the source vector, preserving the sign bit, and add the rounded intermediate result destructively to the corresponding elements of the addend vector. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SSBB": + return { + "tooltip": "Speculative Store Bypass Barrier is a memory barrier that prevents speculative loads from bypassing earlier stores to the same virtual address under certain conditions. For more information and details of the semantics, see Speculative Store Bypass Barrier (SSBB).", + "html": "

Speculative Store Bypass Barrier is a memory barrier that prevents speculative loads from bypassing earlier stores to the same virtual address under certain conditions. For more information and details of the semantics, see Speculative Store Bypass Barrier (SSBB).

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SSHL": + return { + "tooltip": "Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Signed Shift Left (register). This instruction takes each signed integer value in the vector of the first source SIMD&FP register, shifts each value by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

If the shift value is positive, the operation is a left shift. If the shift value is negative, it is a truncating right shift. For a rounding shift, see SRSHL.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SSHLL": + case "SSHLL2": + return { + "tooltip": "Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.", + "html": "

Signed Shift Left Long (immediate). This instruction reads each vector element from the source SIMD&FP register, left shifts each vector element by the specified shift amount, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

The SSHLL instruction extracts vector elements from the lower half of the source register. The SSHLL2 instruction extracts vector elements from the upper half of the source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SSHLLB": + return { + "tooltip": "Shift left by immediate each even-numbered signed element of the source vector, and place the results in the overlapping double-width elements of the destination vector. The immediate shift amount is an unsigned value in the range 0 to number of bits per element minus 1. This instruction is unpredicated.", + "html": "

Shift left by immediate each even-numbered signed element of the source vector, and place the results in the overlapping double-width elements of the destination vector. The immediate shift amount is an unsigned value in the range 0 to number of bits per element minus 1. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SSHLLT": + return { + "tooltip": "Shift left by immediate each odd-numbered signed element of the source vector, and place the results in the overlapping double-width elements of the destination vector. The immediate shift amount is an unsigned value in the range 0 to number of bits per element minus 1. This instruction is unpredicated.", + "html": "

Shift left by immediate each odd-numbered signed element of the source vector, and place the results in the overlapping double-width elements of the destination vector. The immediate shift amount is an unsigned value in the range 0 to number of bits per element minus 1. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SSHR": + return { + "tooltip": "Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSHR.", + "html": "

Signed Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, places the final result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSHR.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SSRA": + return { + "tooltip": "Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSRA.", + "html": "

Signed Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are signed integer values. The results are truncated. For rounded results, see SRSRA.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SSRA": + return { + "tooltip": "Shift right by immediate each signed element of the source vector, preserving the sign bit, and add the truncated intermediate result destructively to the corresponding elements of the addend vector. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.", + "html": "

Shift right by immediate each signed element of the source vector, preserving the sign bit, and add the truncated intermediate result destructively to the corresponding elements of the addend vector. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SSUBL": + case "SSUBL2": + return { + "tooltip": "Signed Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are twice as long as the source vector elements.", + "html": "

Signed Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are signed integer values. The destination vector elements are twice as long as the source vector elements.

The SSUBL instruction extracts each source vector from the lower half of each source register. The SSUBL2 instruction extracts each source vector from the upper half of each source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SSUBLB": + return { + "tooltip": "Subtract the even-numbered signed elements of the second source vector from the corresponding signed elements of the first source vector, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Subtract the even-numbered signed elements of the second source vector from the corresponding signed elements of the first source vector, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SSUBLBT": + return { + "tooltip": "Subtract the odd-numbered signed elements of the second source vector from the even-numbered signed elements of the first source vector, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Subtract the odd-numbered signed elements of the second source vector from the even-numbered signed elements of the first source vector, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SSUBLT": + return { + "tooltip": "Subtract the odd-numbered signed elements of the second source vector from the corresponding signed elements of the first source vector, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Subtract the odd-numbered signed elements of the second source vector from the corresponding signed elements of the first source vector, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SSUBLTB": + return { + "tooltip": "Subtract the even-numbered signed elements of the second source vector from the odd-numbered signed elements of the first source vector, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Subtract the even-numbered signed elements of the second source vector from the odd-numbered signed elements of the first source vector, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SSUBW": + case "SSUBW2": + return { + "tooltip": "Signed Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are signed integer values.", + "html": "

Signed Subtract Wide. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are signed integer values.

The SSUBW instruction extracts the second source vector from the lower half of the second source register. The SSUBW2 instruction extracts the second source vector from the upper half of the second source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SSUBWB": + return { + "tooltip": "Subtract the even-numbered signed elements of the second source vector from the overlapping double-width elements of the first source vector and place the results in the corresponding double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Subtract the even-numbered signed elements of the second source vector from the overlapping double-width elements of the first source vector and place the results in the corresponding double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SSUBWT": + return { + "tooltip": "Subtract the odd-numbered signed elements of the second source vector from the overlapping double-width elements of the first source vector and place the results in the corresponding double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Subtract the odd-numbered signed elements of the second source vector from the overlapping double-width elements of the first source vector and place the results in the corresponding double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1": + return { + "tooltip": "Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&FP registers, without interleaving. Every element of each register is stored.", + "html": "

Store multiple single-element structures from one, two, three, or four registers. This instruction stores elements to memory from one, two, three, or four SIMD&FP registers, without interleaving. Every element of each register is stored.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1": + return { + "tooltip": "Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.", + "html": "

Store a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1B": + return { + "tooltip": "Contiguous store of bytes from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous store of bytes from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1B": + return { + "tooltip": "Contiguous store of bytes from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous store of bytes from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1B": + return { + "tooltip": "Contiguous store of bytes from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous store of bytes from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1B": + return { + "tooltip": "Contiguous store of bytes from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous store of bytes from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1B": + return { + "tooltip": "Scatter store of bytes from the active elements of a vector register to the memory addresses generated by a vector base plus immediate index. The index is in the range 0 to 31. Inactive elements are not written to memory.", + "html": "

Scatter store of bytes from the active elements of a vector register to the memory addresses generated by a vector base plus immediate index. The index is in the range 0 to 31. Inactive elements are not written to memory.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1B": + return { + "tooltip": "Contiguous store of bytes from elements of a vector register to the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements are not written to memory.", + "html": "

Contiguous store of bytes from elements of a vector register to the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1B": + return { + "tooltip": "Contiguous store of bytes from elements of a vector register to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements are not written to memory.", + "html": "

Contiguous store of bytes from elements of a vector register to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1B": + return { + "tooltip": "Scatter store of bytes from the active elements of a vector register to the memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally sign or zero-extended from 32 to 64 bits. Inactive elements are not written to memory.", + "html": "

Scatter store of bytes from the active elements of a vector register to the memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally sign or zero-extended from 32 to 64 bits. Inactive elements are not written to memory.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1B": + return { + "tooltip": "The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of 8-bit elements in a vector. The immediate offset is in the range 0 to 15. The memory address is generated by a 64-bit scalar base and an optional 64-bit scalar offset which is added to the base address. Inactive elements are not written to memory.", + "html": "

The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of 8-bit elements in a vector. The immediate offset is in the range 0 to 15. The memory address is generated by a 64-bit scalar base and an optional 64-bit scalar offset which is added to the base address. Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1D": + return { + "tooltip": "Contiguous store of doublewords from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous store of doublewords from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1D": + return { + "tooltip": "Contiguous store of doublewords from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous store of doublewords from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1D": + return { + "tooltip": "Contiguous store of doublewords from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous store of doublewords from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1D": + return { + "tooltip": "Contiguous store of doublewords from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous store of doublewords from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1D": + return { + "tooltip": "Scatter store of doublewords from the active elements of a vector register to the memory addresses generated by a vector base plus immediate index. The index is a multiple of 8 in the range 0 to 248. Inactive elements are not written to memory.", + "html": "

Scatter store of doublewords from the active elements of a vector register to the memory addresses generated by a vector base plus immediate index. The index is a multiple of 8 in the range 0 to 248. Inactive elements are not written to memory.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1D": + return { + "tooltip": "Contiguous store of doublewords from elements of a vector register to the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements are not written to memory.", + "html": "

Contiguous store of doublewords from elements of a vector register to the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1D": + return { + "tooltip": "Contiguous store of doublewords from elements of a vector register to the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 8 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements are not written to memory.", + "html": "

Contiguous store of doublewords from elements of a vector register to the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 8 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1D": + return { + "tooltip": "Scatter store of doublewords from the active elements of a vector register to the memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 8. Inactive elements are not written to memory.", + "html": "

Scatter store of doublewords from the active elements of a vector register to the memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 8. Inactive elements are not written to memory.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1D": + return { + "tooltip": "The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of 64-bit elements in a vector. The immediate offset is in the range 0 to 1. The memory address is generated by a 64-bit scalar base and an optional 64-bit scalar offset which is multiplied by 8 and added to the base address. Inactive elements are not written to memory.", + "html": "

The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of 64-bit elements in a vector. The immediate offset is in the range 0 to 1. The memory address is generated by a 64-bit scalar base and an optional 64-bit scalar offset which is multiplied by 8 and added to the base address. Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1H": + return { + "tooltip": "Contiguous store of halfwords from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous store of halfwords from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1H": + return { + "tooltip": "Contiguous store of halfwords from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous store of halfwords from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1H": + return { + "tooltip": "Contiguous store of halfwords from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous store of halfwords from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1H": + return { + "tooltip": "Contiguous store of halfwords from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous store of halfwords from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1H": + return { + "tooltip": "Scatter store of halfwords from the active elements of a vector register to the memory addresses generated by a vector base plus immediate index. The index is a multiple of 2 in the range 0 to 62. Inactive elements are not written to memory.", + "html": "

Scatter store of halfwords from the active elements of a vector register to the memory addresses generated by a vector base plus immediate index. The index is a multiple of 2 in the range 0 to 62. Inactive elements are not written to memory.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1H": + return { + "tooltip": "Contiguous store of halfwords from elements of a vector register to the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements are not written to memory.", + "html": "

Contiguous store of halfwords from elements of a vector register to the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1H": + return { + "tooltip": "Contiguous store of halfwords from elements of a vector register to the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 2 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements are not written to memory.", + "html": "

Contiguous store of halfwords from elements of a vector register to the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 2 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1H": + return { + "tooltip": "Scatter store of halfwords from the active elements of a vector register to the memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 2. Inactive elements are not written to memory.", + "html": "

Scatter store of halfwords from the active elements of a vector register to the memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 2. Inactive elements are not written to memory.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1H": + return { + "tooltip": "The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of 16-bit elements in a vector. The immediate offset is in the range 0 to 7. The memory address is generated by a 64-bit scalar base and an optional 64-bit scalar offset which is multiplied by 2 and added to the base address. Inactive elements are not written to memory.", + "html": "

The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of 16-bit elements in a vector. The immediate offset is in the range 0 to 7. The memory address is generated by a 64-bit scalar base and an optional 64-bit scalar offset which is multiplied by 2 and added to the base address. Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1Q": + return { + "tooltip": "Scatter store of quadwords from the active elements of a vector register to the memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements are not written to memory.", + "html": "

Scatter store of quadwords from the active elements of a vector register to the memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements are not written to memory.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1Q": + return { + "tooltip": "The slice number in the tile is selected by the slice index register, modulo the number of 128-bit elements in a Streaming SVE vector. The memory address is generated by scalar base and optional scalar offset which is multiplied by 16 and added to the base address. Inactive elements are not written to memory.", + "html": "

The slice number in the tile is selected by the slice index register, modulo the number of 128-bit elements in a Streaming SVE vector. The memory address is generated by scalar base and optional scalar offset which is multiplied by 16 and added to the base address. Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1W": + return { + "tooltip": "Contiguous store of words from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous store of words from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1W": + return { + "tooltip": "Contiguous store of words from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous store of words from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1W": + return { + "tooltip": "Contiguous store of words from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous store of words from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1W": + return { + "tooltip": "Contiguous store of words from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous store of words from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1W": + return { + "tooltip": "Scatter store of words from the active elements of a vector register to the memory addresses generated by a vector base plus immediate index. The index is a multiple of 4 in the range 0 to 124. Inactive elements are not written to memory.", + "html": "

Scatter store of words from the active elements of a vector register to the memory addresses generated by a vector base plus immediate index. The index is a multiple of 4 in the range 0 to 124. Inactive elements are not written to memory.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1W": + return { + "tooltip": "Contiguous store of words from elements of a vector register to the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements are not written to memory.", + "html": "

Contiguous store of words from elements of a vector register to the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1W": + return { + "tooltip": "Contiguous store of words from elements of a vector register to the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 4 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements are not written to memory.", + "html": "

Contiguous store of words from elements of a vector register to the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 4 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1W": + return { + "tooltip": "Scatter store of words from the active elements of a vector register to the memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 4. Inactive elements are not written to memory.", + "html": "

Scatter store of words from the active elements of a vector register to the memory addresses generated by a 64-bit scalar base plus vector index. The index values are optionally first sign or zero-extended from 32 to 64 bits and then optionally multiplied by 4. Inactive elements are not written to memory.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST1W": + return { + "tooltip": "The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of 32-bit elements in a vector. The immediate offset is in the range 0 to 3. The memory address is generated by a 64-bit scalar base and an optional 64-bit scalar offset which is multiplied by 4 and added to the base address. Inactive elements are not written to memory.", + "html": "

The slice number within the tile is selected by the sum of the slice index register and immediate offset, modulo the number of 32-bit elements in a vector. The immediate offset is in the range 0 to 3. The memory address is generated by a 64-bit scalar base and an optional 64-bit scalar offset which is multiplied by 4 and added to the base address. Inactive elements are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST2": + return { + "tooltip": "Store multiple 2-element structures from two registers. This instruction stores multiple 2-element structures from two SIMD&FP registers to memory, with interleaving. Every element of each register is stored.", + "html": "

Store multiple 2-element structures from two registers. This instruction stores multiple 2-element structures from two SIMD&FP registers to memory, with interleaving. Every element of each register is stored.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST2": + return { + "tooltip": "Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.", + "html": "

Store single 2-element structure from one lane of two registers. This instruction stores a 2-element structure to memory from corresponding elements of two SIMD&FP registers.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST2B": + return { + "tooltip": "Contiguous store two-byte structures, each from the same element number in two vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 2 in the range -16 to 14 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous store two-byte structures, each from the same element number in two vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 2 in the range -16 to 14 that is multiplied by the vector's in-memory size, irrespective of predication,

Each predicate element applies to the same element number in each of the two vector registers, or equivalently to the two consecutive bytes in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST2B": + return { + "tooltip": "Contiguous store two-byte structures, each from the same element number in two vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register and added to the base address. After each structure access the index value is incremented by two. The index register is not updated by the instruction.", + "html": "

Contiguous store two-byte structures, each from the same element number in two vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register and added to the base address. After each structure access the index value is incremented by two. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the two vector registers, or equivalently to the two consecutive bytes in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST2D": + return { + "tooltip": "Contiguous store two-doubleword structures, each from the same element number in two vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 2 in the range -16 to 14 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous store two-doubleword structures, each from the same element number in two vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 2 in the range -16 to 14 that is multiplied by the vector's in-memory size, irrespective of predication,

Each predicate element applies to the same element number in each of the two vector registers, or equivalently to the two consecutive doublewords in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST2D": + return { + "tooltip": "Contiguous store two-doubleword structures, each from the same element number in two vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by two. The index register is not updated by the instruction.", + "html": "

Contiguous store two-doubleword structures, each from the same element number in two vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by two. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the two vector registers, or equivalently to the two consecutive doublewords in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST2G": + return { + "tooltip": "Store Allocation Tags stores an Allocation Tag to two Tag granules of memory. The address used for the store is calculated from the base register and an immediate signed offset scaled by the Tag granule. The Allocation Tag is calculated from the Logical Address Tag in the source register.", + "html": "

Store Allocation Tags stores an Allocation Tag to two Tag granules of memory. The address used for the store is calculated from the base register and an immediate signed offset scaled by the Tag granule. The Allocation Tag is calculated from the Logical Address Tag in the source register.

This instruction generates an Unchecked access.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST2H": + return { + "tooltip": "Contiguous store two-halfword structures, each from the same element number in two vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 2 in the range -16 to 14 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous store two-halfword structures, each from the same element number in two vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 2 in the range -16 to 14 that is multiplied by the vector's in-memory size, irrespective of predication,

Each predicate element applies to the same element number in each of the two vector registers, or equivalently to the two consecutive halfwords in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST2H": + return { + "tooltip": "Contiguous store two-halfword structures, each from the same element number in two vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by two. The index register is not updated by the instruction.", + "html": "

Contiguous store two-halfword structures, each from the same element number in two vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by two. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the two vector registers, or equivalently to the two consecutive halfwords in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST2Q": + return { + "tooltip": "Contiguous store two-quadword structures, each from the same element number in two vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 2 in the range -16 to 14 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous store two-quadword structures, each from the same element number in two vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 2 in the range -16 to 14 that is multiplied by the vector's in-memory size, irrespective of predication,

Each predicate element applies to the same element number in each of the two vector registers, or equivalently to the two consecutive quadwords in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST2Q": + return { + "tooltip": "Contiguous store two-quadword structures, each from the same element number in two vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by two. The index register is not updated by the instruction.", + "html": "

Contiguous store two-quadword structures, each from the same element number in two vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by two. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the two vector registers, or equivalently to the two consecutive quadwords in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST2W": + return { + "tooltip": "Contiguous store two-word structures, each from the same element number in two vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 2 in the range -16 to 14 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous store two-word structures, each from the same element number in two vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 2 in the range -16 to 14 that is multiplied by the vector's in-memory size, irrespective of predication,

Each predicate element applies to the same element number in each of the two vector registers, or equivalently to the two consecutive words in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST2W": + return { + "tooltip": "Contiguous store two-word structures, each from the same element number in two vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by two. The index register is not updated by the instruction.", + "html": "

Contiguous store two-word structures, each from the same element number in two vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by two. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the two vector registers, or equivalently to the two consecutive words in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST3": + return { + "tooltip": "Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures to memory from three SIMD&FP registers, with interleaving. Every element of each register is stored.", + "html": "

Store multiple 3-element structures from three registers. This instruction stores multiple 3-element structures to memory from three SIMD&FP registers, with interleaving. Every element of each register is stored.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST3": + return { + "tooltip": "Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.", + "html": "

Store single 3-element structure from one lane of three registers. This instruction stores a 3-element structure to memory from corresponding elements of three SIMD&FP registers.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST3B": + return { + "tooltip": "Contiguous store three-byte structures, each from the same element number in three vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 3 in the range -24 to 21 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous store three-byte structures, each from the same element number in three vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 3 in the range -24 to 21 that is multiplied by the vector's in-memory size, irrespective of predication,

Each predicate element applies to the same element number in each of the three vector registers, or equivalently to the three consecutive bytes in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST3B": + return { + "tooltip": "Contiguous store three-byte structures, each from the same element number in three vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register and added to the base address. After each structure access the index value is incremented by three. The index register is not updated by the instruction.", + "html": "

Contiguous store three-byte structures, each from the same element number in three vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register and added to the base address. After each structure access the index value is incremented by three. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the three vector registers, or equivalently to the three consecutive bytes in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST3D": + return { + "tooltip": "Contiguous store three-doubleword structures, each from the same element number in three vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 3 in the range -24 to 21 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous store three-doubleword structures, each from the same element number in three vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 3 in the range -24 to 21 that is multiplied by the vector's in-memory size, irrespective of predication,

Each predicate element applies to the same element number in each of the three vector registers, or equivalently to the three consecutive doublewords in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST3D": + return { + "tooltip": "Contiguous store three-doubleword structures, each from the same element number in three vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by three. The index register is not updated by the instruction.", + "html": "

Contiguous store three-doubleword structures, each from the same element number in three vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by three. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the three vector registers, or equivalently to the three consecutive doublewords in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST3H": + return { + "tooltip": "Contiguous store three-halfword structures, each from the same element number in three vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 3 in the range -24 to 21 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous store three-halfword structures, each from the same element number in three vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 3 in the range -24 to 21 that is multiplied by the vector's in-memory size, irrespective of predication,

Each predicate element applies to the same element number in each of the three vector registers, or equivalently to the three consecutive halfwords in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST3H": + return { + "tooltip": "Contiguous store three-halfword structures, each from the same element number in three vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by three. The index register is not updated by the instruction.", + "html": "

Contiguous store three-halfword structures, each from the same element number in three vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by three. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the three vector registers, or equivalently to the three consecutive halfwords in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST3Q": + return { + "tooltip": "Contiguous store three-quadword structures, each from the same element number in three vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 3 in the range -24 to 21 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous store three-quadword structures, each from the same element number in three vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 3 in the range -24 to 21 that is multiplied by the vector's in-memory size, irrespective of predication,

Each predicate element applies to the same element number in each of the three vector registers, or equivalently to the three consecutive quadwords in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST3Q": + return { + "tooltip": "Contiguous store three-quadword structures, each from the same element number in three vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by three. The index register is not updated by the instruction.", + "html": "

Contiguous store three-quadword structures, each from the same element number in three vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by three. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the three vector registers, or equivalently to the three consecutive quadwords in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST3W": + return { + "tooltip": "Contiguous store three-word structures, each from the same element number in three vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 3 in the range -24 to 21 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous store three-word structures, each from the same element number in three vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 3 in the range -24 to 21 that is multiplied by the vector's in-memory size, irrespective of predication.

Each predicate element applies to the same element number in each of the three vector registers, or equivalently to the three consecutive words in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST3W": + return { + "tooltip": "Contiguous store three-word structures, each from the same element number in three vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by three. The index register is not updated by the instruction.", + "html": "

Contiguous store three-word structures, each from the same element number in three vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by three. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the three vector registers, or equivalently to the three consecutive words in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST4": + return { + "tooltip": "Store multiple 4-element structures from four registers. This instruction stores multiple 4-element structures to memory from four SIMD&FP registers, with interleaving. Every element of each register is stored.", + "html": "

Store multiple 4-element structures from four registers. This instruction stores multiple 4-element structures to memory from four SIMD&FP registers, with interleaving. Every element of each register is stored.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST4": + return { + "tooltip": "Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.", + "html": "

Store single 4-element structure from one lane of four registers. This instruction stores a 4-element structure to memory from corresponding elements of four SIMD&FP registers.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST4B": + return { + "tooltip": "Contiguous store four-byte structures, each from the same element number in four vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 4 in the range -32 to 28 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous store four-byte structures, each from the same element number in four vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 4 in the range -32 to 28 that is multiplied by the vector's in-memory size, irrespective of predication.

Each predicate element applies to the same element number in each of the four vector registers, or equivalently to the four consecutive bytes in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST4B": + return { + "tooltip": "Contiguous store four-byte structures, each from the same element number in four vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register and added to the base address. After each structure access the index value is incremented by four. The index register is not updated by the instruction.", + "html": "

Contiguous store four-byte structures, each from the same element number in four vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register and added to the base address. After each structure access the index value is incremented by four. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the four vector registers, or equivalently to the four consecutive bytes in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST4D": + return { + "tooltip": "Contiguous store four-doubleword structures, each from the same element number in four vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 4 in the range -32 to 28 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous store four-doubleword structures, each from the same element number in four vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 4 in the range -32 to 28 that is multiplied by the vector's in-memory size, irrespective of predication.

Each predicate element applies to the same element number in each of the four vector registers, or equivalently to the four consecutive doublewords in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST4D": + return { + "tooltip": "Contiguous store four-doubleword structures, each from the same element number in four vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by four. The index register is not updated by the instruction.", + "html": "

Contiguous store four-doubleword structures, each from the same element number in four vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by four. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the four vector registers, or equivalently to the four consecutive doublewords in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST4H": + return { + "tooltip": "Contiguous store four-halfword structures, each from the same element number in four vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 4 in the range -32 to 28 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous store four-halfword structures, each from the same element number in four vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 4 in the range -32 to 28 that is multiplied by the vector's in-memory size, irrespective of predication.

Each predicate element applies to the same element number in each of the four vector registers, or equivalently to the four consecutive halfwords in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST4H": + return { + "tooltip": "Contiguous store four-halfword structures, each from the same element number in four vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by four. The index register is not updated by the instruction.", + "html": "

Contiguous store four-halfword structures, each from the same element number in four vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by four. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the four vector registers, or equivalently to the four consecutive halfwords in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST4Q": + return { + "tooltip": "Contiguous store four-quadword structures, each from the same element number in four vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 4 in the range -32 to 28 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous store four-quadword structures, each from the same element number in four vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 4 in the range -32 to 28 that is multiplied by the vector's in-memory size, irrespective of predication.

Each predicate element applies to the same element number in each of the four vector registers, or equivalently to the four consecutive quadwords in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST4Q": + return { + "tooltip": "Contiguous store four-quadword structures, each from the same element number in four vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by four. The index register is not updated by the instruction.", + "html": "

Contiguous store four-quadword structures, each from the same element number in four vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by four. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the four vector registers, or equivalently to the four consecutive quadwords in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST4W": + return { + "tooltip": "Contiguous store four-word structures, each from the same element number in four vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 4 in the range -32 to 28 that is multiplied by the vector's in-memory size, irrespective of predication", + "html": "

Contiguous store four-word structures, each from the same element number in four vector registers to the memory address generated by a 64-bit scalar base and an immediate index which is a multiple of 4 in the range -32 to 28 that is multiplied by the vector's in-memory size, irrespective of predication.

Each predicate element applies to the same element number in each of the four vector registers, or equivalently to the four consecutive words in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST4W": + return { + "tooltip": "Contiguous store four-word structures, each from the same element number in four vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by four. The index register is not updated by the instruction.", + "html": "

Contiguous store four-word structures, each from the same element number in four vector registers to the memory address generated by a 64-bit scalar base and a 64-bit scalar index register scaled by the element size (LSL option) and added to the base address. After each structure access the index value is incremented by four. The index register is not updated by the instruction.

Each predicate element applies to the same element number in each of the four vector registers, or equivalently to the four consecutive words in memory which make up each structure. Inactive structures are not written to memory.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST64B": + return { + "tooltip": "Single-copy Atomic 64-byte Store without Return stores eight 64-bit doublewords from consecutive registers, Xt to X(t+7), to a memory location. The data that is stored is atomic and is required to be 64-byte-aligned.", + "html": "

Single-copy Atomic 64-byte Store without Return stores eight 64-bit doublewords from consecutive registers, Xt to X(t+7), to a memory location. The data that is stored is atomic and is required to be 64-byte-aligned.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST64BV": + return { + "tooltip": "Single-copy Atomic 64-byte Store with Return stores eight 64-bit doublewords from consecutive registers, Xt to X(t+7), to a memory location, and writes the status result of the store to a register. The data that is stored is atomic and is required to be 64-byte aligned.", + "html": "

Single-copy Atomic 64-byte Store with Return stores eight 64-bit doublewords from consecutive registers, Xt to X(t+7), to a memory location, and writes the status result of the store to a register. The data that is stored is atomic and is required to be 64-byte aligned.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ST64BV0": + return { + "tooltip": "Single-copy Atomic 64-byte EL0 Store with Return stores eight 64-bit doublewords from consecutive registers, Xt to X(t+7), to a memory location, with the bottom 32 bits taken from ACCDATA_EL1, and writes the status result of the store to a register. The data that is stored is atomic and is required to be 64-byte aligned.", + "html": "

Single-copy Atomic 64-byte EL0 Store with Return stores eight 64-bit doublewords from consecutive registers, Xt to X(t+7), to a memory location, with the bottom 32 bits taken from ACCDATA_EL1, and writes the status result of the store to a register. The data that is stored is atomic and is required to be 64-byte aligned.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STADD": + case "STADDL": + return { + "tooltip": "Atomic add on word or doubleword in memory, without return, atomically loads a 32-bit word or 64-bit doubleword from memory, adds the value held in a register to it, and stores the result back to memory.", + "html": "

Atomic add on word or doubleword in memory, without return, atomically loads a 32-bit word or 64-bit doubleword from memory, adds the value held in a register to it, and stores the result back to memory.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STADDB": + case "STADDLB": + return { + "tooltip": "Atomic add on byte in memory, without return, atomically loads an 8-bit byte from memory, adds the value held in a register to it, and stores the result back to memory.", + "html": "

Atomic add on byte in memory, without return, atomically loads an 8-bit byte from memory, adds the value held in a register to it, and stores the result back to memory.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STADDH": + case "STADDLH": + return { + "tooltip": "Atomic add on halfword in memory, without return, atomically loads a 16-bit halfword from memory, adds the value held in a register to it, and stores the result back to memory.", + "html": "

Atomic add on halfword in memory, without return, atomically loads a 16-bit halfword from memory, adds the value held in a register to it, and stores the result back to memory.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STCLR": + case "STCLRL": + return { + "tooltip": "Atomic bit clear on word or doubleword in memory, without return, atomically loads a 32-bit word or 64-bit doubleword from memory, performs a bitwise AND with the complement of the value held in a register on it, and stores the result back to memory.", + "html": "

Atomic bit clear on word or doubleword in memory, without return, atomically loads a 32-bit word or 64-bit doubleword from memory, performs a bitwise AND with the complement of the value held in a register on it, and stores the result back to memory.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STCLRB": + case "STCLRLB": + return { + "tooltip": "Atomic bit clear on byte in memory, without return, atomically loads an 8-bit byte from memory, performs a bitwise AND with the complement of the value held in a register on it, and stores the result back to memory.", + "html": "

Atomic bit clear on byte in memory, without return, atomically loads an 8-bit byte from memory, performs a bitwise AND with the complement of the value held in a register on it, and stores the result back to memory.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STCLRH": + case "STCLRLH": + return { + "tooltip": "Atomic bit clear on halfword in memory, without return, atomically loads a 16-bit halfword from memory, performs a bitwise AND with the complement of the value held in a register on it, and stores the result back to memory.", + "html": "

Atomic bit clear on halfword in memory, without return, atomically loads a 16-bit halfword from memory, performs a bitwise AND with the complement of the value held in a register on it, and stores the result back to memory.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STEOR": + case "STEORL": + return { + "tooltip": "Atomic Exclusive-OR on word or doubleword in memory, without return, atomically loads a 32-bit word or 64-bit doubleword from memory, performs an exclusive-OR with the value held in a register on it, and stores the result back to memory.", + "html": "

Atomic Exclusive-OR on word or doubleword in memory, without return, atomically loads a 32-bit word or 64-bit doubleword from memory, performs an exclusive-OR with the value held in a register on it, and stores the result back to memory.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STEORB": + case "STEORLB": + return { + "tooltip": "Atomic Exclusive-OR on byte in memory, without return, atomically loads an 8-bit byte from memory, performs an exclusive-OR with the value held in a register on it, and stores the result back to memory.", + "html": "

Atomic Exclusive-OR on byte in memory, without return, atomically loads an 8-bit byte from memory, performs an exclusive-OR with the value held in a register on it, and stores the result back to memory.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STEORH": + case "STEORLH": + return { + "tooltip": "Atomic Exclusive-OR on halfword in memory, without return, atomically loads a 16-bit halfword from memory, performs an exclusive-OR with the value held in a register on it, and stores the result back to memory.", + "html": "

Atomic Exclusive-OR on halfword in memory, without return, atomically loads a 16-bit halfword from memory, performs an exclusive-OR with the value held in a register on it, and stores the result back to memory.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STG": + return { + "tooltip": "Store Allocation Tag stores an Allocation Tag to memory. The address used for the store is calculated from the base register and an immediate signed offset scaled by the Tag granule. The Allocation Tag is calculated from the Logical Address Tag in the source register.", + "html": "

Store Allocation Tag stores an Allocation Tag to memory. The address used for the store is calculated from the base register and an immediate signed offset scaled by the Tag granule. The Allocation Tag is calculated from the Logical Address Tag in the source register.

This instruction generates an Unchecked access.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STGM": + return { + "tooltip": "Store Tag Multiple writes a naturally aligned block of N Allocation Tags, where the size of N is identified in GMID_EL1.BS, and the Allocation Tag written to address A is taken from the source register at 4*A<7:4>+3:4*A<7:4>.", + "html": "

Store Tag Multiple writes a naturally aligned block of N Allocation Tags, where the size of N is identified in GMID_EL1.BS, and the Allocation Tag written to address A is taken from the source register at 4*A<7:4>+3:4*A<7:4>.

This instruction is undefined at EL0.

This instruction generates an Unchecked access.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STGP": + return { + "tooltip": "Store Allocation Tag and Pair of registers stores an Allocation Tag and two 64-bit doublewords to memory, from two registers. The address used for the store is calculated from the base register and an immediate signed offset scaled by the Tag granule. The Allocation Tag is calculated from the Logical Address Tag in the base register.", + "html": "

Store Allocation Tag and Pair of registers stores an Allocation Tag and two 64-bit doublewords to memory, from two registers. The address used for the store is calculated from the base register and an immediate signed offset scaled by the Tag granule. The Allocation Tag is calculated from the Logical Address Tag in the base register.

This instruction generates an Unchecked access.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STILP": + return { + "tooltip": "Store-Release ordered Pair of registers calculates an address from a base register value and an optional offset, and stores two 32-bit words or two 64-bit doublewords to the calculated address, from two registers. For information on single-copy atomicity and alignment requirements, see Requirements for single-copy atomicity and Alignment of data accesses. The instruction also has memory ordering semantics, as described in Load-Acquire, Load-AcquirePC, and Store-Release, with the additional requirement that", + "html": "

Store-Release ordered Pair of registers calculates an address from a base register value and an optional offset, and stores two 32-bit words or two 64-bit doublewords to the calculated address, from two registers. For information on single-copy atomicity and alignment requirements, see Requirements for single-copy atomicity and Alignment of data accesses. The instruction also has memory ordering semantics, as described in Load-Acquire, Load-AcquirePC, and Store-Release, with the additional requirement that:

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STL1": + return { + "tooltip": "Store-Release a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.", + "html": "

Store-Release a single-element structure from one lane of one register. This instruction stores the specified element of a SIMD&FP register to memory.

The instruction also has memory ordering semantics, as described in Load-Acquire, Load-AcquirePC, and Store-Release. For information about memory accesses, see Load/Store addressing modes.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STLLR": + return { + "tooltip": "Store LORelease Register stores a 32-bit word or a 64-bit doubleword to a memory location, from a register. The instruction also has memory ordering semantics as described in Load LOAcquire, Store LORelease. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Store LORelease Register stores a 32-bit word or a 64-bit doubleword to a memory location, from a register. The instruction also has memory ordering semantics as described in Load LOAcquire, Store LORelease. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STLLRB": + return { + "tooltip": "Store LORelease Register Byte stores a byte from a 32-bit register to a memory location. The instruction also has memory ordering semantics as described in Load LOAcquire, Store LORelease. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Store LORelease Register Byte stores a byte from a 32-bit register to a memory location. The instruction also has memory ordering semantics as described in Load LOAcquire, Store LORelease. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STLLRH": + return { + "tooltip": "Store LORelease Register Halfword stores a halfword from a 32-bit register to a memory location. The instruction also has memory ordering semantics as described in Load LOAcquire, Store LORelease. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Store LORelease Register Halfword stores a halfword from a 32-bit register to a memory location. The instruction also has memory ordering semantics as described in Load LOAcquire, Store LORelease. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STLR": + return { + "tooltip": "Store-Release Register stores a 32-bit word or a 64-bit doubleword to a memory location, from a register. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Store-Release Register stores a 32-bit word or a 64-bit doubleword to a memory location, from a register. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STLRB": + return { + "tooltip": "Store-Release Register Byte stores a byte from a 32-bit register to a memory location. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Store-Release Register Byte stores a byte from a 32-bit register to a memory location. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STLRH": + return { + "tooltip": "Store-Release Register Halfword stores a halfword from a 32-bit register to a memory location. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Store-Release Register Halfword stores a halfword from a 32-bit register to a memory location. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STLUR": + return { + "tooltip": "Store-Release SIMD&FP Register (unscaled offset). This instruction stores a single SIMD&FP register to memory. The address that is used for the store is calculated from a base register value and an optional immediate offset.", + "html": "

Store-Release SIMD&FP Register (unscaled offset). This instruction stores a single SIMD&FP register to memory. The address that is used for the store is calculated from a base register value and an optional immediate offset.

The instruction has memory ordering semantics, as described in Load-Acquire, Load-AcquirePC, and Store-Release.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STLUR": + return { + "tooltip": "Store-Release Register (unscaled) calculates an address from a base register value and an immediate offset, and stores a 32-bit word or a 64-bit doubleword to the calculated address, from a register.", + "html": "

Store-Release Register (unscaled) calculates an address from a base register value and an immediate offset, and stores a 32-bit word or a 64-bit doubleword to the calculated address, from a register.

The instruction has memory ordering semantics as described in Load-Acquire, Load-AcquirePC, and Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STLURB": + return { + "tooltip": "Store-Release Register Byte (unscaled) calculates an address from a base register value and an immediate offset, and stores a byte to the calculated address, from a 32-bit register.", + "html": "

Store-Release Register Byte (unscaled) calculates an address from a base register value and an immediate offset, and stores a byte to the calculated address, from a 32-bit register.

The instruction has memory ordering semantics as described in Load-Acquire, Load-AcquirePC, and Store-Release

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STLURH": + return { + "tooltip": "Store-Release Register Halfword (unscaled) calculates an address from a base register value and an immediate offset, and stores a halfword to the calculated address, from a 32-bit register.", + "html": "

Store-Release Register Halfword (unscaled) calculates an address from a base register value and an immediate offset, and stores a halfword to the calculated address, from a 32-bit register.

The instruction has memory ordering semantics as described in Load-Acquire, Load-AcquirePC, and Store-Release

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STLXP": + return { + "tooltip": "Store-Release Exclusive Pair of registers stores two 32-bit words or two 64-bit doublewords to a memory location if the PE has exclusive access to the memory address, from two registers, and returns a status value of 0 if the store was successful, or of 1 if no store was performed. See Synchronization and semaphores. For information on single-copy atomicity and alignment requirements, see Requirements for single-copy atomicity and Alignment of data accesses. If a 64-bit pair Store-Exclusive succeeds, it causes a single-copy atomic update of the 128-bit memory location being updated. The instruction also has memory ordering semantics, as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Store-Release Exclusive Pair of registers stores two 32-bit words or two 64-bit doublewords to a memory location if the PE has exclusive access to the memory address, from two registers, and returns a status value of 0 if the store was successful, or of 1 if no store was performed. See Synchronization and semaphores. For information on single-copy atomicity and alignment requirements, see Requirements for single-copy atomicity and Alignment of data accesses. If a 64-bit pair Store-Exclusive succeeds, it causes a single-copy atomic update of the 128-bit memory location being updated. The instruction also has memory ordering semantics, as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STLXR": + return { + "tooltip": "Store-Release Exclusive Register stores a 32-bit word or a 64-bit doubleword to memory if the PE has exclusive access to the memory address, from two registers, and returns a status value of 0 if the store was successful, or of 1 if no store was performed. See Synchronization and semaphores. The memory access is atomic. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Store-Release Exclusive Register stores a 32-bit word or a 64-bit doubleword to memory if the PE has exclusive access to the memory address, from two registers, and returns a status value of 0 if the store was successful, or of 1 if no store was performed. See Synchronization and semaphores. The memory access is atomic. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STLXRB": + return { + "tooltip": "Store-Release Exclusive Register Byte stores a byte from a 32-bit register to memory if the PE has exclusive access to the memory address, and returns a status value of 0 if the store was successful, or of 1 if no store was performed. See Synchronization and semaphores. The memory access is atomic. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Store-Release Exclusive Register Byte stores a byte from a 32-bit register to memory if the PE has exclusive access to the memory address, and returns a status value of 0 if the store was successful, or of 1 if no store was performed. See Synchronization and semaphores. The memory access is atomic. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STLXRH": + return { + "tooltip": "Store-Release Exclusive Register Halfword stores a halfword from a 32-bit register to memory if the PE has exclusive access to the memory address, and returns a status value of 0 if the store was successful, or of 1 if no store was performed. See Synchronization and semaphores. The memory access is atomic. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Store-Release Exclusive Register Halfword stores a halfword from a 32-bit register to memory if the PE has exclusive access to the memory address, and returns a status value of 0 if the store was successful, or of 1 if no store was performed. See Synchronization and semaphores. The memory access is atomic. The instruction also has memory ordering semantics as described in Load-Acquire, Store-Release. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNP": + return { + "tooltip": "Store Pair of SIMD&FP registers, with Non-temporal hint. This instruction stores a pair of SIMD&FP registers to memory, issuing a hint to the memory system that the access is non-temporal. The address used for the store is calculated from an address from a base register value and an immediate offset. For information about non-temporal pair instructions, see Load/Store SIMD and Floating-point Non-temporal pair.", + "html": "

Store Pair of SIMD&FP registers, with Non-temporal hint. This instruction stores a pair of SIMD&FP registers to memory, issuing a hint to the memory system that the access is non-temporal. The address used for the store is calculated from an address from a base register value and an immediate offset. For information about non-temporal pair instructions, see Load/Store SIMD and Floating-point Non-temporal pair.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNP": + return { + "tooltip": "Store Pair of Registers, with non-temporal hint, calculates an address from a base register value and an immediate offset, and stores two 32-bit words or two 64-bit doublewords to the calculated address, from two registers. For information about memory accesses, see Load/Store addressing modes. For information about Non-temporal pair instructions, see Load/Store Non-temporal pair.", + "html": "

Store Pair of Registers, with non-temporal hint, calculates an address from a base register value and an immediate offset, and stores two 32-bit words or two 64-bit doublewords to the calculated address, from two registers. For information about memory accesses, see Load/Store addressing modes. For information about Non-temporal pair instructions, see Load/Store Non-temporal pair.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1B": + return { + "tooltip": "Contiguous store non-temporal of bytes from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous store non-temporal of bytes from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1B": + return { + "tooltip": "Contiguous store non-temporal of bytes from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous store non-temporal of bytes from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1B": + return { + "tooltip": "Contiguous store non-temporal of bytes from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous store non-temporal of bytes from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1B": + return { + "tooltip": "Contiguous store non-temporal of bytes from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous store non-temporal of bytes from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1B": + return { + "tooltip": "Scatter store non-temporal of bytes from the active elements of a vector register to the memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements are not written to memory.", + "html": "

Scatter store non-temporal of bytes from the active elements of a vector register to the memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1B": + return { + "tooltip": "Contiguous store non-temporal of bytes from elements of a vector register to the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements are not written to memory.", + "html": "

Contiguous store non-temporal of bytes from elements of a vector register to the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1B": + return { + "tooltip": "Contiguous store non-temporal of bytes from elements of a vector register to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements are not written to memory.", + "html": "

Contiguous store non-temporal of bytes from elements of a vector register to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1D": + return { + "tooltip": "Contiguous store non-temporal of doublewords from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous store non-temporal of doublewords from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1D": + return { + "tooltip": "Contiguous store non-temporal of doublewords from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous store non-temporal of doublewords from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1D": + return { + "tooltip": "Contiguous store non-temporal of doublewords from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous store non-temporal of doublewords from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1D": + return { + "tooltip": "Contiguous store non-temporal of doublewords from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous store non-temporal of doublewords from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1D": + return { + "tooltip": "Scatter store non-temporal of doublewords from the active elements of a vector register to the memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements are not written to memory.", + "html": "

Scatter store non-temporal of doublewords from the active elements of a vector register to the memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1D": + return { + "tooltip": "Contiguous store non-temporal of doublewords from elements of a vector register to the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements are not written to memory.", + "html": "

Contiguous store non-temporal of doublewords from elements of a vector register to the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1D": + return { + "tooltip": "Contiguous store non-temporal of doublewords from elements of a vector register to the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 8 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements are not written to memory.", + "html": "

Contiguous store non-temporal of doublewords from elements of a vector register to the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 8 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1H": + return { + "tooltip": "Contiguous store non-temporal of halfwords from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous store non-temporal of halfwords from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1H": + return { + "tooltip": "Contiguous store non-temporal of halfwords from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous store non-temporal of halfwords from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1H": + return { + "tooltip": "Contiguous store non-temporal of halfwords from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous store non-temporal of halfwords from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1H": + return { + "tooltip": "Contiguous store non-temporal of halfwords from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous store non-temporal of halfwords from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1H": + return { + "tooltip": "Scatter store non-temporal of halfwords from the active elements of a vector register to the memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements are not written to memory.", + "html": "

Scatter store non-temporal of halfwords from the active elements of a vector register to the memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1H": + return { + "tooltip": "Contiguous store non-temporal of halfwords from elements of a vector register to the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements are not written to memory.", + "html": "

Contiguous store non-temporal of halfwords from elements of a vector register to the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1H": + return { + "tooltip": "Contiguous store non-temporal of halfwords from elements of a vector register to the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 2 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements are not written to memory.", + "html": "

Contiguous store non-temporal of halfwords from elements of a vector register to the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 2 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1W": + return { + "tooltip": "Contiguous store non-temporal of words from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous store non-temporal of words from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1W": + return { + "tooltip": "Contiguous store non-temporal of words from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous store non-temporal of words from elements of two or four consecutive vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1W": + return { + "tooltip": "Contiguous store non-temporal of words from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.", + "html": "

Contiguous store non-temporal of words from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and immediate index which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address.

Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1W": + return { + "tooltip": "Contiguous store non-temporal of words from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.", + "html": "

Contiguous store non-temporal of words from elements of two or four strided vector registers to the memory address generated by a 64-bit scalar base and scalar index which is added to the base address. After each element access the index value is incremented, but the index register is not updated.

Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1W": + return { + "tooltip": "Scatter store non-temporal of words from the active elements of a vector register to the memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements are not written to memory.", + "html": "

Scatter store non-temporal of words from the active elements of a vector register to the memory addresses generated by a vector base plus a 64-bit unscaled scalar register offset. Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1W": + return { + "tooltip": "Contiguous store non-temporal of words from elements of a vector register to the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements are not written to memory.", + "html": "

Contiguous store non-temporal of words from elements of a vector register to the memory address generated by a 64-bit scalar base and immediate index in the range -8 to 7 which is multiplied by the vector's in-memory size, irrespective of predication, and added to the base address. Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STNT1W": + return { + "tooltip": "Contiguous store non-temporal of words from elements of a vector register to the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 4 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements are not written to memory.", + "html": "

Contiguous store non-temporal of words from elements of a vector register to the memory address generated by a 64-bit scalar base and scalar index which is multiplied by 4 and added to the base address. After each element access the index value is incremented, but the index register is not updated. Inactive elements are not written to memory.

A non-temporal store is a hint to the system that this data is unlikely to be referenced again soon.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STP": + return { + "tooltip": "Store Pair of SIMD&FP registers. This instruction stores a pair of SIMD&FP registers to memory. The address used for the store is calculated from a base register value and an immediate offset.", + "html": "

Store Pair of SIMD&FP registers. This instruction stores a pair of SIMD&FP registers to memory. The address used for the store is calculated from a base register value and an immediate offset.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STP": + return { + "tooltip": "Store Pair of Registers calculates an address from a base register value and an immediate offset, and stores two 32-bit words or two 64-bit doublewords to the calculated address, from two registers. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Store Pair of Registers calculates an address from a base register value and an immediate offset, and stores two 32-bit words or two 64-bit doublewords to the calculated address, from two registers. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STR": + return { + "tooltip": "Store SIMD&FP register (immediate offset). This instruction stores a single SIMD&FP register to memory. The address that is used for the store is calculated from a base register value and an immediate offset.", + "html": "

Store SIMD&FP register (immediate offset). This instruction stores a single SIMD&FP register to memory. The address that is used for the store is calculated from a base register value and an immediate offset.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STR": + return { + "tooltip": "Store Register (immediate) stores a word or a doubleword from a register to memory. The address that is used for the store is calculated from a base register and an immediate offset. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Store Register (immediate) stores a word or a doubleword from a register to memory. The address that is used for the store is calculated from a base register and an immediate offset. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STR": + return { + "tooltip": "Store a predicate register to a memory address generated by a 64-bit scalar base, plus an immediate offset in the range -256 to 255 which is multiplied by the current predicate register size in bytes. This instruction is unpredicated.", + "html": "

Store a predicate register to a memory address generated by a 64-bit scalar base, plus an immediate offset in the range -256 to 255 which is multiplied by the current predicate register size in bytes. This instruction is unpredicated.

The store is performed as contiguous byte accesses, each containing 8 consecutive predicate bits in ascending element order, with no endian conversion and no guarantee of single-copy atomicity larger than a byte. However, if alignment is checked, then a general-purpose base register must be aligned to 2 bytes.

For programmer convenience, an assembler must also accept a predicate-as-counter register name for the source predicate register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STR": + return { + "tooltip": "Store SIMD&FP register (register offset). This instruction stores a single SIMD&FP register to memory. The address that is used for the store is calculated from a base register value and an offset register value. The offset can be optionally shifted and extended.", + "html": "

Store SIMD&FP register (register offset). This instruction stores a single SIMD&FP register to memory. The address that is used for the store is calculated from a base register value and an offset register value. The offset can be optionally shifted and extended.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STR": + return { + "tooltip": "Store Register (register) calculates an address from a base register value and an offset register value, and stores a 32-bit word or a 64-bit doubleword to the calculated address, from a register. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Store Register (register) calculates an address from a base register value and an offset register value, and stores a 32-bit word or a 64-bit doubleword to the calculated address, from a register. For information about memory accesses, see Load/Store addressing modes.

The instruction uses an offset addressing mode, that calculates the address used for the memory access from a base register value and an offset register value. The offset can be optionally shifted and extended.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STR": + return { + "tooltip": "Store a vector register to a memory address generated by a 64-bit scalar base, plus an immediate offset in the range -256 to 255 which is multiplied by the current vector register size in bytes. This instruction is unpredicated.", + "html": "

Store a vector register to a memory address generated by a 64-bit scalar base, plus an immediate offset in the range -256 to 255 which is multiplied by the current vector register size in bytes. This instruction is unpredicated.

The store is performed as contiguous byte accesses, with no endian conversion and no guarantee of single-copy atomicity larger than a byte. However, if alignment is checked, then the base register must be aligned to 16 bytes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STR": + return { + "tooltip": "The ZA array vector is selected by the sum of the vector select register and immediate offset, modulo the number of bytes in a Streaming SVE vector. The immediate offset is in the range 0 to 15. The memory address is generated by a 64-bit scalar base, plus the same optional immediate offset multiplied by the current vector length in bytes. This instruction is unpredicated.", + "html": "

The ZA array vector is selected by the sum of the vector select register and immediate offset, modulo the number of bytes in a Streaming SVE vector. The immediate offset is in the range 0 to 15. The memory address is generated by a 64-bit scalar base, plus the same optional immediate offset multiplied by the current vector length in bytes. This instruction is unpredicated.

The store is performed as contiguous byte accesses, with no endian conversion and no guarantee of single-copy atomicity larger than a byte. However, if alignment is checked, then the base register must be aligned to 16 bytes.

This instruction does not require the PE to be in Streaming SVE mode, and it is expected that this instruction will not experience a significant slowdown due to contention with other PEs that are executing in Streaming SVE mode.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STR": + return { + "tooltip": "Store the 64-byte ZT0 register to the memory address provided in the 64-bit scalar base register. This instruction is unpredicated.", + "html": "

Store the 64-byte ZT0 register to the memory address provided in the 64-bit scalar base register. This instruction is unpredicated.

The store is performed as contiguous byte accesses, with no endian conversion and no guarantee of single-copy atomicity larger than a byte. However, if alignment is checked, then the base register must be aligned to 16 bytes.

This instruction does not require the PE to be in Streaming SVE mode, and it is expected that this instruction will not experience a significant slowdown due to contention with other PEs that are executing in Streaming SVE mode.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STRB": + return { + "tooltip": "Store Register Byte (immediate) stores the least significant byte of a 32-bit register to memory. The address that is used for the store is calculated from a base register and an immediate offset. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Store Register Byte (immediate) stores the least significant byte of a 32-bit register to memory. The address that is used for the store is calculated from a base register and an immediate offset. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STRB": + return { + "tooltip": "Store Register Byte (register) calculates an address from a base register value and an offset register value, and stores a byte from a 32-bit register to the calculated address. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Store Register Byte (register) calculates an address from a base register value and an offset register value, and stores a byte from a 32-bit register to the calculated address. For information about memory accesses, see Load/Store addressing modes.

The instruction uses an offset addressing mode, that calculates the address used for the memory access from a base register value and an offset register value. The offset can be optionally shifted and extended.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STRH": + return { + "tooltip": "Store Register Halfword (immediate) stores the least significant halfword of a 32-bit register to memory. The address that is used for the store is calculated from a base register and an immediate offset. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Store Register Halfword (immediate) stores the least significant halfword of a 32-bit register to memory. The address that is used for the store is calculated from a base register and an immediate offset. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STRH": + return { + "tooltip": "Store Register Halfword (register) calculates an address from a base register value and an offset register value, and stores a halfword from a 32-bit register to the calculated address. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Store Register Halfword (register) calculates an address from a base register value and an offset register value, and stores a halfword from a 32-bit register to the calculated address. For information about memory accesses, see Load/Store addressing modes.

The instruction uses an offset addressing mode, that calculates the address used for the memory access from a base register value and an offset register value. The offset can be optionally shifted and extended.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STSET": + case "STSETL": + return { + "tooltip": "Atomic bit set on word or doubleword in memory, without return, atomically loads a 32-bit word or 64-bit doubleword from memory, performs a bitwise OR with the value held in a register on it, and stores the result back to memory.", + "html": "

Atomic bit set on word or doubleword in memory, without return, atomically loads a 32-bit word or 64-bit doubleword from memory, performs a bitwise OR with the value held in a register on it, and stores the result back to memory.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STSETB": + case "STSETLB": + return { + "tooltip": "Atomic bit set on byte in memory, without return, atomically loads an 8-bit byte from memory, performs a bitwise OR with the value held in a register on it, and stores the result back to memory.", + "html": "

Atomic bit set on byte in memory, without return, atomically loads an 8-bit byte from memory, performs a bitwise OR with the value held in a register on it, and stores the result back to memory.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STSETH": + case "STSETLH": + return { + "tooltip": "Atomic bit set on halfword in memory, without return, atomically loads a 16-bit halfword from memory, performs a bitwise OR with the value held in a register on it, and stores the result back to memory.", + "html": "

Atomic bit set on halfword in memory, without return, atomically loads a 16-bit halfword from memory, performs a bitwise OR with the value held in a register on it, and stores the result back to memory.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STSMAX": + case "STSMAXL": + return { + "tooltip": "Atomic signed maximum on word or doubleword in memory, without return, atomically loads a 32-bit word or 64-bit doubleword from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as signed numbers.", + "html": "

Atomic signed maximum on word or doubleword in memory, without return, atomically loads a 32-bit word or 64-bit doubleword from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as signed numbers.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STSMAXB": + case "STSMAXLB": + return { + "tooltip": "Atomic signed maximum on byte in memory, without return, atomically loads an 8-bit byte from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as signed numbers.", + "html": "

Atomic signed maximum on byte in memory, without return, atomically loads an 8-bit byte from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as signed numbers.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STSMAXH": + case "STSMAXLH": + return { + "tooltip": "Atomic signed maximum on halfword in memory, without return, atomically loads a 16-bit halfword from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as signed numbers.", + "html": "

Atomic signed maximum on halfword in memory, without return, atomically loads a 16-bit halfword from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as signed numbers.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STSMIN": + case "STSMINL": + return { + "tooltip": "Atomic signed minimum on word or doubleword in memory, without return, atomically loads a 32-bit word or 64-bit doubleword from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as signed numbers.", + "html": "

Atomic signed minimum on word or doubleword in memory, without return, atomically loads a 32-bit word or 64-bit doubleword from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as signed numbers.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STSMINB": + case "STSMINLB": + return { + "tooltip": "Atomic signed minimum on byte in memory, without return, atomically loads an 8-bit byte from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as signed numbers.", + "html": "

Atomic signed minimum on byte in memory, without return, atomically loads an 8-bit byte from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as signed numbers.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STSMINH": + case "STSMINLH": + return { + "tooltip": "Atomic signed minimum on halfword in memory, without return, atomically loads a 16-bit halfword from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as signed numbers.", + "html": "

Atomic signed minimum on halfword in memory, without return, atomically loads a 16-bit halfword from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as signed numbers.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STTR": + return { + "tooltip": "Store Register (unprivileged) stores a word or doubleword from a register to memory. The address that is used for the store is calculated from a base register and an immediate offset.", + "html": "

Store Register (unprivileged) stores a word or doubleword from a register to memory. The address that is used for the store is calculated from a base register and an immediate offset.

Memory accesses made by the instruction behave as if the instruction was executed at EL0 if the Effective value of PSTATE.UAO is 0 and either:

Otherwise, the memory access operates with the restrictions determined by the Exception level at which the instruction is executed. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STTRB": + return { + "tooltip": "Store Register Byte (unprivileged) stores a byte from a 32-bit register to memory. The address that is used for the store is calculated from a base register and an immediate offset.", + "html": "

Store Register Byte (unprivileged) stores a byte from a 32-bit register to memory. The address that is used for the store is calculated from a base register and an immediate offset.

Memory accesses made by the instruction behave as if the instruction was executed at EL0 if the Effective value of PSTATE.UAO is 0 and either:

Otherwise, the memory access operates with the restrictions determined by the Exception level at which the instruction is executed. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STTRH": + return { + "tooltip": "Store Register Halfword (unprivileged) stores a halfword from a 32-bit register to memory. The address that is used for the store is calculated from a base register and an immediate offset.", + "html": "

Store Register Halfword (unprivileged) stores a halfword from a 32-bit register to memory. The address that is used for the store is calculated from a base register and an immediate offset.

Memory accesses made by the instruction behave as if the instruction was executed at EL0 if the Effective value of PSTATE.UAO is 0 and either:

Otherwise, the memory access operates with the restrictions determined by the Exception level at which the instruction is executed. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STUMAX": + case "STUMAXL": + return { + "tooltip": "Atomic unsigned maximum on word or doubleword in memory, without return, atomically loads a 32-bit word or 64-bit doubleword from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as unsigned numbers.", + "html": "

Atomic unsigned maximum on word or doubleword in memory, without return, atomically loads a 32-bit word or 64-bit doubleword from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as unsigned numbers.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STUMAXB": + case "STUMAXLB": + return { + "tooltip": "Atomic unsigned maximum on byte in memory, without return, atomically loads an 8-bit byte from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as unsigned numbers.", + "html": "

Atomic unsigned maximum on byte in memory, without return, atomically loads an 8-bit byte from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as unsigned numbers.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STUMAXH": + case "STUMAXLH": + return { + "tooltip": "Atomic unsigned maximum on halfword in memory, without return, atomically loads a 16-bit halfword from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as unsigned numbers.", + "html": "

Atomic unsigned maximum on halfword in memory, without return, atomically loads a 16-bit halfword from memory, compares it against the value held in a register, and stores the larger value back to memory, treating the values as unsigned numbers.

For information about memory accesses see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STUMIN": + case "STUMINL": + return { + "tooltip": "Atomic unsigned minimum on word or doubleword in memory, without return, atomically loads a 32-bit word or 64-bit doubleword from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as unsigned numbers.", + "html": "

Atomic unsigned minimum on word or doubleword in memory, without return, atomically loads a 32-bit word or 64-bit doubleword from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as unsigned numbers.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STUMINB": + case "STUMINLB": + return { + "tooltip": "Atomic unsigned minimum on byte in memory, without return, atomically loads an 8-bit byte from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as unsigned numbers.", + "html": "

Atomic unsigned minimum on byte in memory, without return, atomically loads an 8-bit byte from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as unsigned numbers.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STUMINH": + case "STUMINLH": + return { + "tooltip": "Atomic unsigned minimum on halfword in memory, without return, atomically loads a 16-bit halfword from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as unsigned numbers.", + "html": "

Atomic unsigned minimum on halfword in memory, without return, atomically loads a 16-bit halfword from memory, compares it against the value held in a register, and stores the smaller value back to memory, treating the values as unsigned numbers.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STUR": + return { + "tooltip": "Store SIMD&FP register (unscaled offset). This instruction stores a single SIMD&FP register to memory. The address that is used for the store is calculated from a base register value and an optional immediate offset.", + "html": "

Store SIMD&FP register (unscaled offset). This instruction stores a single SIMD&FP register to memory. The address that is used for the store is calculated from a base register value and an optional immediate offset.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STUR": + return { + "tooltip": "Store Register (unscaled) calculates an address from a base register value and an immediate offset, and stores a 32-bit word or a 64-bit doubleword to the calculated address, from a register. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Store Register (unscaled) calculates an address from a base register value and an immediate offset, and stores a 32-bit word or a 64-bit doubleword to the calculated address, from a register. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STURB": + return { + "tooltip": "Store Register Byte (unscaled) calculates an address from a base register value and an immediate offset, and stores a byte to the calculated address, from a 32-bit register. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Store Register Byte (unscaled) calculates an address from a base register value and an immediate offset, and stores a byte to the calculated address, from a 32-bit register. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STURH": + return { + "tooltip": "Store Register Halfword (unscaled) calculates an address from a base register value and an immediate offset, and stores a halfword to the calculated address, from a 32-bit register. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Store Register Halfword (unscaled) calculates an address from a base register value and an immediate offset, and stores a halfword to the calculated address, from a 32-bit register. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STXP": + return { + "tooltip": "Store Exclusive Pair of registers stores two 32-bit words or two 64-bit doublewords from two registers to a memory location if the PE has exclusive access to the memory address, and returns a status value of 0 if the store was successful, or of 1 if no store was performed. See Synchronization and semaphores. For information on single-copy atomicity and alignment requirements, see Requirements for single-copy atomicity and Alignment of data accesses. If a 64-bit pair Store-Exclusive succeeds, it causes a single-copy atomic update of the 128-bit memory location being updated. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Store Exclusive Pair of registers stores two 32-bit words or two 64-bit doublewords from two registers to a memory location if the PE has exclusive access to the memory address, and returns a status value of 0 if the store was successful, or of 1 if no store was performed. See Synchronization and semaphores. For information on single-copy atomicity and alignment requirements, see Requirements for single-copy atomicity and Alignment of data accesses. If a 64-bit pair Store-Exclusive succeeds, it causes a single-copy atomic update of the 128-bit memory location being updated. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STXR": + return { + "tooltip": "Store Exclusive Register stores a 32-bit word or a 64-bit doubleword from a register to memory if the PE has exclusive access to the memory address, and returns a status value of 0 if the store was successful, or of 1 if no store was performed. See Synchronization and semaphores. For information about memory accesses, see Load/Store addressing modes.", + "html": "

Store Exclusive Register stores a 32-bit word or a 64-bit doubleword from a register to memory if the PE has exclusive access to the memory address, and returns a status value of 0 if the store was successful, or of 1 if no store was performed. See Synchronization and semaphores. For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STXRB": + return { + "tooltip": "Store Exclusive Register Byte stores a byte from a register to memory if the PE has exclusive access to the memory address, and returns a status value of 0 if the store was successful, or of 1 if no store was performed. See Synchronization and semaphores. The memory access is atomic.", + "html": "

Store Exclusive Register Byte stores a byte from a register to memory if the PE has exclusive access to the memory address, and returns a status value of 0 if the store was successful, or of 1 if no store was performed. See Synchronization and semaphores. The memory access is atomic.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STXRH": + return { + "tooltip": "Store Exclusive Register Halfword stores a halfword from a register to memory if the PE has exclusive access to the memory address, and returns a status value of 0 if the store was successful, or of 1 if no store was performed. See Synchronization and semaphores. The memory access is atomic.", + "html": "

Store Exclusive Register Halfword stores a halfword from a register to memory if the PE has exclusive access to the memory address, and returns a status value of 0 if the store was successful, or of 1 if no store was performed. See Synchronization and semaphores. The memory access is atomic.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STZ2G": + return { + "tooltip": "Store Allocation Tags, Zeroing stores an Allocation Tag to two Tag granules of memory, zeroing the associated data locations. The address used for the store is calculated from the base register and an immediate signed offset scaled by the Tag granule. The Allocation Tag is calculated from the Logical Address Tag in the source register.", + "html": "

Store Allocation Tags, Zeroing stores an Allocation Tag to two Tag granules of memory, zeroing the associated data locations. The address used for the store is calculated from the base register and an immediate signed offset scaled by the Tag granule. The Allocation Tag is calculated from the Logical Address Tag in the source register.

This instruction generates an Unchecked access.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STZG": + return { + "tooltip": "Store Allocation Tag, Zeroing stores an Allocation Tag to memory, zeroing the associated data location. The address used for the store is calculated from the base register and an immediate signed offset scaled by the Tag granule. The Allocation Tag is calculated from the Logical Address Tag in the source register.", + "html": "

Store Allocation Tag, Zeroing stores an Allocation Tag to memory, zeroing the associated data location. The address used for the store is calculated from the base register and an immediate signed offset scaled by the Tag granule. The Allocation Tag is calculated from the Logical Address Tag in the source register.

This instruction generates an Unchecked access.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "STZGM": + return { + "tooltip": "Store Tag and Zero Multiple writes a naturally aligned block of N Allocation Tags and stores zero to the associated data locations, where the size of N is identified in DCZID_EL0.BS, and the Allocation Tag is taken from the source register bits<3:0>.", + "html": "

Store Tag and Zero Multiple writes a naturally aligned block of N Allocation Tags and stores zero to the associated data locations, where the size of N is identified in DCZID_EL0.BS, and the Allocation Tag is taken from the source register bits<3:0>.

This instruction is undefined at EL0.

This instruction generates an Unchecked access.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUB": + return { + "tooltip": "Subtract (extended register) subtracts a sign or zero-extended register value, followed by an optional left shift amount, from a register value, and writes the result to the destination register. The argument that is extended from the register can be a byte, halfword, word, or doubleword.", + "html": "

Subtract (extended register) subtracts a sign or zero-extended register value, followed by an optional left shift amount, from a register value, and writes the result to the destination register. The argument that is extended from the <Rm> register can be a byte, halfword, word, or doubleword.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUB": + return { + "tooltip": "Subtract (immediate) subtracts an optionally-shifted immediate value from a register value, and writes the result to the destination register.", + "html": "

Subtract (immediate) subtracts an optionally-shifted immediate value from a register value, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUB": + return { + "tooltip": "Subtract (shifted register) subtracts an optionally-shifted register value from a register value, and writes the result to the destination register.", + "html": "

Subtract (shifted register) subtracts an optionally-shifted register value from a register value, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUB": + return { + "tooltip": "Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Subtract (vector). This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUB": + return { + "tooltip": "Subtract active elements of the second source vector from corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Subtract active elements of the second source vector from corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUB": + return { + "tooltip": "Subtract an unsigned immediate from each element of the source vector, and destructively place the results in the corresponding elements of the source vector. This instruction is unpredicated.", + "html": "

Subtract an unsigned immediate from each element of the source vector, and destructively place the results in the corresponding elements of the source vector. This instruction is unpredicated.

The immediate is an unsigned value in the range 0 to 255, and for element widths of 16 bits or higher it may also be a positive multiple of 256 in the range 256 to 65280.

The immediate is encoded in 8 bits with an optional left shift by 8. The preferred disassembly when the shift option is specified is \"#<uimm8>, LSL #8\". However an assembler and disassembler may also allow use of the shifted 16-bit value unless the immediate is 0 and the shift amount is 8, which must be unambiguously described as \"#0, LSL #8\".

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUB": + return { + "tooltip": "Subtract all elements of the second source vector from corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.", + "html": "

Subtract all elements of the second source vector from corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUB": + return { + "tooltip": "Destructively subtract all elements of the two or four source vectors from the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

Destructively subtract all elements of the two or four source vectors from the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 64-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUB": + return { + "tooltip": "Subtract all corresponding elements of the second source vector from the two or four first source vectors and place the results in the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

Subtract all corresponding elements of the second source vector from the two or four first source vectors and place the results in the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 64-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUB": + return { + "tooltip": "Subtract all corresponding elements of the two or four second source vectors from first source vectors and place the results in the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

Subtract all corresponding elements of the two or four second source vectors from first source vectors and place the results in the corresponding elements of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 64-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUBG": + return { + "tooltip": "Subtract with Tag subtracts an immediate value scaled by the Tag granule from the address in the source register, modifies the Logical Address Tag of the address using an immediate value, and writes the result to the destination register. Tags specified in GCR_EL1.Exclude are excluded from the possible outputs when modifying the Logical Address Tag.", + "html": "

Subtract with Tag subtracts an immediate value scaled by the Tag granule from the address in the source register, modifies the Logical Address Tag of the address using an immediate value, and writes the result to the destination register. Tags specified in GCR_EL1.Exclude are excluded from the possible outputs when modifying the Logical Address Tag.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUBHN": + case "SUBHN2": + return { + "tooltip": "Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.", + "html": "

Subtract returning High Narrow. This instruction subtracts each vector element in the second source SIMD&FP register from the corresponding vector element in the first source SIMD&FP register, places the most significant half of the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are signed integer values.

The results are truncated. For rounded results, see RSUBHN.

The SUBHN instruction writes the vector to the lower half of the destination register and clears the upper half, while the SUBHN2 instruction writes the vector to the upper half of the destination register without affecting the other bits of the register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUBHNB": + return { + "tooltip": "Subtract each vector element of the second source vector from the corresponding vector element in the first source vector, and place the most significant half of the result in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero. This instruction is unpredicated.", + "html": "

Subtract each vector element of the second source vector from the corresponding vector element in the first source vector, and place the most significant half of the result in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUBHNT": + return { + "tooltip": "Subtract each vector element of the second source vector from the corresponding vector element in the first source vector, and place the most significant half of the result in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. This instruction is unpredicated.", + "html": "

Subtract each vector element of the second source vector from the corresponding vector element in the first source vector, and place the most significant half of the result in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUBP": + return { + "tooltip": "Subtract Pointer subtracts the 56-bit address held in the second source register from the 56-bit address held in the first source register, sign-extends the result to 64-bits, and writes the result to the destination register.", + "html": "

Subtract Pointer subtracts the 56-bit address held in the second source register from the 56-bit address held in the first source register, sign-extends the result to 64-bits, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUBPS": + return { + "tooltip": "Subtract Pointer, setting Flags subtracts the 56-bit address held in the second source register from the 56-bit address held in the first source register, sign-extends the result to 64-bits, and writes the result to the destination register. It updates the condition flags based on the result of the subtraction.", + "html": "

Subtract Pointer, setting Flags subtracts the 56-bit address held in the second source register from the 56-bit address held in the first source register, sign-extends the result to 64-bits, and writes the result to the destination register. It updates the condition flags based on the result of the subtraction.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUBR": + return { + "tooltip": "Reversed subtract active elements of the first source vector from corresponding elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Reversed subtract active elements of the first source vector from corresponding elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUBR": + return { + "tooltip": "Reversed subtract from an unsigned immediate each element of the source vector, and destructively place the results in the corresponding elements of the source vector. This instruction is unpredicated.", + "html": "

Reversed subtract from an unsigned immediate each element of the source vector, and destructively place the results in the corresponding elements of the source vector. This instruction is unpredicated.

The immediate is an unsigned value in the range 0 to 255, and for element widths of 16 bits or higher it may also be a positive multiple of 256 in the range 256 to 65280.

The immediate is encoded in 8 bits with an optional left shift by 8. The preferred disassembly when the shift option is specified is \"#<uimm8>, LSL #8\". However an assembler and disassembler may also allow use of the shifted 16-bit value unless the immediate is 0 and the shift amount is 8, which must be unambiguously described as \"#0, LSL #8\".

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUBS": + return { + "tooltip": "Subtract (extended register), setting flags, subtracts a sign or zero-extended register value, followed by an optional left shift amount, from a register value, and writes the result to the destination register. The argument that is extended from the register can be a byte, halfword, word, or doubleword. It updates the condition flags based on the result.", + "html": "

Subtract (extended register), setting flags, subtracts a sign or zero-extended register value, followed by an optional left shift amount, from a register value, and writes the result to the destination register. The argument that is extended from the <Rm> register can be a byte, halfword, word, or doubleword. It updates the condition flags based on the result.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUBS": + return { + "tooltip": "Subtract (immediate), setting flags, subtracts an optionally-shifted immediate value from a register value, and writes the result to the destination register. It updates the condition flags based on the result.", + "html": "

Subtract (immediate), setting flags, subtracts an optionally-shifted immediate value from a register value, and writes the result to the destination register. It updates the condition flags based on the result.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUBS": + return { + "tooltip": "Subtract (shifted register), setting flags, subtracts an optionally-shifted register value from a register value, and writes the result to the destination register. It updates the condition flags based on the result.", + "html": "

Subtract (shifted register), setting flags, subtracts an optionally-shifted register value from a register value, and writes the result to the destination register. It updates the condition flags based on the result.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUDOT": + return { + "tooltip": "Dot product index form with signed and unsigned integers. This instruction performs the dot product of the four signed 8-bit integer values in each 32-bit element of the first source register with the four unsigned 8-bit integer values in an indexed 32-bit element of the second source register, accumulating the result into the corresponding 32-bit element of the destination vector.", + "html": "

Dot product index form with signed and unsigned integers. This instruction performs the dot product of the four signed 8-bit integer values in each 32-bit element of the first source register with the four unsigned 8-bit integer values in an indexed 32-bit element of the second source register, accumulating the result into the corresponding 32-bit element of the destination vector.

From Armv8.2 to Armv8.5, this is an optional instruction. From Armv8.6 it is mandatory for implementations that include Advanced SIMD to support it. ID_AA64ISAR1_EL1.I8MM indicates whether this instruction is supported.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUDOT": + return { + "tooltip": "The signed by unsigned integer indexed dot product instruction computes the dot product of a group of four signed 8-bit integer values held in each 32-bit element of the first source vector multiplied by a group of four unsigned 8-bit integer values in an indexed 32-bit element of the second source vector, and then destructively adds the widened dot product to the corresponding 32-bit element of the destination vector.", + "html": "

The signed by unsigned integer indexed dot product instruction computes the dot product of a group of four signed 8-bit integer values held in each 32-bit element of the first source vector multiplied by a group of four unsigned 8-bit integer values in an indexed 32-bit element of the second source vector, and then destructively adds the widened dot product to the corresponding 32-bit element of the destination vector.

The groups within the second source vector are specified using an immediate index which selects the same group position within each 128-bit vector segment. The index range is from 0 to 3. This instruction is unpredicated.

ID_AA64ZFR0_EL1.I8MM indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUDOT": + return { + "tooltip": "The signed by unsigned integer dot product instruction computes the dot product of four signed 8-bit integer values held in each 32-bit element of the two or four first source vectors and four unsigned 8-bit integer values in the corresponding indexed 32-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups.", + "html": "

The signed by unsigned integer dot product instruction computes the dot product of four signed 8-bit integer values held in each 32-bit element of the two or four first source vectors and four unsigned 8-bit integer values in the corresponding indexed 32-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups.

The groups within the second source vector are specified using an immediate element index which selects the same group position within each 128-bit vector segment. The index range is from 0 to 3, encoded in 2 bits. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUDOT": + return { + "tooltip": "The signed by unsigned integer dot product instruction computes the dot product of four signed 8-bit integer values held in each 32-bit element of the two or four first source vectors and four unsigned 8-bit integer values in the corresponding 32-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

The signed by unsigned integer dot product instruction computes the dot product of four signed 8-bit integer values held in each 32-bit element of the two or four first source vectors and four unsigned 8-bit integer values in the corresponding 32-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUMLALL": + return { + "tooltip": "This signed by unsigned integer multiply-add long-long instruction multiplies each signed 8-bit element in the one, two, or four first source vectors with each unsigned 8-bit indexed element of the second source vector, widens each product to 32-bits and destructively adds these values to the corresponding 32-bit elements of the ZA quad-vector groups.", + "html": "

This signed by unsigned integer multiply-add long-long instruction multiplies each signed 8-bit element in the one, two, or four first source vectors with each unsigned 8-bit indexed element of the second source vector, widens each product to 32-bits and destructively adds these values to the corresponding 32-bit elements of the ZA quad-vector groups.

The elements within the second source vector are specified using an immediate element index which selects the same element position within each 128-bit vector segment. The element index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 4 bits. The lowest of the four consecutive vector numbers forming the quad-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA quad-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUMLALL": + return { + "tooltip": "This signed by unsigned integer multiply-add long-long instruction multiplies each signed 8-bit element in the two or four first source vectors with each unsigned 8-bit element in the second source vector, widens each product to 32-bits and destructively adds these values to the corresponding 32-bit elements of the ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

This signed by unsigned integer multiply-add long-long instruction multiplies each signed 8-bit element in the two or four first source vectors with each unsigned 8-bit element in the second source vector, widens each product to 32-bits and destructively adds these values to the corresponding 32-bit elements of the ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA quad-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUMOPA": + return { + "tooltip": "The 8-bit integer variant works with a 32-bit element ZA tile.", + "html": "

The 8-bit integer variant works with a 32-bit element ZA tile.

The 16-bit integer variant works with a 64-bit element ZA tile.

The signed by unsigned integer sum of outer products and accumulate instructions multiply the sub-matrix in the first source vector by the sub-matrix in the second source vector. In case of the 8-bit integer variant, the first source holds SVLS\u00d74 sub-matrix of signed 8-bit integer values, and the second source holds 4\u00d7SVLS sub-matrix of unsigned 8-bit integer values. In case of the 16-bit integer variant, the first source holds SVLD\u00d74 sub-matrix of signed 16-bit integer values, and the second source holds 4\u00d7SVLD sub-matrix of unsigned 16-bit integer values.

Each source vector is independently predicated by a corresponding governing predicate. When an 8-bit source element in case of 8-bit integer variant or a 16-bit source element in case of 16-bit integer variant is Inactive, it is treated as having the value 0.

The resulting SVLS\u00d7SVLS widened 32-bit integer or SVLD\u00d7SVLD widened 64-bit integer sum of outer products is then destructively added to the 32-bit integer or 64-bit integer destination tile, respectively for 8-bit integer and 16-bit integer instruction variants. This is equivalent to performing a 4-way dot product and accumulate to each of the destination tile elements.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUMOPS": + return { + "tooltip": "The 8-bit integer variant works with a 32-bit element ZA tile.", + "html": "

The 8-bit integer variant works with a 32-bit element ZA tile.

The 16-bit integer variant works with a 64-bit element ZA tile.

The signed by unsigned integer sum of outer products and subtract instructions multiply the sub-matrix in the first source vector by the sub-matrix in the second source vector. In case of the 8-bit integer variant, the first source holds SVLS\u00d74 sub-matrix of signed 8-bit integer values, and the second source holds 4\u00d7SVLS sub-matrix of unsigned 8-bit integer values. In case of the 16-bit integer variant, the first source holds SVLD\u00d74 sub-matrix of signed 16-bit integer values, and the second source holds 4\u00d7SVLD sub-matrix of unsigned 16-bit integer values.

Each source vector is independently predicated by a corresponding governing predicate. When an 8-bit source element in case of 8-bit integer variant or a 16-bit source element in case of 16-bit integer variant is Inactive, it is treated as having the value 0.

The resulting SVLS\u00d7SVLS widened 32-bit integer or SVLD\u00d7SVLD widened 64-bit integer sum of outer products is then destructively subtracted from the 32-bit integer or 64-bit integer destination tile, respectively for 8-bit integer and 16-bit integer instruction variants. This is equivalent to performing a 4-way dot product and subtract from each of the destination tile elements.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUNPK": + return { + "tooltip": "Unpack elements from one or two source vectors and then sign-extend them to place in elements of twice their size within the two or four destination vectors.", + "html": "

Unpack elements from one or two source vectors and then sign-extend them to place in elements of twice their size within the two or four destination vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUNPKHI": + case "SUNPKLO": + return { + "tooltip": "Unpack elements from the lowest or highest half of the source vector and then sign-extend them to place in elements of twice their size within the destination vector. This instruction is unpredicated.", + "html": "

Unpack elements from the lowest or highest half of the source vector and then sign-extend them to place in elements of twice their size within the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUQADD": + return { + "tooltip": "Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.", + "html": "

Signed saturating Accumulate of Unsigned value. This instruction adds the unsigned integer values of the vector elements in the source SIMD&FP register to corresponding signed integer values of the vector elements in the destination SIMD&FP register, and writes the resulting signed integer values to the destination SIMD&FP register.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUQADD": + return { + "tooltip": "Add active unsigned elements of the source vector to the corresponding signed elements of the addend vector, and destructively place the results in the corresponding elements of the addend vector. Each result element is saturated to the N-bit element's signed integer range -2(N-1) to (2(N-1) )-1. Inactive elements in the destination vector register remain unmodified.", + "html": "

Add active unsigned elements of the source vector to the corresponding signed elements of the addend vector, and destructively place the results in the corresponding elements of the addend vector. Each result element is saturated to the N-bit element's signed integer range -2^(N-1) to (2^(N-1))-1. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SUVDOT": + return { + "tooltip": "The signed by unsigned integer vertical dot product instruction computes the vertical dot product of the corresponding signed 8-bit elements from the four first source vectors and four unsigned 8-bit integer values in the corresponding indexed 32-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups.", + "html": "

The signed by unsigned integer vertical dot product instruction computes the vertical dot product of the corresponding signed 8-bit elements from the four first source vectors and four unsigned 8-bit integer values in the corresponding indexed 32-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups.

The groups within the second source vector are specified using an immediate element index which selects the same group position within each 128-bit vector segment. The index range is from 0 to 3, encoded in 2 bits.

The vector numbers forming the single-vector group within each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo quarter the number of ZA array vectors.

The vector group symbol VGx4 indicates that the ZA operand consists of four ZA single-vector groups. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SVC": + return { + "tooltip": "Supervisor Call causes an exception to be taken to EL1.", + "html": "

Supervisor Call causes an exception to be taken to EL1.

On executing an SVC instruction, the PE records the exception as a Supervisor Call exception in ESR_ELx, using the EC value 0x15, and the value of the immediate argument.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SVDOT": + return { + "tooltip": "The signed integer vertical dot product instruction computes the vertical dot product of the corresponding two signed 16-bit integer values held in the two first source vectors and two signed 16-bit integer values in the corresponding indexed 32-bit element of the second source vector. The widened dot product results are destructively added to the corresponding 32-bit element of the ZA single-vector groups.", + "html": "

The signed integer vertical dot product instruction computes the vertical dot product of the corresponding two signed 16-bit integer values held in the two first source vectors and two signed 16-bit integer values in the corresponding indexed 32-bit element of the second source vector. The widened dot product results are destructively added to the corresponding 32-bit element of the ZA single-vector groups.

The groups within the second source vector are specified using an immediate element index which selects the same group position within each 128-bit vector segment. The index range is from 0 to 3, encoded in 2 bits.

The vector numbers forming the single-vector group within each half of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half the number of ZA array vectors.

The vector group symbol VGx2 indicates that the ZA operand consists of two ZA single-vector groups. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SVDOT": + return { + "tooltip": "The signed integer vertical dot product instruction computes the vertical dot product of the corresponding four signed 8-bit or 16-bit integer values held in the four first source vectors and four signed 8-bit or 16-bit integer values in the corresponding indexed 32-bit or 64-bit element of the second source vector. The widened dot product results are destructively added to the corresponding 32-bit or 64-bit element of the ZA single-vector groups.", + "html": "

The signed integer vertical dot product instruction computes the vertical dot product of the corresponding four signed 8-bit or 16-bit integer values held in the four first source vectors and four signed 8-bit or 16-bit integer values in the corresponding indexed 32-bit or 64-bit element of the second source vector. The widened dot product results are destructively added to the corresponding 32-bit or 64-bit element of the ZA single-vector groups.

The groups within the second source vector are specified using an immediate element index which selects the same group position within each 128-bit vector segment. The index range is from 0 to one less than the number of groups per 128-bit segment, encoded in 1 to 2 bits depending on the size of the group.

The vector numbers forming the single-vector group within each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo quarter the number of ZA array vectors.

The vector group symbol VGx4 indicates that the ZA operand consists of four ZA single-vector groups. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SWP": + case "SWPA": + case "SWPAL": + case "SWPL": + return { + "tooltip": "Swap word or doubleword in memory atomically loads a 32-bit word or 64-bit doubleword from a memory location, and stores the value held in a register back to the same memory location. The value initially loaded from memory is returned in the destination register.", + "html": "

Swap word or doubleword in memory atomically loads a 32-bit word or 64-bit doubleword from a memory location, and stores the value held in a register back to the same memory location. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SWPAB": + case "SWPALB": + case "SWPB": + case "SWPLB": + return { + "tooltip": "Swap byte in memory atomically loads an 8-bit byte from a memory location, and stores the value held in a register back to the same memory location. The value initially loaded from memory is returned in the destination register.", + "html": "

Swap byte in memory atomically loads an 8-bit byte from a memory location, and stores the value held in a register back to the same memory location. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SWPAH": + case "SWPALH": + case "SWPH": + case "SWPLH": + return { + "tooltip": "Swap halfword in memory atomically loads a 16-bit halfword from a memory location, and stores the value held in a register back to the same memory location. The value initially loaded from memory is returned in the destination register.", + "html": "

Swap halfword in memory atomically loads a 16-bit halfword from a memory location, and stores the value held in a register back to the same memory location. The value initially loaded from memory is returned in the destination register.

For more information about memory ordering semantics, see Load-Acquire, Store-Release.

For information about memory accesses, see Load/Store addressing modes.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SWPP": + case "SWPPA": + case "SWPPAL": + case "SWPPL": + return { + "tooltip": "Swap quadword in memory atomically loads a 128-bit quadword from a memory location, and stores the value held in a pair of registers back to the same memory location. The value initially loaded from memory is returned in the same pair of registers.", + "html": "

Swap quadword in memory atomically loads a 128-bit quadword from a memory location, and stores the value held in a pair of registers back to the same memory location. The value initially loaded from memory is returned in the same pair of registers.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SXTB": + return { + "tooltip": "Signed Extend Byte extracts an 8-bit value from a register, sign-extends it to the size of the register, and writes the result to the destination register.", + "html": "

Signed Extend Byte extracts an 8-bit value from a register, sign-extends it to the size of the register, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SXTB": + case "SXTH": + case "SXTW": + return { + "tooltip": "Sign-extend the least-significant sub-element of each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Sign-extend the least-significant sub-element of each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SXTH": + return { + "tooltip": "Sign Extend Halfword extracts a 16-bit value, sign-extends it to the size of the register, and writes the result to the destination register.", + "html": "

Sign Extend Halfword extracts a 16-bit value, sign-extends it to the size of the register, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SXTL": + case "SXTL2": + return { + "tooltip": "Signed extend Long. This instruction duplicates each vector element in the lower or upper half of the source SIMD&FP register into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.", + "html": "

Signed extend Long. This instruction duplicates each vector element in the lower or upper half of the source SIMD&FP register into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are signed integer values.

The SXTL instruction extracts the source vector from the lower half of the source register. The SXTL2 instruction extracts the source vector from the upper half of the source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SXTW": + return { + "tooltip": "Sign Extend Word sign-extends a word to the size of the register, and writes the result to the destination register.", + "html": "

Sign Extend Word sign-extends a word to the size of the register, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SYS": + return { + "tooltip": "System instruction. For more information, see Op0 equals 0b01, cache maintenance, TLB maintenance, and address translation instructions for the encodings of System instructions.", + "html": "

System instruction. For more information, see Op0 equals 0b01, cache maintenance, TLB maintenance, and address translation instructions for the encodings of System instructions.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SYSL": + return { + "tooltip": "System instruction with result. For more information, see Op0 equals 0b01, cache maintenance, TLB maintenance, and address translation instructions for the encodings of System instructions.", + "html": "

System instruction with result. For more information, see Op0 equals 0b01, cache maintenance, TLB maintenance, and address translation instructions for the encodings of System instructions.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "SYSP": + return { + "tooltip": "128-bit System instruction.", + "html": "

128-bit System instruction.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "TBL": + return { + "tooltip": "Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.", + "html": "

Table vector Lookup. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the result for that lookup is 0. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "TBL": + return { + "tooltip": "Reads each element of the second source (index) vector and uses its value to select an indexed element from a table of elements consisting of one or two consecutive vector registers, where the first or only vector holds the lower numbered elements, and places the indexed table element in the destination vector element corresponding to the index vector element. If an index value is greater than or equal to the number of vector elements then it places zero in the corresponding destination vector element.", + "html": "

Reads each element of the second source (index) vector and uses its value to select an indexed element from a table of elements consisting of one or two consecutive vector registers, where the first or only vector holds the lower numbered elements, and places the indexed table element in the destination vector element corresponding to the index vector element. If an index value is greater than or equal to the number of vector elements then it places zero in the corresponding destination vector element.

Since the index values can select any element in a vector this operation is not naturally vector length agnostic.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "TBLQ": + return { + "tooltip": "For each 128-bit destination vector segment, reads each element of the corresponding second source (index) vector segment and uses its value to select an indexed element from the corresponding first source (table) vector segment. The indexed table element is placed in the element of the destination vector that corresponds to the index vector element. If an index value is greater than or equal to the number of elements in a 128-bit vector segment then it places zero in the corresponding destination vector element. This instruction is unpredicated.", + "html": "

For each 128-bit destination vector segment, reads each element of the corresponding second source (index) vector segment and uses its value to select an indexed element from the corresponding first source (table) vector segment. The indexed table element is placed in the element of the destination vector that corresponds to the index vector element. If an index value is greater than or equal to the number of elements in a 128-bit vector segment then it places zero in the corresponding destination vector element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "TBNZ": + return { + "tooltip": "Test bit and Branch if Nonzero compares the value of a bit in a general-purpose register with zero, and conditionally branches to a label at a PC-relative offset if the comparison is not equal. It provides a hint that this is not a subroutine call or return. This instruction does not affect condition flags.", + "html": "

Test bit and Branch if Nonzero compares the value of a bit in a general-purpose register with zero, and conditionally branches to a label at a PC-relative offset if the comparison is not equal. It provides a hint that this is not a subroutine call or return. This instruction does not affect condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "TBX": + return { + "tooltip": "Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.", + "html": "

Table vector lookup extension. This instruction reads each value from the vector elements in the index source SIMD&FP register, uses each result as an index to perform a lookup in a table of bytes that is described by one to four source table SIMD&FP registers, places the lookup result in a vector, and writes the vector to the destination SIMD&FP register. If an index is out of range for the table, the existing value in the vector element of the destination register is left unchanged. If more than one source register is used to describe the table, the first source register describes the lowest bytes of the table.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "TBX": + return { + "tooltip": "Reads each element of the second source (index) vector and uses its value to select an indexed element from a table of elements in the first source vector, and places the indexed element in the destination vector element corresponding to the index vector element. If an index value is greater than or equal to the number of vector elements then the corresponding destination vector element is left unchanged.", + "html": "

Reads each element of the second source (index) vector and uses its value to select an indexed element from a table of elements in the first source vector, and places the indexed element in the destination vector element corresponding to the index vector element. If an index value is greater than or equal to the number of vector elements then the corresponding destination vector element is left unchanged.

Since the index values can select any element in a vector this operation is not naturally vector length agnostic.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "TBXQ": + return { + "tooltip": "For each 128-bit destination vector segment, reads each element of the corresponding second source (index) vector segment and uses its value to select an indexed element from the corresponding first source (table) vector segment. The indexed table element is placed in the element of the destination vector that corresponds to the index vector element. If an index value is greater than or equal to the number of elements in a 128-bit vector segment then the corresponding destination vector element is left unchanged. This instruction is unpredicated.", + "html": "

For each 128-bit destination vector segment, reads each element of the corresponding second source (index) vector segment and uses its value to select an indexed element from the corresponding first source (table) vector segment. The indexed table element is placed in the element of the destination vector that corresponds to the index vector element. If an index value is greater than or equal to the number of elements in a 128-bit vector segment then the corresponding destination vector element is left unchanged. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "TBZ": + return { + "tooltip": "Test bit and Branch if Zero compares the value of a test bit with zero, and conditionally branches to a label at a PC-relative offset if the comparison is equal. It provides a hint that this is not a subroutine call or return. This instruction does not affect condition flags.", + "html": "

Test bit and Branch if Zero compares the value of a test bit with zero, and conditionally branches to a label at a PC-relative offset if the comparison is equal. It provides a hint that this is not a subroutine call or return. This instruction does not affect condition flags.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "TCANCEL": + return { + "tooltip": "This instruction exits Transactional state and discards all state modifications that were performed transactionally. Execution continues at the instruction that follows the TSTART instruction of the outer transaction. The destination register of the TSTART instruction of the outer transaction is written with the immediate operand of TCANCEL.", + "html": "

This instruction exits Transactional state and discards all state modifications that were performed transactionally. Execution continues at the instruction that follows the TSTART instruction of the outer transaction. The destination register of the TSTART instruction of the outer transaction is written with the immediate operand of TCANCEL.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "TCOMMIT": + return { + "tooltip": "This instruction commits the current transaction. If the current transaction is an outer transaction, then Transactional state is exited, and all state modifications performed transactionally are committed to the architectural state. TCOMMIT takes no inputs and returns no value.", + "html": "

This instruction commits the current transaction. If the current transaction is an outer transaction, then Transactional state is exited, and all state modifications performed transactionally are committed to the architectural state. TCOMMIT takes no inputs and returns no value.

Execution of TCOMMIT is UNDEFINED in Non-transactional state.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "TLBI": + return { + "tooltip": "TLB Invalidate operation. For more information, see op0==0b01, cache maintenance, TLB maintenance, and address translation instructions.", + "html": "

TLB Invalidate operation. For more information, see op0==0b01, cache maintenance, TLB maintenance, and address translation instructions.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "TLBIP": + return { + "tooltip": "TLB Invalidate Pair operation.", + "html": "

TLB Invalidate Pair operation.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "TRCIT": + return { + "tooltip": "Trace Instrumentation generates an instrumentation trace packet that contains the value of the provided register.", + "html": "

Trace Instrumentation generates an instrumentation trace packet that contains the value of the provided register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "TRN1": + return { + "tooltip": "Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.", + "html": "

Transpose vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

By using this instruction with TRN2, a 2 x 2 matrix can be transposed.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "TRN1": + case "TRN2": + return { + "tooltip": "Interleave alternating even or odd-numbered elements from the first and second source predicates and place in elements of the destination predicate. This instruction is unpredicated.", + "html": "

Interleave alternating even or odd-numbered elements from the first and second source predicates and place in elements of the destination predicate. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "TRN1": + case "TRN2": + return { + "tooltip": "Interleave alternating even or odd-numbered elements from the first and second source vectors and place in elements of the destination vector. This instruction is unpredicated.", + "html": "

Interleave alternating even or odd-numbered elements from the first and second source vectors and place in elements of the destination vector. This instruction is unpredicated.

The 128-bit element variant requires that the current vector length is at least 256 bits, and if the current vector length is not an integer multiple of 256 bits then the trailing bits are set to zero. ID_AA64ZFR0_EL1.F64MM indicates whether the 128-bit element variant is implemented. The 128-bit element variant is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "TRN2": + return { + "tooltip": "Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.", + "html": "

Transpose vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places each result into consecutive elements of a vector, and writes the vector to the destination SIMD&FP register. Vector elements from the first source register are placed into even-numbered elements of the destination vector, starting at zero, while vector elements from the second source register are placed into odd-numbered elements of the destination vector.

By using this instruction with TRN1, a 2 x 2 matrix can be transposed.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "TSBCSYNC": + return { + "tooltip": "Trace Synchronization Barrier. This instruction is a barrier that synchronizes the trace operations of instructions, see Trace Synchronization Buffer (TSB CSYNC).", + "html": "

Trace Synchronization Barrier. This instruction is a barrier that synchronizes the trace operations of instructions, see Trace Synchronization Buffer (TSB CSYNC).

If FEAT_TRF is not implemented, this instruction executes as a NOP.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "TST": + return { + "tooltip": "Test (shifted register) performs a bitwise AND operation on a register value and an optionally-shifted register value. It updates the condition flags based on the result, and discards the result.", + "html": "

Test (shifted register) performs a bitwise AND operation on a register value and an optionally-shifted register value. It updates the condition flags based on the result, and discards the result.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "TSTART": + return { + "tooltip": "This instruction starts a new transaction. If the transaction started successfully, the destination register is set to zero. If the transaction failed or was canceled, then all state modifications that were performed transactionally are discarded and the destination register is written with a nonzero value that encodes the cause of the failure.", + "html": "

This instruction starts a new transaction. If the transaction started successfully, the destination register is set to zero. If the transaction failed or was canceled, then all state modifications that were performed transactionally are discarded and the destination register is written with a nonzero value that encodes the cause of the failure.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "TTEST": + return { + "tooltip": "This instruction writes the depth of the transaction to the destination register, or the value 0 otherwise.", + "html": "

This instruction writes the depth of the transaction to the destination register, or the value 0 otherwise.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UABA": + return { + "tooltip": "Unsigned Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.", + "html": "

Unsigned Absolute difference and Accumulate. This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the elements of the vector of the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UABA": + return { + "tooltip": "Compute the absolute difference between unsigned integer values in elements of the second source vector and corresponding elements of the first source vector, and add the difference to the corresponding elements of the destination vector. This instruction is unpredicated.", + "html": "

Compute the absolute difference between unsigned integer values in elements of the second source vector and corresponding elements of the first source vector, and add the difference to the corresponding elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UABAL": + case "UABAL2": + return { + "tooltip": "Unsigned Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.", + "html": "

Unsigned Absolute difference and Accumulate Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, and accumulates the absolute values of the results into the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

The UABAL instruction extracts each source vector from the lower half of each source register. The UABAL2 instruction extracts each source vector from the upper half of each source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UABALB": + return { + "tooltip": "Compute the absolute difference between even-numbered unsigned elements of the second source vector and corresponding elements of the first source vector, and destructively add to the overlapping double-width elements of the addend vector. This instruction is unpredicated.", + "html": "

Compute the absolute difference between even-numbered unsigned elements of the second source vector and corresponding elements of the first source vector, and destructively add to the overlapping double-width elements of the addend vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UABALT": + return { + "tooltip": "Compute the absolute difference between odd-numbered unsigned elements of the second source vector and corresponding elements of the first source vector, and destructively add to the overlapping double-width elements of the addend vector. This instruction is unpredicated.", + "html": "

Compute the absolute difference between odd-numbered unsigned elements of the second source vector and corresponding elements of the first source vector, and destructively add to the overlapping double-width elements of the addend vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UABD": + return { + "tooltip": "Unsigned Absolute Difference (vector). This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Unsigned Absolute Difference (vector). This instruction subtracts the elements of the vector of the second source SIMD&FP register from the corresponding elements of the first source SIMD&FP register, places the absolute values of the results into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UABD": + return { + "tooltip": "Compute the absolute difference between unsigned integer values in active elements of the second source vector and corresponding elements of the first source vector and destructively place the difference in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Compute the absolute difference between unsigned integer values in active elements of the second source vector and corresponding elements of the first source vector and destructively place the difference in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UABDL": + case "UABDL2": + return { + "tooltip": "Unsigned Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.", + "html": "

Unsigned Absolute Difference Long. This instruction subtracts the vector elements in the lower or upper half of the second source SIMD&FP register from the corresponding vector elements of the first source SIMD&FP register, places the absolute value of the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

The UABDL instruction extracts each source vector from the lower half of each source register. The UABDL2 instruction extracts each source vector from the upper half of each source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UABDLB": + return { + "tooltip": "Compute the absolute difference between the even-numbered unsigned integer values in elements of the second source vector and the corresponding elements of the first source vector, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Compute the absolute difference between the even-numbered unsigned integer values in elements of the second source vector and the corresponding elements of the first source vector, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UABDLT": + return { + "tooltip": "Compute the absolute difference between the odd-numbered unsigned integer values in elements of the second source vector and corresponding elements of the first source vector, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Compute the absolute difference between the odd-numbered unsigned integer values in elements of the second source vector and corresponding elements of the first source vector, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UADALP": + return { + "tooltip": "Unsigned Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.", + "html": "

Unsigned Add and Accumulate Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UADALP": + return { + "tooltip": "Add pairs of adjacent unsigned integer values and accumulate the results into the overlapping double-width elements of the destination vector.", + "html": "

Add pairs of adjacent unsigned integer values and accumulate the results into the overlapping double-width elements of the destination vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UADDL": + case "UADDL2": + return { + "tooltip": "Unsigned Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.", + "html": "

Unsigned Add Long (vector). This instruction adds each vector element in the lower or upper half of the first source SIMD&FP register to the corresponding vector element of the second source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

The UADDL instruction extracts each source vector from the lower half of each source register. The UADDL2 instruction extracts each source vector from the upper half of each source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UADDLB": + return { + "tooltip": "Add the corresponding even-numbered unsigned elements of the first and second source vectors, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Add the corresponding even-numbered unsigned elements of the first and second source vectors, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UADDLP": + return { + "tooltip": "Unsigned Add Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.", + "html": "

Unsigned Add Long Pairwise. This instruction adds pairs of adjacent unsigned integer values from the vector in the source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UADDLT": + return { + "tooltip": "Add the corresponding odd-numbered unsigned elements of the first and second source vectors, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Add the corresponding odd-numbered unsigned elements of the first and second source vectors, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UADDLV": + return { + "tooltip": "Unsigned sum Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are unsigned integer values.", + "html": "

Unsigned sum Long across Vector. This instruction adds every vector element in the source SIMD&FP register together, and writes the scalar result to the destination SIMD&FP register. The destination scalar is twice as long as the source vector elements. All the values in this instruction are unsigned integer values.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UADDV": + return { + "tooltip": "Unsigned add horizontally across all lanes of a vector, and place the result in the SIMD&FP scalar destination register. Narrow elements are first zero-extended to 64 bits. Inactive elements in the source vector are treated as zero.", + "html": "

Unsigned add horizontally across all lanes of a vector, and place the result in the SIMD&FP scalar destination register. Narrow elements are first zero-extended to 64 bits. Inactive elements in the source vector are treated as zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UADDW": + case "UADDW2": + return { + "tooltip": "Unsigned Add Wide. This instruction adds the vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. The vector elements of the destination register and the first source register are twice as long as the vector elements of the second source register. All the values in this instruction are unsigned integer values.", + "html": "

Unsigned Add Wide. This instruction adds the vector elements of the first source SIMD&FP register to the corresponding vector elements in the lower or upper half of the second source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. The vector elements of the destination register and the first source register are twice as long as the vector elements of the second source register. All the values in this instruction are unsigned integer values.

The UADDW instruction extracts vector elements from the lower half of the second source register. The UADDW2 instruction extracts vector elements from the upper half of the second source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UADDWB": + return { + "tooltip": "Add the even-numbered unsigned elements of the second source vector to the overlapping double-width elements of the first source vector and place the results in the corresponding double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Add the even-numbered unsigned elements of the second source vector to the overlapping double-width elements of the first source vector and place the results in the corresponding double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UADDWT": + return { + "tooltip": "Add the odd-numbered unsigned elements of the second source vector to the overlapping double-width elements of the first source vector and place the results in the corresponding double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Add the odd-numbered unsigned elements of the second source vector to the overlapping double-width elements of the first source vector and place the results in the corresponding double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UBFIZ": + return { + "tooltip": "Unsigned Bitfield Insert in Zeros copies a bitfield of bits from the least significant bits of the source register to bit position of the destination register, setting the destination bits above and below the bitfield to zero.", + "html": "

Unsigned Bitfield Insert in Zeros copies a bitfield of <width> bits from the least significant bits of the source register to bit position <lsb> of the destination register, setting the destination bits above and below the bitfield to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UBFM": + return { + "tooltip": "Unsigned Bitfield Move is usually accessed via one of its aliases, which are always preferred for disassembly.", + "html": "

Unsigned Bitfield Move is usually accessed via one of its aliases, which are always preferred for disassembly.

If <imms> is greater than or equal to <immr>, this copies a bitfield of (<imms>-<immr>+1) bits starting from bit position <immr> in the source register to the least significant bits of the destination register.

If <imms> is less than <immr>, this copies a bitfield of (<imms>+1) bits from the least significant bits of the source register to bit position (regsize-<immr>) of the destination register, where regsize is the destination register size of 32 or 64 bits.

In both cases the destination bits below and above the bitfield are set to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UBFX": + return { + "tooltip": "Unsigned Bitfield Extract copies a bitfield of bits starting from bit position in the source register to the least significant bits of the destination register, and sets destination bits above the bitfield to zero.", + "html": "

Unsigned Bitfield Extract copies a bitfield of <width> bits starting from bit position <lsb> in the source register to the least significant bits of the destination register, and sets destination bits above the bitfield to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UCLAMP": + return { + "tooltip": "Clamp each unsigned element in the two or four destination vectors to between the unsigned minimum value in the corresponding element of the first source vector and the unsigned maximum value in the corresponding element of the second source vector and destructively place the clamped results in the corresponding elements of the two or four destination vectors.", + "html": "

Clamp each unsigned element in the two or four destination vectors to between the unsigned minimum value in the corresponding element of the first source vector and the unsigned maximum value in the corresponding element of the second source vector and destructively place the clamped results in the corresponding elements of the two or four destination vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UCLAMP": + return { + "tooltip": "Clamp each unsigned element in the destination vector to between the unsigned minimum value in the corresponding element of the first source vector and the unsigned maximum value in the corresponding element of the second source vector and destructively write the results in the corresponding elements of the destination vector. This instruction is unpredicated.", + "html": "

Clamp each unsigned element in the destination vector to between the unsigned minimum value in the corresponding element of the first source vector and the unsigned maximum value in the corresponding element of the second source vector and destructively write the results in the corresponding elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UCVTF": + return { + "tooltip": "Unsigned fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.", + "html": "

Unsigned fixed-point Convert to Floating-point (vector). This instruction converts each element in a vector from fixed-point to floating-point using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the Security state and Exception level in which the instruction is executed, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UCVTF": + return { + "tooltip": "Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.", + "html": "

Unsigned integer Convert to Floating-point (vector). This instruction converts each element in a vector from an unsigned integer value to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the Security state and Exception level in which the instruction is executed, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UCVTF": + return { + "tooltip": "Unsigned fixed-point Convert to Floating-point (scalar). This instruction converts the unsigned value in the 32-bit or 64-bit general-purpose source register to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.", + "html": "

Unsigned fixed-point Convert to Floating-point (scalar). This instruction converts the unsigned value in the 32-bit or 64-bit general-purpose source register to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the Security state and Exception level in which the instruction is executed, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UCVTF": + return { + "tooltip": "Unsigned integer Convert to Floating-point (scalar). This instruction converts the unsigned integer value in the general-purpose source register to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.", + "html": "

Unsigned integer Convert to Floating-point (scalar). This instruction converts the unsigned integer value in the general-purpose source register to a floating-point value using the rounding mode that is specified by the FPCR, and writes the result to the SIMD&FP destination register.

A floating-point exception can be generated by this instruction. Depending on the settings in FPCR, the exception results in either a flag being set in FPSR, or a synchronous exception being generated. For more information, see Floating-point exception traps.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UCVTF": + return { + "tooltip": "Convert to single-precision from unsigned 32-bit integer, each element of the two or four source vectors, and place the results in the corresponding elements of the two or four destination vectors.", + "html": "

Convert to single-precision from unsigned 32-bit integer, each element of the two or four source vectors, and place the results in the corresponding elements of the two or four destination vectors.

This instruction follows SME2 floating-point numerical behaviors corresponding to instructions that place their results in one or more SVE Z vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UCVTF": + return { + "tooltip": "Convert to floating-point from the unsigned integer in each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Convert to floating-point from the unsigned integer in each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

If the input and result types have a different size the smaller type is held unpacked in the least significant bits of elements of the larger size. When the input is the smaller type the upper bits of each source element are ignored. When the result is the smaller type the results are zero-extended to fill each destination element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UDF": + return { + "tooltip": "Permanently Undefined generates an Undefined Instruction exception (ESR_ELx.EC = 0b000000). The encodings for UDF used in this section are defined as permanently undefined.", + "html": "

Permanently Undefined generates an Undefined Instruction exception (ESR_ELx.EC = 0b000000). The encodings for UDF used in this section are defined as permanently undefined.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UDIV": + return { + "tooltip": "Unsigned Divide divides an unsigned integer register value by another unsigned integer register value, and writes the result to the destination register. The condition flags are not affected.", + "html": "

Unsigned Divide divides an unsigned integer register value by another unsigned integer register value, and writes the result to the destination register. The condition flags are not affected.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UDIV": + return { + "tooltip": "Unsigned divide active elements of the first source vector by corresponding elements of the second source vector and destructively place the quotient in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Unsigned divide active elements of the first source vector by corresponding elements of the second source vector and destructively place the quotient in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UDIVR": + return { + "tooltip": "Unsigned reversed divide active elements of the second source vector by corresponding elements of the first source vector and destructively place the quotient in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Unsigned reversed divide active elements of the second source vector by corresponding elements of the first source vector and destructively place the quotient in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UDOT": + return { + "tooltip": "Dot Product unsigned arithmetic (vector, by element). This instruction performs the dot product of the four 8-bit elements in each 32-bit element of the first source register with the four 8-bit elements of an indexed 32-bit element in the second source register, accumulating the result into the corresponding 32-bit element of the destination register.", + "html": "

Dot Product unsigned arithmetic (vector, by element). This instruction performs the dot product of the four 8-bit elements in each 32-bit element of the first source register with the four 8-bit elements of an indexed 32-bit element in the second source register, accumulating the result into the corresponding 32-bit element of the destination register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

In Armv8.2 and Armv8.3, this is an optional instruction. From Armv8.4 it is mandatory for all implementations to support it.

ID_AA64ISAR0_EL1.DP indicates whether this instruction is supported.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UDOT": + return { + "tooltip": "Dot Product unsigned arithmetic (vector). This instruction performs the dot product of the four unsigned 8-bit elements in each 32-bit element of the first source register with the four unsigned 8-bit elements of the corresponding 32-bit element in the second source register, accumulating the result into the corresponding 32-bit element of the destination register.", + "html": "

Dot Product unsigned arithmetic (vector). This instruction performs the dot product of the four unsigned 8-bit elements in each 32-bit element of the first source register with the four unsigned 8-bit elements of the corresponding 32-bit element in the second source register, accumulating the result into the corresponding 32-bit element of the destination register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

In Armv8.2 and Armv8.3, this is an optional instruction. From Armv8.4 it is mandatory for all implementations to support it.

ID_AA64ISAR0_EL1.DP indicates whether this instruction is supported.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UDOT": + return { + "tooltip": "The unsigned integer dot product instruction computes the dot product of a group of two unsigned 16-bit integer values held in each 32-bit element of the first source vector multiplied by a group of two unsigned 16-bit integer values in the corresponding 32-bit element of the second source vector, and then destructively adds the widened dot product to the corresponding 32-bit element of the destination vector.", + "html": "

The unsigned integer dot product instruction computes the dot product of a group of two unsigned 16-bit integer values held in each 32-bit element of the first source vector multiplied by a group of two unsigned 16-bit integer values in the corresponding 32-bit element of the second source vector, and then destructively adds the widened dot product to the corresponding 32-bit element of the destination vector.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UDOT": + return { + "tooltip": "The unsigned integer indexed dot product instruction computes the dot product of a group of two unsigned 16-bit integer values held in each 32-bit element of the first source vector multiplied by a group of two unsigned 16-bit integer values in an indexed 32-bit element of the second source vector, and then destructively adds the widened dot product to the corresponding 32-bit element of the destination vector.", + "html": "

The unsigned integer indexed dot product instruction computes the dot product of a group of two unsigned 16-bit integer values held in each 32-bit element of the first source vector multiplied by a group of two unsigned 16-bit integer values in an indexed 32-bit element of the second source vector, and then destructively adds the widened dot product to the corresponding 32-bit element of the destination vector.

The groups within the second source vector are specified using an immediate index which selects the same group position within each 128-bit vector segment. The index range is from 0 to 3. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UDOT": + return { + "tooltip": "The unsigned integer dot product instruction computes the dot product of a group of four unsigned 8-bit or 16-bit integer values held in each 32-bit or 64-bit element of the first source vector multiplied by a group of four unsigned 8-bit or 16-bit integer values in the corresponding 32-bit or 64-bit element of the second source vector, and then destructively adds the widened dot product to the corresponding 32-bit or 64-bit element of the destination vector.", + "html": "

The unsigned integer dot product instruction computes the dot product of a group of four unsigned 8-bit or 16-bit integer values held in each 32-bit or 64-bit element of the first source vector multiplied by a group of four unsigned 8-bit or 16-bit integer values in the corresponding 32-bit or 64-bit element of the second source vector, and then destructively adds the widened dot product to the corresponding 32-bit or 64-bit element of the destination vector.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UDOT": + return { + "tooltip": "The unsigned integer indexed dot product instruction computes the dot product of a group of four unsigned 8-bit or 16-bit integer values held in each 32-bit or 64-bit element of the first source vector multiplied by a group of four unsigned 8-bit or 16-bit integer values in an indexed 32-bit or 64-bit element of the second source vector, and then destructively adds the widened dot product to the corresponding 32-bit or 64-bit element of the destination vector.", + "html": "

The unsigned integer indexed dot product instruction computes the dot product of a group of four unsigned 8-bit or 16-bit integer values held in each 32-bit or 64-bit element of the first source vector multiplied by a group of four unsigned 8-bit or 16-bit integer values in an indexed 32-bit or 64-bit element of the second source vector, and then destructively adds the widened dot product to the corresponding 32-bit or 64-bit element of the destination vector.

The groups within the second source vector are specified using an immediate index which selects the same group position within each 128-bit vector segment. The index range is from 0 to one less than the number of groups per 128-bit segment, encoded in 1 to 2 bits depending on the size of the group. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UDOT": + return { + "tooltip": "The unsigned integer dot product instruction computes the dot product of two unsigned 16-bit integer values held in each 32-bit element of the two or four first source vectors and two unsigned 16-bit integer values in the corresponding indexed 32-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups.", + "html": "

The unsigned integer dot product instruction computes the dot product of two unsigned 16-bit integer values held in each 32-bit element of the two or four first source vectors and two unsigned 16-bit integer values in the corresponding indexed 32-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups.

The groups within the second source vector are specified using an immediate element index which selects the same group position within each 128-bit vector segment. The index range is from 0 to 3, encoded in 2 bits. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UDOT": + return { + "tooltip": "The unsigned integer dot product instruction computes the dot product of two unsigned 16-bit integer values held in each 32-bit element of the two or four first source vectors and two unsigned 16-bit integer values in the corresponding 32-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

The unsigned integer dot product instruction computes the dot product of two unsigned 16-bit integer values held in each 32-bit element of the two or four first source vectors and two unsigned 16-bit integer values in the corresponding 32-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UDOT": + return { + "tooltip": "The unsigned integer dot product instruction computes the dot product of two unsigned 16-bit integer values held in each 32-bit element of the two or four first source vectors and two unsigned 16-bit integer values in the corresponding 32-bit element of the two or four second source vectors. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

The unsigned integer dot product instruction computes the dot product of two unsigned 16-bit integer values held in each 32-bit element of the two or four first source vectors and two unsigned 16-bit integer values in the corresponding 32-bit element of the two or four second source vectors. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UDOT": + return { + "tooltip": "The unsigned integer dot product instruction computes the dot product of four unsigned 8-bit or 16-bit integer values held in each 32-bit or 64-bit element of the two or four first source vectors and four unsigned 8-bit or 16-bit integer values in the corresponding indexed 32-bit or 64-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit or 64-bit element of the ZA single-vector groups.", + "html": "

The unsigned integer dot product instruction computes the dot product of four unsigned 8-bit or 16-bit integer values held in each 32-bit or 64-bit element of the two or four first source vectors and four unsigned 8-bit or 16-bit integer values in the corresponding indexed 32-bit or 64-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit or 64-bit element of the ZA single-vector groups.

The groups within the second source vector are specified using an immediate element index which selects the same group position within each 128-bit vector segment. The index range is from 0 to one less than the number of groups per 128-bit segment, encoded in 1 to 2 bits depending on the size of the group. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 16-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UDOT": + return { + "tooltip": "The unsigned integer dot product instruction computes the dot product of four unsigned 8-bit or 16-bit integer values held in each 32-bit or 64-bit element of the two or four first source vectors and four unsigned 8-bit or 16-bit integer values in the corresponding 32-bit or 64-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit or 64-bit element of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

The unsigned integer dot product instruction computes the dot product of four unsigned 8-bit or 16-bit integer values held in each 32-bit or 64-bit element of the two or four first source vectors and four unsigned 8-bit or 16-bit integer values in the corresponding 32-bit or 64-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit or 64-bit element of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 16-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UDOT": + return { + "tooltip": "The unsigned integer dot product instruction computes the dot product of four unsigned 8-bit or 16-bit integer values held in each 32-bit or 64-bit element of the two or four first source vectors and four unsigned 8-bit or 16-bit integer values in the corresponding 32-bit or 64-bit element of the two or four second source vectors. The widened dot product result is destructively added to the corresponding 32-bit or 64-bit element of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

The unsigned integer dot product instruction computes the dot product of four unsigned 8-bit or 16-bit integer values held in each 32-bit or 64-bit element of the two or four first source vectors and four unsigned 8-bit or 16-bit integer values in the corresponding 32-bit or 64-bit element of the two or four second source vectors. The widened dot product result is destructively added to the corresponding 32-bit or 64-bit element of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 16-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UHADD": + return { + "tooltip": "Unsigned Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Unsigned Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

The results are truncated. For rounded results, see URHADD.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UHADD": + return { + "tooltip": "Add active unsigned elements of the first source vector to corresponding unsigned elements of the second source vector, shift right one bit, and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Add active unsigned elements of the first source vector to corresponding unsigned elements of the second source vector, shift right one bit, and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UHSUB": + return { + "tooltip": "Unsigned Halving Subtract. This instruction subtracts the vector elements in the second source SIMD&FP register from the corresponding vector elements in the first source SIMD&FP register, shifts each result right one bit, places each result into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Unsigned Halving Subtract. This instruction subtracts the vector elements in the second source SIMD&FP register from the corresponding vector elements in the first source SIMD&FP register, shifts each result right one bit, places each result into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UHSUB": + return { + "tooltip": "Subtract active unsigned elements of the second source vector from corresponding unsigned elements of the first source vector, shift right one bit, and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Subtract active unsigned elements of the second source vector from corresponding unsigned elements of the first source vector, shift right one bit, and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UHSUBR": + return { + "tooltip": "Subtract active unsigned elements of the first source vector from corresponding unsigned elements of the second source vector, shift right one bit, and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Subtract active unsigned elements of the first source vector from corresponding unsigned elements of the second source vector, shift right one bit, and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMADDL": + return { + "tooltip": "Unsigned Multiply-Add Long multiplies two 32-bit register values, adds a 64-bit register value, and writes the result to the 64-bit destination register.", + "html": "

Unsigned Multiply-Add Long multiplies two 32-bit register values, adds a 64-bit register value, and writes the result to the 64-bit destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMAX": + return { + "tooltip": "Unsigned Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Unsigned Maximum (vector). This instruction compares corresponding elements in the vectors in the two source SIMD&FP registers, places the larger of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMAX": + return { + "tooltip": "Unsigned Maximum (immediate) determines the unsigned maximum of the source register value and immediate, and writes the result to the destination register.", + "html": "

Unsigned Maximum (immediate) determines the unsigned maximum of the source register value and immediate, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMAX": + return { + "tooltip": "Determine the unsigned maximum of elements of the second source vector and the corresponding elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the unsigned maximum of elements of the second source vector and the corresponding elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMAX": + return { + "tooltip": "Determine the unsigned maximum of elements of the two or four second source vectors and the corresponding elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the unsigned maximum of elements of the two or four second source vectors and the corresponding elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMAX": + return { + "tooltip": "Unsigned Maximum (register) determines the unsigned maximum of the two source register values and writes the result to the destination register.", + "html": "

Unsigned Maximum (register) determines the unsigned maximum of the two source register values and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMAX": + return { + "tooltip": "Determine the unsigned maximum of active elements of the second source vector and corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Determine the unsigned maximum of active elements of the second source vector and corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMAX": + return { + "tooltip": "Determine the unsigned maximum of an immediate and each element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate is an unsigned 8-bit value in the range 0 to 255, inclusive. This instruction is unpredicated.", + "html": "

Determine the unsigned maximum of an immediate and each element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate is an unsigned 8-bit value in the range 0 to 255, inclusive. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMAXP": + return { + "tooltip": "Unsigned Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Unsigned Maximum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the largest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMAXP": + return { + "tooltip": "Compute the maximum value of each pair of adjacent unsigned integer elements within each source vector, and interleave the results from corresponding lanes. The interleaved result values are destructively placed in the first source vector.", + "html": "

Compute the maximum value of each pair of adjacent unsigned integer elements within each source vector, and interleave the results from corresponding lanes. The interleaved result values are destructively placed in the first source vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMAXQV": + return { + "tooltip": "Unsigned maximum of the same element numbers from each 128-bit source vector segment, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as zero.", + "html": "

Unsigned maximum of the same element numbers from each 128-bit source vector segment, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMAXV": + return { + "tooltip": "Unsigned Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.", + "html": "

Unsigned Maximum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the largest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMAXV": + return { + "tooltip": "Unsigned maximum horizontally across all lanes of a vector, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as zero.", + "html": "

Unsigned maximum horizontally across all lanes of a vector, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMIN": + return { + "tooltip": "Unsigned Minimum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the smaller of each of the two unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Unsigned Minimum (vector). This instruction compares corresponding vector elements in the two source SIMD&FP registers, places the smaller of each of the two unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMIN": + return { + "tooltip": "Unsigned Minimum (immediate) determines the unsigned minimum of the source register value and immediate, and writes the result to the destination register.", + "html": "

Unsigned Minimum (immediate) determines the unsigned minimum of the source register value and immediate, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMIN": + return { + "tooltip": "Determine the unsigned minimum of elements of the second source vector and the corresponding elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the unsigned minimum of elements of the second source vector and the corresponding elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMIN": + return { + "tooltip": "Determine the unsigned minimum of elements of the two or four second source vectors and the corresponding elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.", + "html": "

Determine the unsigned minimum of elements of the two or four second source vectors and the corresponding elements of the two or four first source vectors and destructively place the results in the corresponding elements of the two or four first source vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMIN": + return { + "tooltip": "Unsigned Minimum (register) determines the unsigned minimum of the two source register values and writes the result to the destination register.", + "html": "

Unsigned Minimum (register) determines the unsigned minimum of the two source register values and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMIN": + return { + "tooltip": "Determine the unsigned minimum of active elements of the second source vector and corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Determine the unsigned minimum of active elements of the second source vector and corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMIN": + return { + "tooltip": "Determine the unsigned minimum of an immediate and each element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate is an unsigned 8-bit value in the range 0 to 255, inclusive. This instruction is unpredicated.", + "html": "

Determine the unsigned minimum of an immediate and each element of the source vector, and destructively place the results in the corresponding elements of the source vector. The immediate is an unsigned 8-bit value in the range 0 to 255, inclusive. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMINP": + return { + "tooltip": "Unsigned Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Unsigned Minimum Pairwise. This instruction creates a vector by concatenating the vector elements of the first source SIMD&FP register after the vector elements of the second source SIMD&FP register, reads each pair of adjacent vector elements in the two source SIMD&FP registers, writes the smallest of each pair of unsigned integer values into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMINP": + return { + "tooltip": "Compute the minimum value of each pair of adjacent unsigned integer elements within each source vector, and interleave the results from corresponding lanes. The interleaved result values are destructively placed in the first source vector.", + "html": "

Compute the minimum value of each pair of adjacent unsigned integer elements within each source vector, and interleave the results from corresponding lanes. The interleaved result values are destructively placed in the first source vector.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMINQV": + return { + "tooltip": "Unsigned minimum of the same element numbers from each 128-bit source vector segment, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as the maximum unsigned integer for the element size.", + "html": "

Unsigned minimum of the same element numbers from each 128-bit source vector segment, placing each result into the corresponding element number of the 128-bit SIMD&FP destination register. Inactive elements in the source vector are treated as the maximum unsigned integer for the element size.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMINV": + return { + "tooltip": "Unsigned Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.", + "html": "

Unsigned Minimum across Vector. This instruction compares all the vector elements in the source SIMD&FP register, and writes the smallest of the values as a scalar to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMINV": + return { + "tooltip": "Unsigned minimum horizontally across all lanes of a vector, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as the maximum unsigned integer for the element size.", + "html": "

Unsigned minimum horizontally across all lanes of a vector, and place the result in the SIMD&FP scalar destination register. Inactive elements in the source vector are treated as the maximum unsigned integer for the element size.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLAL": + case "UMLAL2": + return { + "tooltip": "Unsigned Multiply-Add Long (vector, by element). This instruction multiplies each vector element in the lower or upper half of the first source SIMD&FP register by the specified vector element of the second source SIMD&FP register and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.", + "html": "

Unsigned Multiply-Add Long (vector, by element). This instruction multiplies each vector element in the lower or upper half of the first source SIMD&FP register by the specified vector element of the second source SIMD&FP register and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

The UMLAL instruction extracts vector elements from the lower half of the first source register. The UMLAL2 instruction extracts vector elements from the upper half of the first source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLAL": + case "UMLAL2": + return { + "tooltip": "Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.", + "html": "

Unsigned Multiply-Add Long (vector). This instruction multiplies the vector elements in the lower or upper half of the first source SIMD&FP register by the corresponding vector elements of the second source SIMD&FP register, and accumulates the results with the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

The UMLAL instruction extracts vector elements from the lower half of the first source register. The UMLAL2 instruction extracts vector elements from the upper half of the first source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLAL": + return { + "tooltip": "This unsigned integer multiply-add long instruction multiplies each unsigned 16-bit element in the one, two, or four first source vectors with each unsigned 16-bit indexed element of the second source vector, widens each product to 32-bits and destructively adds these values to the corresponding 32-bit elements of the ZA double-vector groups.", + "html": "

This unsigned integer multiply-add long instruction multiplies each unsigned 16-bit element in the one, two, or four first source vectors with each unsigned 16-bit indexed element of the second source vector, widens each product to 32-bits and destructively adds these values to the corresponding 32-bit elements of the ZA double-vector groups.

The elements within the second source vector are specified using an immediate element index which selects the same element position within each 128-bit vector segment. The index range is from 0 to 7, encoded in 3 bits. The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLAL": + return { + "tooltip": "This unsigned integer multiply-add long instruction multiplies each unsigned 16-bit element in the one, two, or four first source vectors with each unsigned 16-bit element in the second source vector, widens each product to 32-bits and destructively adds these values to the corresponding 32-bit elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.", + "html": "

This unsigned integer multiply-add long instruction multiplies each unsigned 16-bit element in the one, two, or four first source vectors with each unsigned 16-bit element in the second source vector, widens each product to 32-bits and destructively adds these values to the corresponding 32-bit elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLAL": + return { + "tooltip": "This unsigned integer multiply-add long instruction multiplies each unsigned 16-bit element in the two or four first source vectors with each unsigned 16-bit element in the two or four second source vectors, widens each product to 32-bits and destructively adds these values to the corresponding 32-bit elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

This unsigned integer multiply-add long instruction multiplies each unsigned 16-bit element in the two or four first source vectors with each unsigned 16-bit element in the two or four second source vectors, widens each product to 32-bits and destructively adds these values to the corresponding 32-bit elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLALB": + return { + "tooltip": "Multiply the corresponding even-numbered unsigned elements of the first and second source vectors and destructively add to the overlapping double-width elements of the addend vector. This instruction is unpredicated.", + "html": "

Multiply the corresponding even-numbered unsigned elements of the first and second source vectors and destructively add to the overlapping double-width elements of the addend vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLALB": + return { + "tooltip": "Multiply the even-numbered unsigned elements within each 128-bit segment of the first source vector by the specified unsigned element in the corresponding second source vector segment and destructively add to the overlapping double-width elements of the addend vector.", + "html": "

Multiply the even-numbered unsigned elements within each 128-bit segment of the first source vector by the specified unsigned element in the corresponding second source vector segment and destructively add to the overlapping double-width elements of the addend vector.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 2 or 3 bits depending on the size of the element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLALL": + return { + "tooltip": "This unsigned integer multiply-add long-long instruction multiplies each unsigned 8-bit or 16-bit element in the one, two, or four first source vectors with each unsigned 8-bit or 16-bit indexed element of second source vector, widens each product to 32-bits or 64-bits and destructively adds these values to the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups.", + "html": "

This unsigned integer multiply-add long-long instruction multiplies each unsigned 8-bit or 16-bit element in the one, two, or four first source vectors with each unsigned 8-bit or 16-bit indexed element of second source vector, widens each product to 32-bits or 64-bits and destructively adds these values to the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups.

The elements within the second source vector are specified using an immediate element index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 3 to 4 bits depending on the size of the element. The lowest of the four consecutive vector numbers forming the quad-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA quad-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 16-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLALL": + return { + "tooltip": "This unsigned integer multiply-add long-long instruction multiplies each unsigned 8-bit or 16-bit element in the one, two, or four first source vectors with each unsigned 8-bit or 16-bit element in the second source vector, widens each product to 32-bits or 64-bits and destructively adds these values to the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.", + "html": "

This unsigned integer multiply-add long-long instruction multiplies each unsigned 8-bit or 16-bit element in the one, two, or four first source vectors with each unsigned 8-bit or 16-bit element in the second source vector, widens each product to 32-bits or 64-bits and destructively adds these values to the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA quad-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 16-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLALL": + return { + "tooltip": "This unsigned integer multiply-add long-long instruction multiplies each unsigned 8-bit or 16-bit element in the two or four first source vectors with each unsigned 8-bit or 16-bit element in the one, two, or four second source vectors, widens each product to 32-bits or 64-bits and destructively adds these values to the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

This unsigned integer multiply-add long-long instruction multiplies each unsigned 8-bit or 16-bit element in the two or four first source vectors with each unsigned 8-bit or 16-bit element in the one, two, or four second source vectors, widens each product to 32-bits or 64-bits and destructively adds these values to the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA quad-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 16-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLALT": + return { + "tooltip": "Multiply the corresponding odd-numbered unsigned elements of the first and second source vectors and destructively add to the overlapping double-width elements of the addend vector. This instruction is unpredicated.", + "html": "

Multiply the corresponding odd-numbered unsigned elements of the first and second source vectors and destructively add to the overlapping double-width elements of the addend vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLALT": + return { + "tooltip": "Multiply the odd-numbered unsigned elements within each 128-bit segment of the first source vector by the specified unsigned element in the corresponding second source vector segment and destructively add to the overlapping double-width elements of the addend vector.", + "html": "

Multiply the odd-numbered unsigned elements within each 128-bit segment of the first source vector by the specified unsigned element in the corresponding second source vector segment and destructively add to the overlapping double-width elements of the addend vector.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 2 or 3 bits depending on the size of the element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLSL": + case "UMLSL2": + return { + "tooltip": "Unsigned Multiply-Subtract Long (vector, by element). This instruction multiplies each vector element in the lower or upper half of the first source SIMD&FP register by the specified vector element of the second source SIMD&FP register and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.", + "html": "

Unsigned Multiply-Subtract Long (vector, by element). This instruction multiplies each vector element in the lower or upper half of the first source SIMD&FP register by the specified vector element of the second source SIMD&FP register and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

The UMLSL instruction extracts vector elements from the lower half of the first source register. The UMLSL2 instruction extracts vector elements from the upper half of the first source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLSL": + case "UMLSL2": + return { + "tooltip": "Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.", + "html": "

Unsigned Multiply-Subtract Long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, and subtracts the results from the vector elements of the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

The UMLSL instruction extracts each source vector from the lower half of each source register. The UMLSL2 instruction extracts each source vector from the upper half of each source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLSL": + return { + "tooltip": "This unsigned integer multiply-subtract long instruction multiplies each unsigned 16-bit element in the one, two, or four first source vectors with each unsigned 16-bit indexed element of the second source vector, widens each product to 32-bits and destructively subtracts these values from the corresponding 32-bit elements of the ZA double-vector groups.", + "html": "

This unsigned integer multiply-subtract long instruction multiplies each unsigned 16-bit element in the one, two, or four first source vectors with each unsigned 16-bit indexed element of the second source vector, widens each product to 32-bits and destructively subtracts these values from the corresponding 32-bit elements of the ZA double-vector groups.

The elements within the second source vector are specified using an immediate element index which selects the same element position within each 128-bit vector segment. The index range is from 0 to 7, encoded in 3 bits. The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLSL": + return { + "tooltip": "This unsigned integer multiply-subtract long instruction multiplies each unsigned 16-bit element in the one, two, or four first source vectors with each unsigned 16-bit element in the second source vector, widens each product to 32-bits and destructively subtracts these values from the corresponding 32-bit elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.", + "html": "

This unsigned integer multiply-subtract long instruction multiplies each unsigned 16-bit element in the one, two, or four first source vectors with each unsigned 16-bit element in the second source vector, widens each product to 32-bits and destructively subtracts these values from the corresponding 32-bit elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLSL": + return { + "tooltip": "This unsigned integer multiply-subtract long instruction multiplies each unsigned 16-bit element in the two or four first source vectors with each unsigned 16-bit element in the two or four second source vectors, widens each product to 32-bits and destructively subtracts these values from the corresponding 32-bit elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

This unsigned integer multiply-subtract long instruction multiplies each unsigned 16-bit element in the two or four first source vectors with each unsigned 16-bit element in the two or four second source vectors, widens each product to 32-bits and destructively subtracts these values from the corresponding 32-bit elements of the ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLSLB": + return { + "tooltip": "Multiply the corresponding even-numbered unsigned elements of the first and second source vectors and destructively subtract from the overlapping double-width elements of the addend vector. This instruction is unpredicated.", + "html": "

Multiply the corresponding even-numbered unsigned elements of the first and second source vectors and destructively subtract from the overlapping double-width elements of the addend vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLSLB": + return { + "tooltip": "Multiply the even-numbered unsigned elements within each 128-bit segment of the first source vector by the specified unsigned element in the corresponding second source vector segment and destructively subtract from the overlapping double-width elements of the addend vector.", + "html": "

Multiply the even-numbered unsigned elements within each 128-bit segment of the first source vector by the specified unsigned element in the corresponding second source vector segment and destructively subtract from the overlapping double-width elements of the addend vector.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 2 or 3 bits depending on the size of the element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLSLL": + return { + "tooltip": "This unsigned integer multiply-subtract long-long instruction multiplies each unsigned 8-bit or 16-bit element in the one, two, or four first source vectors with each unsigned 8-bit or 16-bit indexed element of the second source vector, widens each product to 32-bits or 64-bits and destructively subtracts these values from the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups.", + "html": "

This unsigned integer multiply-subtract long-long instruction multiplies each unsigned 8-bit or 16-bit element in the one, two, or four first source vectors with each unsigned 8-bit or 16-bit indexed element of the second source vector, widens each product to 32-bits or 64-bits and destructively subtracts these values from the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups.

The elements within the second source vector are specified using an immediate element index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 3 to 4 bits depending on the size of the element. The lowest of the four consecutive vector numbers forming the quad-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA quad-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 16-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLSLL": + return { + "tooltip": "This unsigned integer multiply-subtract long-long instruction multiplies each unsigned 8-bit or 16-bit element in the one, two, or four first source vectors with each unsigned 8-bit or 16-bit element in the second source vector, widens each product to 32-bits or 64-bits and destructively subtracts these values from the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.", + "html": "

This unsigned integer multiply-subtract long-long instruction multiplies each unsigned 8-bit or 16-bit element in the one, two, or four first source vectors with each unsigned 8-bit or 16-bit element in the second source vector, widens each product to 32-bits or 64-bits and destructively subtracts these values from the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA quad-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 16-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLSLL": + return { + "tooltip": "This unsigned integer multiply-subtract long-long instruction multiplies each unsigned 8-bit or 16-bit element in the two or four first source vectors with each unsigned 8-bit or 16-bit element in the one, two, or four second source vectors, widens each product to 32-bits or 64-bits and destructively subtracts these values from the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

This unsigned integer multiply-subtract long-long instruction multiplies each unsigned 8-bit or 16-bit element in the two or four first source vectors with each unsigned 8-bit or 16-bit element in the one, two, or four second source vectors, widens each product to 32-bits or 64-bits and destructively subtracts these values from the corresponding 32-bit or 64-bit elements of the ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA quad-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

ID_AA64SMFR0_EL1.I16I64 indicates whether the 16-bit integer variant is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLSLT": + return { + "tooltip": "Multiply the corresponding odd-numbered unsigned elements of the first and second source vectors and destructively subtract from the overlapping double-width elements of the addend vector. This instruction is unpredicated.", + "html": "

Multiply the corresponding odd-numbered unsigned elements of the first and second source vectors and destructively subtract from the overlapping double-width elements of the addend vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMLSLT": + return { + "tooltip": "Multiply the odd-numbered unsigned elements within each 128-bit segment of the first source vector by the specified unsigned element in the corresponding second source vector segment and destructively subtract from the overlapping double-width elements of the addend vector.", + "html": "

Multiply the odd-numbered unsigned elements within each 128-bit segment of the first source vector by the specified unsigned element in the corresponding second source vector segment and destructively subtract from the overlapping double-width elements of the addend vector.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 2 or 3 bits depending on the size of the element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMMLA": + return { + "tooltip": "Unsigned 8-bit integer matrix multiply-accumulate. This instruction multiplies the 2x8 matrix of unsigned 8-bit integer values in the first source vector by the 8x2 matrix of unsigned 8-bit integer values in the second source vector. The resulting 2x2 32-bit integer matrix product is destructively added to the 32-bit integer matrix accumulator in the destination vector. This is equivalent to performing an 8-way dot product per destination element.", + "html": "

Unsigned 8-bit integer matrix multiply-accumulate. This instruction multiplies the 2x8 matrix of unsigned 8-bit integer values in the first source vector by the 8x2 matrix of unsigned 8-bit integer values in the second source vector. The resulting 2x2 32-bit integer matrix product is destructively added to the 32-bit integer matrix accumulator in the destination vector. This is equivalent to performing an 8-way dot product per destination element.

From Armv8.2 to Armv8.5, this is an optional instruction. From Armv8.6 it is mandatory for implementations that include Advanced SIMD to support it. ID_AA64ISAR1_EL1.I8MM indicates whether this instruction is supported.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMMLA": + return { + "tooltip": "The unsigned integer matrix multiply-accumulate instruction multiplies the 2\u00d78 matrix of unsigned 8-bit integer values held in each 128-bit segment of the first source vector by the 8\u00d72 matrix of unsigned 8-bit integer values in the corresponding segment of the second source vector. The resulting 2\u00d72 widened 32-bit integer matrix product is then destructively added to the 32-bit integer matrix accumulator held in the corresponding segment of the addend and destination vector. This is equivalent to performing an 8-way dot product per destination element.", + "html": "

The unsigned integer matrix multiply-accumulate instruction multiplies the 2\u00d78 matrix of unsigned 8-bit integer values held in each 128-bit segment of the first source vector by the 8\u00d72 matrix of unsigned 8-bit integer values in the corresponding segment of the second source vector. The resulting 2\u00d72 widened 32-bit integer matrix product is then destructively added to the 32-bit integer matrix accumulator held in the corresponding segment of the addend and destination vector. This is equivalent to performing an 8-way dot product per destination element.

This instruction is unpredicated.

ID_AA64ZFR0_EL1.I8MM indicates whether this instruction is implemented.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMNEGL": + return { + "tooltip": "Unsigned Multiply-Negate Long multiplies two 32-bit register values, negates the product, and writes the result to the 64-bit destination register.", + "html": "

Unsigned Multiply-Negate Long multiplies two 32-bit register values, negates the product, and writes the result to the 64-bit destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMOPA": + return { + "tooltip": "This instruction works with a 32-bit element ZA tile.", + "html": "

This instruction works with a 32-bit element ZA tile.

The unsigned integer sum of outer products and accumulate instructions multiply the sub-matrix in the first source vector by the sub-matrix in the second source vector. The first source holds SVLS\u00d72 sub-matrix of unsigned 16-bit integer values, and the second source holds 2\u00d7SVLS sub-matrix of unsigned 16-bit integer values.

Each source vector is independently predicated by a corresponding governing predicate. When a 16-bit source element is inactive, it is treated as having the value 0.

The resulting SVLS\u00d7SVLS widened 32-bit integer sum of outer products is then destructively added to the 32-bit integer destination tile. This is equivalent to performing a 2-way dot product and accumulate to each of the destination tile elements.

Each 32-bit container of the first source vector holds 2 consecutive column elements of each row of a SVLS\u00d72 sub-matrix, and each 32-bit container of the second source vector holds 2 consecutive row elements of each column of a 2\u00d7SVLS sub-matrix.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMOPA": + return { + "tooltip": "The 8-bit integer variant works with a 32-bit element ZA tile.", + "html": "

The 8-bit integer variant works with a 32-bit element ZA tile.

The 16-bit integer variant works with a 64-bit element ZA tile.

The unsigned integer sum of outer products and accumulate instructions multiply the sub-matrix in the first source vector by the sub-matrix in the second source vector. In case of the 8-bit integer variant, the first source holds SVLS\u00d74 sub-matrix of unsigned 8-bit integer values, and the second source holds 4\u00d7SVLS sub-matrix of unsigned 8-bit integer values. In case of the 16-bit integer variant, the first source holds SVLD\u00d74 sub-matrix of unsigned 16-bit integer values, and the second source holds 4\u00d7SVLD sub-matrix of unsigned 16-bit integer values.

Each source vector is independently predicated by a corresponding governing predicate. When an 8-bit source element in case of 8-bit integer variant or a 16-bit source element in case of 16-bit integer variant is Inactive, it is treated as having the value 0.

The resulting SVLS\u00d7SVLS widened 32-bit integer or SVLD\u00d7SVLD widened 64-bit integer sum of outer products is then destructively added to the 32-bit integer or 64-bit integer destination tile, respectively for 8-bit integer and 16-bit integer instruction variants. This is equivalent to performing a 4-way dot product and accumulate to each of the destination tile elements.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMOPS": + return { + "tooltip": "This instruction works with a 32-bit element ZA tile.", + "html": "

This instruction works with a 32-bit element ZA tile.

The unsigned integer sum of outer products and subtract instructions multiply the sub-matrix in the first source vector by the sub-matrix in the second source vector. The first source holds SVLS\u00d72 sub-matrix of unsigned 16-bit integer values, and the second source holds 2\u00d7SVLS sub-matrix of unsigned 16-bit integer values.

Each source vector is independently predicated by a corresponding governing predicate. When a 16-bit source element is inactive, it is treated as having the value 0.

The resulting SVLS\u00d7SVLS widened 32-bit integer sum of outer products is then destructively subtracted from the 32-bit integer destination tile. This is equivalent to performing a 2-way dot product and subtract from each of the destination tile elements.

Each 32-bit container of the first source vector holds 2 consecutive column elements of each row of a SVLS\u00d72 sub-matrix, and each 32-bit container of the second source vector holds 2 consecutive row elements of each column of a 2\u00d7SVLS sub-matrix.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMOPS": + return { + "tooltip": "The 8-bit integer variant works with a 32-bit element ZA tile.", + "html": "

The 8-bit integer variant works with a 32-bit element ZA tile.

The 16-bit integer variant works with a 64-bit element ZA tile.

The unsigned integer sum of outer products and subtract instructions multiply the sub-matrix in the first source vector by the sub-matrix in the second source vector. In case of the 8-bit integer variant, the first source holds SVLS\u00d74 sub-matrix of unsigned 8-bit integer values, and the second source holds 4\u00d7SVLS sub-matrix of unsigned 8-bit integer values. In case of the 16-bit integer variant, the first source holds SVLD\u00d74 sub-matrix of unsigned 16-bit integer values, and the second source holds 4\u00d7SVLD sub-matrix of unsigned 16-bit integer values.

Each source vector is independently predicated by a corresponding governing predicate. When an 8-bit source element in case of 8-bit integer variant or a 16-bit source element in case of 16-bit integer variant is Inactive, it is treated as having the value 0.

The resulting SVLS\u00d7SVLS widened 32-bit integer or SVLD\u00d7SVLD widened 64-bit integer sum of outer products is then destructively subtracted from the 32-bit integer or 64-bit integer destination tile, respectively for 8-bit integer and 16-bit integer instruction variants. This is equivalent to performing a 4-way dot product and subtract from each of the destination tile elements.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMOV": + return { + "tooltip": "Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.", + "html": "

Unsigned Move vector element to general-purpose register. This instruction reads the unsigned integer from the source SIMD&FP register, zero-extends it to form a 32-bit or 64-bit value, and writes the result to the destination general-purpose register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMSUBL": + return { + "tooltip": "Unsigned Multiply-Subtract Long multiplies two 32-bit register values, subtracts the product from a 64-bit register value, and writes the result to the 64-bit destination register.", + "html": "

Unsigned Multiply-Subtract Long multiplies two 32-bit register values, subtracts the product from a 64-bit register value, and writes the result to the 64-bit destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMULH": + return { + "tooltip": "Unsigned Multiply High multiplies two 64-bit register values, and writes bits[127:64] of the 128-bit result to the 64-bit destination register.", + "html": "

Unsigned Multiply High multiplies two 64-bit register values, and writes bits[127:64] of the 128-bit result to the 64-bit destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMULH": + return { + "tooltip": "Widening multiply unsigned integer values in active elements of the first source vector by corresponding elements of the second source vector and destructively place the high half of the result in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Widening multiply unsigned integer values in active elements of the first source vector by corresponding elements of the second source vector and destructively place the high half of the result in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMULH": + return { + "tooltip": "Widening multiply unsigned integer values of all elements of the first source vector by corresponding elements of the second source vector and place the high half of the result in the corresponding elements of the destination vector. This instruction is unpredicated.", + "html": "

Widening multiply unsigned integer values of all elements of the first source vector by corresponding elements of the second source vector and place the high half of the result in the corresponding elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMULL": + case "UMULL2": + return { + "tooltip": "Unsigned Multiply Long (vector, by element). This instruction multiplies each vector element in the lower or upper half of the first source SIMD&FP register by the specified vector element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.", + "html": "

Unsigned Multiply Long (vector, by element). This instruction multiplies each vector element in the lower or upper half of the first source SIMD&FP register by the specified vector element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied.

The UMULL instruction extracts vector elements from the lower half of the first source register. The UMULL2 instruction extracts vector elements from the upper half of the first source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMULL": + case "UMULL2": + return { + "tooltip": "Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.", + "html": "

Unsigned Multiply long (vector). This instruction multiplies corresponding vector elements in the lower or upper half of the two source SIMD&FP registers, places the result in a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the elements that are multiplied. All the values in this instruction are unsigned integer values.

The UMULL instruction extracts each source vector from the lower half of each source register. The UMULL2 instruction extracts each source vector from the upper half of each source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMULL": + return { + "tooltip": "Unsigned Multiply Long multiplies two 32-bit register values, and writes the result to the 64-bit destination register.", + "html": "

Unsigned Multiply Long multiplies two 32-bit register values, and writes the result to the 64-bit destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMULLB": + return { + "tooltip": "Multiply the corresponding even-numbered unsigned elements of the first and second source vectors, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Multiply the corresponding even-numbered unsigned elements of the first and second source vectors, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMULLB": + return { + "tooltip": "Multiply the even-numbered unsigned elements within each 128-bit segment of the first source vector by the specified unsigned element in the corresponding second source vector segment, and place the results in the overlapping double-width elements of the destination vector register.", + "html": "

Multiply the even-numbered unsigned elements within each 128-bit segment of the first source vector by the specified unsigned element in the corresponding second source vector segment, and place the results in the overlapping double-width elements of the destination vector register.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 2 or 3 bits depending on the size of the element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMULLT": + return { + "tooltip": "Multiply the corresponding odd-numbered unsigned elements of the first and second source vectors, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Multiply the corresponding odd-numbered unsigned elements of the first and second source vectors, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UMULLT": + return { + "tooltip": "Multiply the odd-numbered unsigned elements within each 128-bit segment of the first source vector by the specified unsigned element in the corresponding second source vector segment, and place the results in the overlapping double-width elements of the destination vector register.", + "html": "

Multiply the odd-numbered unsigned elements within each 128-bit segment of the first source vector by the specified unsigned element in the corresponding second source vector segment, and place the results in the overlapping double-width elements of the destination vector register.

The elements within the second source vector are specified using an immediate index which selects the same element position within each 128-bit vector segment. The index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 2 or 3 bits depending on the size of the element.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQADD": + return { + "tooltip": "Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Unsigned saturating Add. This instruction adds the values of corresponding elements of the two source SIMD&FP registers, places the results into a vector, and writes the vector to the destination SIMD&FP register.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQADD": + return { + "tooltip": "Add active unsigned elements of the first source vector to corresponding unsigned elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2N)-1. Inactive elements in the destination vector register remain unmodified.", + "html": "

Add active unsigned elements of the first source vector to corresponding unsigned elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2^N)-1. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQADD": + return { + "tooltip": "Unsigned saturating add of an unsigned immediate to each element of the source vector, and destructively place the results in the corresponding elements of the source vector. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2N)-1. This instruction is unpredicated.", + "html": "

Unsigned saturating add of an unsigned immediate to each element of the source vector, and destructively place the results in the corresponding elements of the source vector. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2^N)-1. This instruction is unpredicated.

The immediate is an unsigned value in the range 0 to 255, and for element widths of 16 bits or higher it may also be a positive multiple of 256 in the range 256 to 65280.

The immediate is encoded in 8 bits with an optional left shift by 8. The preferred disassembly when the shift option is specified is \"#<uimm8>, LSL #8\". However an assembler and disassembler may also allow use of the shifted 16-bit value unless the immediate is 0 and the shift amount is 8, which must be unambiguously described as \"#0, LSL #8\".

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQADD": + return { + "tooltip": "Unsigned saturating add all elements of the second source vector to corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2N)-1. This instruction is unpredicated.", + "html": "

Unsigned saturating add all elements of the second source vector to corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2^N)-1. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQCVT": + return { + "tooltip": "Saturate the unsigned integer value in each element of the two source vectors to half the original source element width, and place the results in the half-width destination elements.", + "html": "

Saturate the unsigned integer value in each element of the two source vectors to half the original source element width, and place the results in the half-width destination elements.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQCVT": + return { + "tooltip": "Saturate the unsigned integer value in each element of the four source vectors to quarter the original source element width, and place the results in the quarter-width destination elements.", + "html": "

Saturate the unsigned integer value in each element of the four source vectors to quarter the original source element width, and place the results in the quarter-width destination elements.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQCVTN": + return { + "tooltip": "Saturate the unsigned integer value in each element of the group of two source vectors to half the original source element width, and place the two-way interleaved results in the half-width destination elements.", + "html": "

Saturate the unsigned integer value in each element of the group of two source vectors to half the original source element width, and place the two-way interleaved results in the half-width destination elements.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQCVTN": + return { + "tooltip": "Saturate the unsigned integer value in each element of the four source vectors to quarter the original source element width, and place the four-way interleaved results in the quarter-width destination elements.", + "html": "

Saturate the unsigned integer value in each element of the four source vectors to quarter the original source element width, and place the four-way interleaved results in the quarter-width destination elements.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQDECB": + return { + "tooltip": "Determines the number of active 8-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement the scalar destination. The result is saturated to the general-purpose register's unsigned integer range.", + "html": "

Determines the number of active 8-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement the scalar destination. The result is saturated to the general-purpose register's unsigned integer range.

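The named predicate constraint limits the number of active elements in a single predicate to: a fixed number (VL1 to VL256), the largest power of two (POW2), the largest multiple of three or four (MUL3 or MUL4), or all available, implicitly a multiple of two (ALL).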

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQDECD": + return { + "tooltip": "Determines the number of active 64-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement the scalar destination. The result is saturated to the general-purpose register's unsigned integer range.", + "html": "

Determines the number of active 64-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement the scalar destination. The result is saturated to the general-purpose register's unsigned integer range.

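The named predicate constraint limits the number of active elements in a single predicate to: a fixed number (VL1 to VL256), the largest power of two (POW2), the largest multiple of three or four (MUL3 or MUL4), or all available, implicitly a multiple of two (ALL).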

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQDECD": + return { + "tooltip": "Determines the number of active 64-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement all destination vector elements. The results are saturated to the 64-bit unsigned integer range.", + "html": "

Determines the number of active 64-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement all destination vector elements. The results are saturated to the 64-bit unsigned integer range.

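The named predicate constraint limits the number of active elements in a single predicate to: a fixed number (VL1 to VL256), the largest power of two (POW2), the largest multiple of three or four (MUL3 or MUL4), or all available, implicitly a multiple of two (ALL).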

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQDECH": + return { + "tooltip": "Determines the number of active 16-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement the scalar destination. The result is saturated to the general-purpose register's unsigned integer range.", + "html": "

Determines the number of active 16-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement the scalar destination. The result is saturated to the general-purpose register's unsigned integer range.

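The named predicate constraint limits the number of active elements in a single predicate to: a fixed number (VL1 to VL256), the largest power of two (POW2), the largest multiple of three or four (MUL3 or MUL4), or all available, implicitly a multiple of two (ALL).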

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQDECH": + return { + "tooltip": "Determines the number of active 16-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement all destination vector elements. The results are saturated to the 16-bit unsigned integer range.", + "html": "

Determines the number of active 16-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement all destination vector elements. The results are saturated to the 16-bit unsigned integer range.

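The named predicate constraint limits the number of active elements in a single predicate to: a fixed number (VL1 to VL256), the largest power of two (POW2), the largest multiple of three or four (MUL3 or MUL4), or all available, implicitly a multiple of two (ALL).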

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQDECP": + return { + "tooltip": "Counts the number of true elements in the source predicate and then uses the result to decrement the scalar destination. The result is saturated to the general-purpose register's unsigned integer range.", + "html": "

Counts the number of true elements in the source predicate and then uses the result to decrement the scalar destination. The result is saturated to the general-purpose register's unsigned integer range.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQDECP": + return { + "tooltip": "Counts the number of true elements in the source predicate and then uses the result to decrement all destination vector elements. The results are saturated to the element unsigned integer range.", + "html": "

Counts the number of true elements in the source predicate and then uses the result to decrement all destination vector elements. The results are saturated to the element unsigned integer range.

The predicate size specifier may be omitted in assembler source code, but this is deprecated and will be prohibited in a future release of the architecture.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQDECW": + return { + "tooltip": "Determines the number of active 32-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement the scalar destination. The result is saturated to the general-purpose register's unsigned integer range.", + "html": "

Determines the number of active 32-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement the scalar destination. The result is saturated to the general-purpose register's unsigned integer range.

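The named predicate constraint limits the number of active elements in a single predicate to: a fixed number (VL1 to VL256), the largest power of two (POW2), the largest multiple of three or four (MUL3 or MUL4), or all available, implicitly a multiple of two (ALL).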

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQDECW": + return { + "tooltip": "Determines the number of active 32-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement all destination vector elements. The results are saturated to the 32-bit unsigned integer range.", + "html": "

Determines the number of active 32-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to decrement all destination vector elements. The results are saturated to the 32-bit unsigned integer range.

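The named predicate constraint limits the number of active elements in a single predicate to: a fixed number (VL1 to VL256), the largest power of two (POW2), the largest multiple of three or four (MUL3 or MUL4), or all available, implicitly a multiple of two (ALL).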

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQINCB": + return { + "tooltip": "Determines the number of active 8-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment the scalar destination. The result is saturated to the general-purpose register's unsigned integer range.", + "html": "

Determines the number of active 8-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment the scalar destination. The result is saturated to the general-purpose register's unsigned integer range.

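The named predicate constraint limits the number of active elements in a single predicate to: a fixed number (VL1 to VL256), the largest power of two (POW2), the largest multiple of three or four (MUL3 or MUL4), or all available, implicitly a multiple of two (ALL).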

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQINCD": + return { + "tooltip": "Determines the number of active 64-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment the scalar destination. The result is saturated to the general-purpose register's unsigned integer range.", + "html": "

Determines the number of active 64-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment the scalar destination. The result is saturated to the general-purpose register's unsigned integer range.

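The named predicate constraint limits the number of active elements in a single predicate to: a fixed number (VL1 to VL256), the largest power of two (POW2), the largest multiple of three or four (MUL3 or MUL4), or all available, implicitly a multiple of two (ALL).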

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQINCD": + return { + "tooltip": "Determines the number of active 64-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment all destination vector elements. The results are saturated to the 64-bit unsigned integer range.", + "html": "

Determines the number of active 64-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment all destination vector elements. The results are saturated to the 64-bit unsigned integer range.

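The named predicate constraint limits the number of active elements in a single predicate to: a fixed number (VL1 to VL256), the largest power of two (POW2), the largest multiple of three or four (MUL3 or MUL4), or all available, implicitly a multiple of two (ALL).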

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQINCH": + return { + "tooltip": "Determines the number of active 16-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment the scalar destination. The result is saturated to the general-purpose register's unsigned integer range.", + "html": "

Determines the number of active 16-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment the scalar destination. The result is saturated to the general-purpose register's unsigned integer range.

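The named predicate constraint limits the number of active elements in a single predicate to: a fixed number (VL1 to VL256), the largest power of two (POW2), the largest multiple of three or four (MUL3 or MUL4), or all available, implicitly a multiple of two (ALL).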

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQINCH": + return { + "tooltip": "Determines the number of active 16-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment all destination vector elements. The results are saturated to the 16-bit unsigned integer range.", + "html": "

Determines the number of active 16-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment all destination vector elements. The results are saturated to the 16-bit unsigned integer range.

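The named predicate constraint limits the number of active elements in a single predicate to: a fixed number (VL1 to VL256), the largest power of two (POW2), the largest multiple of three or four (MUL3 or MUL4), or all available, implicitly a multiple of two (ALL).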

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQINCP": + return { + "tooltip": "Counts the number of true elements in the source predicate and then uses the result to increment the scalar destination. The result is saturated to the general-purpose register's unsigned integer range.", + "html": "

Counts the number of true elements in the source predicate and then uses the result to increment the scalar destination. The result is saturated to the general-purpose register's unsigned integer range.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQINCP": + return { + "tooltip": "Counts the number of true elements in the source predicate and then uses the result to increment all destination vector elements. The results are saturated to the element unsigned integer range.", + "html": "

Counts the number of true elements in the source predicate and then uses the result to increment all destination vector elements. The results are saturated to the element unsigned integer range.

The predicate size specifier may be omitted in assembler source code, but this is deprecated and will be prohibited in a future release of the architecture.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQINCW": + return { + "tooltip": "Determines the number of active 32-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment the scalar destination. The result is saturated to the general-purpose register's unsigned integer range.", + "html": "

Determines the number of active 32-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment the scalar destination. The result is saturated to the general-purpose register's unsigned integer range.

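The named predicate constraint limits the number of active elements in a single predicate to: a fixed number (VL1 to VL256), the largest power of two (POW2), the largest multiple of three or four (MUL3 or MUL4), or all available, implicitly a multiple of two (ALL).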

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQINCW": + return { + "tooltip": "Determines the number of active 32-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment all destination vector elements. The results are saturated to the 32-bit unsigned integer range.", + "html": "

Determines the number of active 32-bit elements implied by the named predicate constraint, multiplies that by an immediate in the range 1 to 16 inclusive, and then uses the result to increment all destination vector elements. The results are saturated to the 32-bit unsigned integer range.

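The named predicate constraint limits the number of active elements in a single predicate to: a fixed number (VL1 to VL256), the largest power of two (POW2), the largest multiple of three or four (MUL3 or MUL4), or all available, implicitly a multiple of two (ALL).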

Unspecified or out of range constraint encodings generate an empty predicate or zero element count rather than Undefined Instruction exception.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQRSHL": + return { + "tooltip": "Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Unsigned saturating Rounding Shift Left (register). This instruction takes each vector element of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding vector element of the second source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

If the shift value is positive, the operation is a left shift. Otherwise, it is a right shift. The results are rounded. For truncated results, see UQSHL.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQRSHL": + return { + "tooltip": "Shift active unsigned elements of the first source vector by corresponding elements of the second source vector and destructively place the rounded results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2N)-1. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift active unsigned elements of the first source vector by corresponding elements of the second source vector and destructively place the rounded results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2^N)-1. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQRSHLR": + return { + "tooltip": "Shift active unsigned elements of the second source vector by corresponding elements of the first source vector and destructively place the rounded results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2N)-1. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift active unsigned elements of the second source vector by corresponding elements of the first source vector and destructively place the rounded results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2^N)-1. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQRSHR": + return { + "tooltip": "Shift right by an immediate value, the unsigned integer value in each element of the two source vectors and place the rounded results in the half-width destination elements. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2N)-1. The immediate shift amount is an unsigned value in the range 1 to 16.", + "html": "

Shift right by an immediate value, the unsigned integer value in each element of the two source vectors and place the rounded results in the half-width destination elements. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2^N)-1. The immediate shift amount is an unsigned value in the range 1 to 16.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQRSHR": + return { + "tooltip": "Shift right by an immediate value, the unsigned integer value in each element of the four source vectors and place the rounded results in the quarter-width destination elements. Each result element is saturated to the quarter-width N-bit element's unsigned integer range 0 to (2N)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per source element.", + "html": "

Shift right by an immediate value, the unsigned integer value in each element of the four source vectors and place the rounded results in the quarter-width destination elements. Each result element is saturated to the quarter-width N-bit element's unsigned integer range 0 to (2^N)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per source element.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQRSHRN": + case "UQRSHRN2": + return { + "tooltip": "Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see UQSHRN.", + "html": "

Unsigned saturating Rounded Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see UQSHRN.

The UQRSHRN instruction writes the vector to the lower half of the destination register and clears the upper half, while the UQRSHRN2 instruction writes the vector to the upper half of the destination register without affecting the other bits of the register.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQRSHRN": + return { + "tooltip": "Shift right by an immediate value, the unsigned integer value in each element of the group of two source vectors and place the two-way interleaved rounded results in the half-width destination elements. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2N)-1. The immediate shift amount is an unsigned value in the range 1 to 16.", + "html": "

Shift right by an immediate value, the unsigned integer value in each element of the group of two source vectors and place the two-way interleaved rounded results in the half-width destination elements. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2^N)-1. The immediate shift amount is an unsigned value in the range 1 to 16.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQRSHRN": + return { + "tooltip": "Shift right by an immediate value, the unsigned integer value in each element of the four source vectors and place the four-way interleaved rounded results in the quarter-width destination elements. Each result element is saturated to the quarter-width N-bit element's unsigned integer range 0 to (2N)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per source element.", + "html": "

Shift right by an immediate value, the unsigned integer value in each element of the four source vectors and place the four-way interleaved rounded results in the quarter-width destination elements. Each result element is saturated to the quarter-width N-bit element's unsigned integer range 0 to (2^N)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per source element.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQRSHRNB": + return { + "tooltip": "Shift each unsigned integer value in the source vector elements right by an immediate value, and place the rounded results in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2N)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.", + "html": "

Shift each unsigned integer value in the source vector elements right by an immediate value, and place the rounded results in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2^N)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQRSHRNT": + return { + "tooltip": "Shift each unsigned integer value in the source vector elements by an immediate value, and place the rounded results in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2N)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.", + "html": "

Shift each unsigned integer value in the source vector elements right by an immediate value, and place the rounded results in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2^N)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQSHL": + return { + "tooltip": "Unsigned saturating Shift Left (immediate). This instruction takes each vector element in the source SIMD&FP register, shifts it by an immediate value, places the results in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.", + "html": "

Unsigned saturating Shift Left (immediate). This instruction takes each vector element in the source SIMD&FP register, shifts it by an immediate value, places the results in a vector, and writes the vector to the destination SIMD&FP register. The results are truncated. For rounded results, see UQRSHL.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQSHL": + return { + "tooltip": "Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Unsigned saturating Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

If the shift value is positive, the operation is a left shift. Otherwise, it is a right shift. The results are truncated. For rounded results, see UQRSHL.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQSHL": + return { + "tooltip": "Shift left by immediate each active unsigned element of the source vector, and destructively place the results in the corresponding elements of the source vector. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2N)-1. The immediate shift amount is an unsigned value in the range 0 to number of bits per element minus 1. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift left by immediate each active unsigned element of the source vector, and destructively place the results in the corresponding elements of the source vector. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2^N)-1. The immediate shift amount is an unsigned value in the range 0 to number of bits per element minus 1. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQSHL": + return { + "tooltip": "Shift active unsigned elements of the first source vector by corresponding elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2N)-1. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift active unsigned elements of the first source vector by corresponding elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2^N)-1. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQSHLR": + return { + "tooltip": "Shift active unsigned elements of the second source vector by corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2N)-1. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift active unsigned elements of the second source vector by corresponding elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2^N)-1. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQSHRN": + case "UQSHRN2": + return { + "tooltip": "Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see UQRSHRN.", + "html": "

Unsigned saturating Shift Right Narrow (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, saturates each shifted result to a value that is half the original width, puts the final result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see UQRSHRN.

The UQSHRN instruction writes the vector to the lower half of the destination register and clears the upper half, while the UQSHRN2 instruction writes the vector to the upper half of the destination register without affecting the other bits of the register.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQSHRNB": + return { + "tooltip": "Shift each unsigned integer value in the source vector elements right by an immediate value, and place the truncated results in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2N)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.", + "html": "

Shift each unsigned integer value in the source vector elements right by an immediate value, and place the truncated results in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2^N)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQSHRNT": + return { + "tooltip": "Shift each unsigned integer value in the source vector elements right by an immediate value, and place the truncated results in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2N)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.", + "html": "

Shift each unsigned integer value in the source vector elements right by an immediate value, and place the truncated results in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged. Each result element is saturated to the half-width N-bit element's unsigned integer range 0 to (2^N)-1. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQSUB": + return { + "tooltip": "Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Unsigned saturating Subtract. This instruction subtracts the element values of the second source SIMD&FP register from the corresponding element values of the first source SIMD&FP register, places the results into a vector, and writes the vector to the destination SIMD&FP register.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQSUB": + return { + "tooltip": "Subtract active unsigned elements of the second source vector from corresponding unsigned elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2N)-1. Inactive elements in the destination vector register remain unmodified.", + "html": "

Subtract active unsigned elements of the second source vector from corresponding unsigned elements of the first source vector and destructively place the results in the corresponding elements of the first source vector. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2^N)-1. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQSUB": + return { + "tooltip": "Unsigned saturating subtract an unsigned immediate from each element of the source vector, and destructively place the results in the corresponding elements of the source vector. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2N)-1. This instruction is unpredicated.", + "html": "

Unsigned saturating subtract an unsigned immediate from each element of the source vector, and destructively place the results in the corresponding elements of the source vector. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2^N)-1. This instruction is unpredicated.

The immediate is an unsigned value in the range 0 to 255, and for element widths of 16 bits or higher it may also be a positive multiple of 256 in the range 256 to 65280.

The immediate is encoded in 8 bits with an optional left shift by 8. The preferred disassembly when the shift option is specified is \"#<uimm8>, LSL #8\". However an assembler and disassembler may also allow use of the shifted 16-bit value unless the immediate is 0 and the shift amount is 8, which must be unambiguously described as \"#0, LSL #8\".

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQSUB": + return { + "tooltip": "Unsigned saturating subtract all elements of the second source vector from corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2N)-1. This instruction is unpredicated.", + "html": "

Unsigned saturating subtract all elements of the second source vector from corresponding elements of the first source vector and place the results in the corresponding elements of the destination vector. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2^N)-1. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQSUBR": + return { + "tooltip": "Subtract active unsigned elements of the first source vector from corresponding unsigned elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2N)-1. Inactive elements in the destination vector register remain unmodified.", + "html": "

Subtract active unsigned elements of the first source vector from corresponding unsigned elements of the second source vector and destructively place the results in the corresponding elements of the first source vector. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2^N)-1. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQXTN": + case "UQXTN2": + return { + "tooltip": "Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.", + "html": "

Unsigned saturating extract Narrow. This instruction reads each vector element from the source SIMD&FP register, saturates each value to half the original width, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

If saturation occurs, the cumulative saturation bit FPSR.QC is set.

The UQXTN instruction writes the vector to the lower half of the destination register and clears the upper half, while the UQXTN2 instruction writes the vector to the upper half of the destination register without affecting the other bits of the register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQXTNB": + return { + "tooltip": "Saturate the unsigned integer value in each source element to half the original source element width, and place the results in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero.", + "html": "

Saturate the unsigned integer value in each source element to half the original source element width, and place the results in the even-numbered half-width destination elements, while setting the odd-numbered elements to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UQXTNT": + return { + "tooltip": "Saturate the unsigned integer value in each source element to half the original source element width, and place the results in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged.", + "html": "

Saturate the unsigned integer value in each source element to half the original source element width, and place the results in the odd-numbered half-width destination elements, leaving the even-numbered elements unchanged.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "URECPE": + return { + "tooltip": "Unsigned Reciprocal Estimate. This instruction reads each vector element from the source SIMD&FP register, calculates an approximate inverse for the unsigned integer value, places the result into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Unsigned Reciprocal Estimate. This instruction reads each vector element from the source SIMD&FP register, calculates an approximate inverse for the unsigned integer value, places the result into a vector, and writes the vector to the destination SIMD&FP register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "URECPE": + return { + "tooltip": "Find the approximate reciprocal of each active unsigned element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Find the approximate reciprocal of each active unsigned element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "URHADD": + return { + "tooltip": "Unsigned Rounding Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Unsigned Rounding Halving Add. This instruction adds corresponding unsigned integer values from the two source SIMD&FP registers, shifts each result right one bit, places the results into a vector, and writes the vector to the destination SIMD&FP register.

The results are rounded. For truncated results, see UHADD.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "URHADD": + return { + "tooltip": "Add active unsigned elements of the first source vector to corresponding unsigned elements of the second source vector, shift right one bit, and destructively place the rounded results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Add active unsigned elements of the first source vector to corresponding unsigned elements of the second source vector, shift right one bit, and destructively place the rounded results in the corresponding elements of the first source vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "URSHL": + return { + "tooltip": "Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Unsigned Rounding Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts the vector element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

If the shift value is positive, the operation is a left shift. If the shift value is negative, it is a rounding right shift.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "URSHL": + return { + "tooltip": "Shift the unsigned elements of the two or four first source vectors by corresponding elements of the second source vector and destructively place the rounded results in the corresponding elements of the two or four first source vectors. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed.", + "html": "

Shift the unsigned elements of the two or four first source vectors by corresponding elements of the second source vector and destructively place the rounded results in the corresponding elements of the two or four first source vectors. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "URSHL": + return { + "tooltip": "Shift the unsigned elements of the two or four first source vectors by corresponding elements of the two or four second source vectors and destructively place the rounded results in the corresponding elements of the two or four first source vectors. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed.", + "html": "

Shift the unsigned elements of the two or four first source vectors by corresponding elements of the two or four second source vectors and destructively place the rounded results in the corresponding elements of the two or four first source vectors. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "URSHL": + return { + "tooltip": "Shift active unsigned elements of the first source vector by corresponding elements of the second source vector and destructively place the rounded results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift active unsigned elements of the first source vector by corresponding elements of the second source vector and destructively place the rounded results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "URSHLR": + return { + "tooltip": "Shift active unsigned elements of the second source vector by corresponding elements of the first source vector and destructively place the rounded results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift active unsigned elements of the second source vector by corresponding elements of the first source vector and destructively place the rounded results in the corresponding elements of the first source vector. A positive shift amount performs a left shift, otherwise a right shift by the negated shift amount is performed. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "URSHR": + return { + "tooltip": "Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USHR.", + "html": "

Unsigned Rounding Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USHR.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "URSHR": + return { + "tooltip": "Shift right by immediate each active unsigned element of the source vector, and destructively place the rounded results in the corresponding elements of the source vector. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. Inactive elements in the destination vector register remain unmodified.", + "html": "

Shift right by immediate each active unsigned element of the source vector, and destructively place the rounded results in the corresponding elements of the source vector. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "URSQRTE": + return { + "tooltip": "Unsigned Reciprocal Square Root Estimate. This instruction reads each vector element from the source SIMD&FP register, calculates an approximate inverse square root for each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.", + "html": "

Unsigned Reciprocal Square Root Estimate. This instruction reads each vector element from the source SIMD&FP register, calculates an approximate inverse square root for each value, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "URSQRTE": + return { + "tooltip": "Find the approximate reciprocal square root of each active unsigned element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Find the approximate reciprocal square root of each active unsigned element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "URSRA": + return { + "tooltip": "Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USRA.", + "html": "

Unsigned Rounding Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are rounded. For truncated results, see USRA.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "URSRA": + return { + "tooltip": "Shift right by immediate each unsigned element of the source vector, inserting zeroes, and add the rounded intermediate result destructively to the corresponding elements of the addend vector. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.", + "html": "

Shift right by immediate each unsigned element of the source vector, inserting zeroes, and add the rounded intermediate result destructively to the corresponding elements of the addend vector. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USDOT": + return { + "tooltip": "Dot Product index form with unsigned and signed integers. This instruction performs the dot product of the four unsigned 8-bit integer values in each 32-bit element of the first source register with the four signed 8-bit integer values in an indexed 32-bit element of the second source register, accumulating the result into the corresponding 32-bit element of the destination register.", + "html": "

Dot Product index form with unsigned and signed integers. This instruction performs the dot product of the four unsigned 8-bit integer values in each 32-bit element of the first source register with the four signed 8-bit integer values in an indexed 32-bit element of the second source register, accumulating the result into the corresponding 32-bit element of the destination register.

From Armv8.2 to Armv8.5, this is an optional instruction. From Armv8.6 it is mandatory for implementations that include Advanced SIMD to support it. ID_AA64ISAR1_EL1.I8MM indicates whether this instruction is supported.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USDOT": + return { + "tooltip": "Dot Product vector form with unsigned and signed integers. This instruction performs the dot product of the four unsigned 8-bit integer values in each 32-bit element of the first source register with the four signed 8-bit integer values in the corresponding 32-bit element of the second source register, accumulating the result into the corresponding 32-bit element of the destination register.", + "html": "

Dot Product vector form with unsigned and signed integers. This instruction performs the dot product of the four unsigned 8-bit integer values in each 32-bit element of the first source register with the four signed 8-bit integer values in the corresponding 32-bit element of the second source register, accumulating the result into the corresponding 32-bit element of the destination register.

From Armv8.2 to Armv8.5, this is an optional instruction. From Armv8.6 it is mandatory for implementations that include Advanced SIMD to support it. ID_AA64ISAR1_EL1.I8MM indicates whether this instruction is supported.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USDOT": + return { + "tooltip": "The unsigned by signed integer dot product instruction computes the dot product of a group of four unsigned 8-bit integer values held in each 32-bit element of the first source vector multiplied by a group of four signed 8-bit integer values in the corresponding 32-bit element of the second source vector, and then destructively adds the widened dot product to the corresponding 32-bit element of the destination vector.", + "html": "

The unsigned by signed integer dot product instruction computes the dot product of a group of four unsigned 8-bit integer values held in each 32-bit element of the first source vector multiplied by a group of four signed 8-bit integer values in the corresponding 32-bit element of the second source vector, and then destructively adds the widened dot product to the corresponding 32-bit element of the destination vector.

This instruction is unpredicated.

ID_AA64ZFR0_EL1.I8MM indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USDOT": + return { + "tooltip": "The unsigned by signed integer indexed dot product instruction computes the dot product of a group of four unsigned 8-bit integer values held in each 32-bit element of the first source vector multiplied by a group of four signed 8-bit integer values in an indexed 32-bit element of the second source vector, and then destructively adds the widened dot product to the corresponding 32-bit element of the destination vector.", + "html": "

The unsigned by signed integer indexed dot product instruction computes the dot product of a group of four unsigned 8-bit integer values held in each 32-bit element of the first source vector multiplied by a group of four signed 8-bit integer values in an indexed 32-bit element of the second source vector, and then destructively adds the widened dot product to the corresponding 32-bit element of the destination vector.

The groups within the second source vector are specified using an immediate index which selects the same group position within each 128-bit vector segment. The index range is from 0 to 3. This instruction is unpredicated.

ID_AA64ZFR0_EL1.I8MM indicates whether this instruction is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USDOT": + return { + "tooltip": "The unsigned by signed integer dot product instruction computes the dot product of four unsigned 8-bit integer values held in each 32-bit element of the two or four first source vectors and four signed 8-bit integer values in the corresponding indexed 32-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups.", + "html": "

The unsigned by signed integer dot product instruction computes the dot product of four unsigned 8-bit integer values held in each 32-bit element of the two or four first source vectors and four signed 8-bit integer values in the corresponding indexed 32-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups.

The groups within the second source vector are specified using an immediate element index which selects the same group position within each 128-bit vector segment. The index range is from 0 to 3, encoded in 2 bits. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USDOT": + return { + "tooltip": "The unsigned by signed integer dot product instruction computes the dot product of four unsigned 8-bit integer values held in each 32-bit element of the two or four first source vectors and four signed 8-bit integer values in the corresponding 32-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

The unsigned by signed integer dot product instruction computes the dot product of four unsigned 8-bit integer values held in each 32-bit element of the two or four first source vectors and four signed 8-bit integer values in the corresponding 32-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USDOT": + return { + "tooltip": "The unsigned by signed integer dot product instruction computes the dot product of four unsigned 8-bit integer values held in each 32-bit element of the two or four first source vectors and four signed 8-bit integer values in the corresponding 32-bit element of the two or four second source vectors. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

The unsigned by signed integer dot product instruction computes the dot product of four unsigned 8-bit integer values held in each 32-bit element of the two or four first source vectors and four signed 8-bit integer values in the corresponding 32-bit element of the two or four second source vectors. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USHL": + return { + "tooltip": "Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Unsigned Shift Left (register). This instruction takes each element in the vector of the first source SIMD&FP register, shifts each element by a value from the least significant byte of the corresponding element of the second source SIMD&FP register, places the results in a vector, and writes the vector to the destination SIMD&FP register.

If the shift value is positive, the operation is a left shift. If the shift value is negative, it is a truncating right shift. For a rounding shift, see URSHL.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USHLL": + case "USHLL2": + return { + "tooltip": "Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.", + "html": "

Unsigned Shift Left Long (immediate). This instruction reads each vector element in the lower or upper half of the source SIMD&FP register, shifts the unsigned integer value left by the specified number of bits, places the result into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

The USHLL instruction extracts vector elements from the lower half of the source register. The USHLL2 instruction extracts vector elements from the upper half of the source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USHLLB": + return { + "tooltip": "Shift left by immediate each even-numbered unsigned element of the source vector, and place the results in the overlapping double-width elements of the destination vector. The immediate shift amount is an unsigned value in the range 0 to number of bits per element minus 1. This instruction is unpredicated.", + "html": "

Shift left by immediate each even-numbered unsigned element of the source vector, and place the results in the overlapping double-width elements of the destination vector. The immediate shift amount is an unsigned value in the range 0 to number of bits per element minus 1. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USHLLT": + return { + "tooltip": "Shift left by immediate each odd-numbered unsigned element of the source vector, and place the results in the overlapping double-width elements of the destination vector. The immediate shift amount is an unsigned value in the range 0 to number of bits per element minus 1. This instruction is unpredicated.", + "html": "

Shift left by immediate each odd-numbered unsigned element of the source vector, and place the results in the overlapping double-width elements of the destination vector. The immediate shift amount is an unsigned value in the range 0 to number of bits per element minus 1. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USHR": + return { + "tooltip": "Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSHR.", + "html": "

Unsigned Shift Right (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, writes the final result to a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSHR.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USMLALL": + return { + "tooltip": "This unsigned by signed integer multiply-add long-long instruction multiplies each unsigned 8-bit element in the one, two, or four first source vectors with each signed 8-bit indexed element of the second source vector, widens each product to 32-bits and destructively adds these values to the corresponding 32-bit elements of the ZA quad-vector groups.", + "html": "

This unsigned by signed integer multiply-add long-long instruction multiplies each unsigned 8-bit element in the one, two, or four first source vectors with each signed 8-bit indexed element of the second source vector, widens each product to 32-bits and destructively adds these values to the corresponding 32-bit elements of the ZA quad-vector groups.

The elements within the second source vector are specified using an immediate element index which selects the same element position within each 128-bit vector segment. The element index range is from 0 to one less than the number of elements per 128-bit segment, encoded in 4 bits. The lowest of the four consecutive vector numbers forming the quad-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA quad-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USMLALL": + return { + "tooltip": "This unsigned by signed integer multiply-add long-long instruction multiplies each unsigned 8-bit element in the one, two, or four first source vectors with each signed 8-bit element in the second source vector, widens each product to 32-bits and destructively adds these values to the corresponding 32-bit elements of the ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.", + "html": "

This unsigned by signed integer multiply-add long-long instruction multiplies each unsigned 8-bit element in the one, two, or four first source vectors with each signed 8-bit element in the second source vector, widens each product to 32-bits and destructively adds these values to the corresponding 32-bit elements of the ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA quad-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USMLALL": + return { + "tooltip": "This unsigned by signed integer multiply-add long-long instruction multiplies each unsigned 8-bit element in the two or four first source vectors with each signed 8-bit element in the two or four second source vectors, widens each product to 32-bits and destructively adds these values to the corresponding 32-bit elements of the ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

This unsigned by signed integer multiply-add long-long instruction multiplies each unsigned 8-bit element in the two or four first source vectors with each signed 8-bit element in the two or four second source vectors, widens each product to 32-bits and destructively adds these values to the corresponding 32-bit elements of the ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA quad-vector groups respectively. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USMMLA": + return { + "tooltip": "Unsigned and signed 8-bit integer matrix multiply-accumulate. This instruction multiplies the 2x8 matrix of unsigned 8-bit integer values in the first source vector by the 8x2 matrix of signed 8-bit integer values in the second source vector. The resulting 2x2 32-bit integer matrix product is destructively added to the 32-bit integer matrix accumulator in the destination vector. This is equivalent to performing an 8-way dot product per destination element.", + "html": "

Unsigned and signed 8-bit integer matrix multiply-accumulate. This instruction multiplies the 2x8 matrix of unsigned 8-bit integer values in the first source vector by the 8x2 matrix of signed 8-bit integer values in the second source vector. The resulting 2x2 32-bit integer matrix product is destructively added to the 32-bit integer matrix accumulator in the destination vector. This is equivalent to performing an 8-way dot product per destination element.

From Armv8.2 to Armv8.5, this is an optional instruction. From Armv8.6 it is mandatory for implementations that include Advanced SIMD to support it. ID_AA64ISAR1_EL1.I8MM indicates whether this instruction is supported.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USMMLA": + return { + "tooltip": "The unsigned by signed integer matrix multiply-accumulate instruction multiplies the 2\u00d78 matrix of unsigned 8-bit integer values held in each 128-bit segment of the first source vector by the 8\u00d72 matrix of signed 8-bit integer values in the corresponding segment of the second source vector. The resulting 2\u00d72 widened 32-bit integer matrix product is then destructively added to the 32-bit integer matrix accumulator held in the corresponding segment of the addend and destination vector. This is equivalent to performing an 8-way dot product per destination element.", + "html": "

The unsigned by signed integer matrix multiply-accumulate instruction multiplies the 2\u00d78 matrix of unsigned 8-bit integer values held in each 128-bit segment of the first source vector by the 8\u00d72 matrix of signed 8-bit integer values in the corresponding segment of the second source vector. The resulting 2\u00d72 widened 32-bit integer matrix product is then destructively added to the 32-bit integer matrix accumulator held in the corresponding segment of the addend and destination vector. This is equivalent to performing an 8-way dot product per destination element.

This instruction is unpredicated.

ID_AA64ZFR0_EL1.I8MM indicates whether this instruction is implemented.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USMOPA": + return { + "tooltip": "The 8-bit integer variant works with a 32-bit element ZA tile.", + "html": "

The 8-bit integer variant works with a 32-bit element ZA tile.

The 16-bit integer variant works with a 64-bit element ZA tile.

The unsigned by signed integer sum of outer products and accumulate instructions multiply the sub-matrix in the first source vector by the sub-matrix in the second source vector. In case of the 8-bit integer variant, the first source holds SVLS\u00d74 sub-matrix of unsigned 8-bit integer values, and the second source holds 4\u00d7SVLS sub-matrix of signed 8-bit integer values. In case of the 16-bit integer variant, the first source holds SVLD\u00d74 sub-matrix of unsigned 16-bit integer values, and the second source holds 4\u00d7SVLD sub-matrix of signed 16-bit integer values.

Each source vector is independently predicated by a corresponding governing predicate. When an 8-bit source element in case of 8-bit integer variant or a 16-bit source element in case of 16-bit integer variant is Inactive, it is treated as having the value 0.

The resulting SVLS\u00d7SVLS widened 32-bit integer or SVLD\u00d7SVLD widened 64-bit integer sum of outer products is then destructively added to the 32-bit integer or 64-bit integer destination tile, respectively for 8-bit integer and 16-bit integer instruction variants. This is equivalent to performing a 4-way dot product and accumulate to each of the destination tile elements.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USMOPS": + return { + "tooltip": "The 8-bit integer variant works with a 32-bit element ZA tile.", + "html": "

The 8-bit integer variant works with a 32-bit element ZA tile.

The 16-bit integer variant works with a 64-bit element ZA tile.

The unsigned by signed integer sum of outer products and subtract instructions multiply the sub-matrix in the first source vector by the sub-matrix in the second source vector. In case of the 8-bit integer variant, the first source holds SVLS\u00d74 sub-matrix of unsigned 8-bit integer values, and the second source holds 4\u00d7SVLS sub-matrix of signed 8-bit integer values. In case of the 16-bit integer variant, the first source holds SVLD\u00d74 sub-matrix of unsigned 16-bit integer values, and the second source holds 4\u00d7SVLD sub-matrix of signed 16-bit integer values.

Each source vector is independently predicated by a corresponding governing predicate. When an 8-bit source element in case of 8-bit integer variant or a 16-bit source element in case of 16-bit integer variant is Inactive, it is treated as having the value 0.

The resulting SVLS\u00d7SVLS widened 32-bit integer or SVLD\u00d7SVLD widened 64-bit integer sum of outer products is then destructively subtracted from the 32-bit integer or 64-bit integer destination tile, respectively for 8-bit integer and 16-bit integer instruction variants. This is equivalent to performing a 4-way dot product and subtract from each of the destination tile elements.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USQADD": + return { + "tooltip": "Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, and accumulates the resulting unsigned integer values with the vector elements of the destination SIMD&FP register.", + "html": "

Unsigned saturating Accumulate of Signed value. This instruction adds the signed integer values of the vector elements in the source SIMD&FP register to corresponding unsigned integer values of the vector elements in the destination SIMD&FP register, and accumulates the resulting unsigned integer values with the vector elements of the destination SIMD&FP register.

If overflow occurs with any of the results, those results are saturated. If saturation occurs, the cumulative saturation bit FPSR.QC is set.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USQADD": + return { + "tooltip": "Add active signed elements of the source vector to the corresponding unsigned elements of the addend vector, and destructively place the results in the corresponding elements of the addend vector. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2^N)-1. Inactive elements in the destination vector register remain unmodified.", + "html": "

Add active signed elements of the source vector to the corresponding unsigned elements of the addend vector, and destructively place the results in the corresponding elements of the addend vector. Each result element is saturated to the N-bit element's unsigned integer range 0 to (2^N)-1. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USRA": + return { + "tooltip": "Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSRA.", + "html": "

Unsigned Shift Right and Accumulate (immediate). This instruction reads each vector element in the source SIMD&FP register, right shifts each result by an immediate value, and accumulates the final results with the vector elements of the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The results are truncated. For rounded results, see URSRA.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USRA": + return { + "tooltip": "Shift right by immediate each unsigned element of the source vector, inserting zeroes, and add the truncated intermediate result destructively to the corresponding elements of the addend vector. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.", + "html": "

Shift right by immediate each unsigned element of the source vector, inserting zeroes, and add the truncated intermediate result destructively to the corresponding elements of the addend vector. The immediate shift amount is an unsigned value in the range 1 to number of bits per element. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USUBL": + case "USUBL2": + return { + "tooltip": "Unsigned Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The destination vector elements are twice as long as the source vector elements.", + "html": "

Unsigned Subtract Long. This instruction subtracts each vector element in the lower or upper half of the second source SIMD&FP register from the corresponding vector element of the first source SIMD&FP register, places the result into a vector, and writes the vector to the destination SIMD&FP register. All the values in this instruction are unsigned integer values. The destination vector elements are twice as long as the source vector elements.

The USUBL instruction extracts each source vector from the lower half of each source register. The USUBL2 instruction extracts each source vector from the upper half of each source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USUBLB": + return { + "tooltip": "Subtract the even-numbered unsigned elements of the second source vector from the corresponding unsigned elements of the first source vector, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Subtract the even-numbered unsigned elements of the second source vector from the corresponding unsigned elements of the first source vector, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USUBLT": + return { + "tooltip": "Subtract the odd-numbered unsigned elements of the second source vector from the corresponding unsigned elements of the first source vector, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Subtract the odd-numbered unsigned elements of the second source vector from the corresponding unsigned elements of the first source vector, and place the results in the overlapping double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USUBW": + case "USUBW2": + return { + "tooltip": "Unsigned Subtract Wide. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element in the lower or upper half of the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are unsigned integer values.", + "html": "

Unsigned Subtract Wide. This instruction subtracts each vector element of the second source SIMD&FP register from the corresponding vector element in the lower or upper half of the first source SIMD&FP register, places the result in a vector, and writes the vector to the SIMD&FP destination register. All the values in this instruction are unsigned integer values.

The vector elements of the destination register and the first source register are twice as long as the vector elements of the second source register.

The USUBW instruction extracts vector elements from the lower half of the first source register. The USUBW2 instruction extracts vector elements from the upper half of the first source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USUBWB": + return { + "tooltip": "Subtract the even-numbered unsigned elements of the second source vector from the overlapping double-width elements of the first source vector and place the results in the corresponding double-width elements of the destination vector. This instruction is unpredicated.", + "html": "

Subtract the even-numbered unsigned elements of the second source vector from the overlapping double-width elements of the first source vector and place the results in the corresponding double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USUBWT": + return { + "tooltip": "Subtract the odd-numbered unsigned elements of the second source vector from the overlapping double-width elements of the first source vector and place the results in the corresponding double-width elements of the destination vector. This instruction is unpredicated. This instruction is unpredicated.", + "html": "

Subtract the odd-numbered unsigned elements of the second source vector from the overlapping double-width elements of the first source vector and place the results in the corresponding double-width elements of the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "USVDOT": + return { + "tooltip": "The unsigned by signed integer vertical dot product instruction computes the vertical dot product of corresponding unsigned 8-bit elements from the four first source vectors and four signed 8-bit integer values in the corresponding indexed 32-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups.", + "html": "

The unsigned by signed integer vertical dot product instruction computes the vertical dot product of corresponding unsigned 8-bit elements from the four first source vectors and four signed 8-bit integer values in the corresponding indexed 32-bit element of the second source vector. The widened dot product result is destructively added to the corresponding 32-bit element of the ZA single-vector groups.

The groups within the second source vector are specified using an immediate element index which selects the same group position within each 128-bit vector segment. The index range is from 0 to 3, encoded in 2 bits.

The vector numbers forming the single-vector group within each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo quarter the number of ZA array vectors.

The vector group symbol VGx4 indicates that the ZA operand consists of four ZA single-vector groups. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UUNPK": + return { + "tooltip": "Unpack elements from one or two source vectors and then zero-extend them to place in elements of twice their size within the two or four destination vectors.", + "html": "

Unpack elements from one or two source vectors and then zero-extend them to place in elements of twice their size within the two or four destination vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UUNPKHI": + case "UUNPKLO": + return { + "tooltip": "Unpack elements from the lowest or highest half of the source vector and then zero-extend them to place in elements of twice their size within the destination vector. This instruction is unpredicated.", + "html": "

Unpack elements from the lowest or highest half of the source vector and then zero-extend them to place in elements of twice their size within the destination vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UVDOT": + return { + "tooltip": "The unsigned integer vertical dot product instruction computes the vertical dot product of the corresponding two unsigned 16-bit integer values held in the two first source vectors and two unsigned 16-bit integer values in the corresponding indexed 32-bit element of the second source vector. The widened dot product results are destructively added to the corresponding 32-bit element of the ZA single-vector groups.", + "html": "

The unsigned integer vertical dot product instruction computes the vertical dot product of the corresponding two unsigned 16-bit integer values held in the two first source vectors and two unsigned 16-bit integer values in the corresponding indexed 32-bit element of the second source vector. The widened dot product results are destructively added to the corresponding 32-bit element of the ZA single-vector groups.

The groups within the second source vector are specified using an immediate element index which selects the same group position within each 128-bit vector segment. The index range is from 0 to 3, encoded in 2 bits.

The vector numbers forming the single-vector group within each half of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half the number of ZA array vectors.

The vector group symbol VGx2 indicates that the ZA operand consists of two ZA single-vector groups. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UVDOT": + return { + "tooltip": "The unsigned integer vertical dot product instruction computes the vertical dot product of the corresponding four unsigned 8-bit or 16-bit integer values held in the four first source vectors and four unsigned 8-bit or 16-bit integer values in the corresponding indexed 32-bit or 64-bit element of the second source vector. The widened dot product results are destructively added to the corresponding 32-bit or 64-bit element of the ZA single-vector groups.", + "html": "

The unsigned integer vertical dot product instruction computes the vertical dot product of the corresponding four unsigned 8-bit or 16-bit integer values held in the four first source vectors and four unsigned 8-bit or 16-bit integer values in the corresponding indexed 32-bit or 64-bit element of the second source vector. The widened dot product results are destructively added to the corresponding 32-bit or 64-bit element of the ZA single-vector groups.

The groups within the second source vector are specified using an immediate element index which selects the same group position within each 128-bit vector segment. The index range is from 0 to one less than the number of groups per 128-bit segment, encoded in 1 to 2 bits depending on the size of the group.

The vector numbers forming the single-vector group within each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo quarter the number of ZA array vectors.

The vector group symbol VGx4 indicates that the ZA operand consists of four ZA single-vector groups. The vector group symbol is preferred for disassembly, but optional in assembler source code.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UXTB": + return { + "tooltip": "Unsigned Extend Byte extracts an 8-bit value from a register, zero-extends it to the size of the register, and writes the result to the destination register.", + "html": "

Unsigned Extend Byte extracts an 8-bit value from a register, zero-extends it to the size of the register, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UXTB": + case "UXTH": + case "UXTW": + return { + "tooltip": "Zero-extend the least-significant sub-element of each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.", + "html": "

Zero-extend the least-significant sub-element of each active element of the source vector, and place the results in the corresponding elements of the destination vector. Inactive elements in the destination vector register remain unmodified.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UXTH": + return { + "tooltip": "Unsigned Extend Halfword extracts a 16-bit value from a register, zero-extends it to the size of the register, and writes the result to the destination register.", + "html": "

Unsigned Extend Halfword extracts a 16-bit value from a register, zero-extends it to the size of the register, and writes the result to the destination register.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UXTL": + case "UXTL2": + return { + "tooltip": "Unsigned extend Long. This instruction copies each vector element from the lower or upper half of the source SIMD&FP register into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.", + "html": "

Unsigned extend Long. This instruction copies each vector element from the lower or upper half of the source SIMD&FP register into a vector, and writes the vector to the destination SIMD&FP register. The destination vector elements are twice as long as the source vector elements.

The UXTL instruction extracts vector elements from the lower half of the source register. The UXTL2 instruction extracts vector elements from the upper half of the source register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UZP1": + return { + "tooltip": "Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Unzip vectors (primary). This instruction reads corresponding even-numbered vector elements from the two source SIMD&FP registers, starting at zero, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

This instruction can be used with UZP2 to de-interleave two vectors.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UZP1": + case "UZP2": + return { + "tooltip": "Concatenate adjacent even or odd-numbered elements from the first and second source predicates and place in elements of the destination predicate. This instruction is unpredicated.", + "html": "

Concatenate adjacent even or odd-numbered elements from the first and second source predicates and place in elements of the destination predicate. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UZP1": + case "UZP2": + return { + "tooltip": "Concatenate adjacent even or odd-numbered elements from the first and second source vectors and place in elements of the destination vector. This instruction is unpredicated.", + "html": "

Concatenate adjacent even or odd-numbered elements from the first and second source vectors and place in elements of the destination vector. This instruction is unpredicated.

Note: UZP1 is equivalent to truncating and packing each element from two source vectors into a single destination vector with elements of half the size.

The 128-bit element variant requires that the current vector length is at least 256 bits, and if the current vector length is not an integer multiple of 256 bits then the trailing bits are set to zero. ID_AA64ZFR0_EL1.F64MM indicates whether the 128-bit element variant is implemented. The 128-bit element variant is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UZP2": + return { + "tooltip": "Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.", + "html": "

Unzip vectors (secondary). This instruction reads corresponding odd-numbered vector elements from the two source SIMD&FP registers, places the result from the first source register into consecutive elements in the lower half of a vector, and the result from the second source register into consecutive elements in the upper half of a vector, and writes the vector to the destination SIMD&FP register.

This instruction can be used with UZP1 to de-interleave two vectors.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UZP": + return { + "tooltip": "Concatenate every fourth element from each of the four source vectors and place them in the corresponding elements of the four destination vectors.", + "html": "

Concatenate every fourth element from each of the four source vectors and place them in the corresponding elements of the four destination vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UZP": + return { + "tooltip": "Concatenate every second element from each of the first and second source vectors and place them in the corresponding elements of the two destination vectors.", + "html": "

Concatenate every second element from each of the first and second source vectors and place them in the corresponding elements of the two destination vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UZPQ1": + return { + "tooltip": "Concatenate adjacent even-numbered elements from the corresponding 128-bit vector segments of the first and second source vectors and place in elements of the corresponding destination vector segment. This instruction is unpredicated.", + "html": "

Concatenate adjacent even-numbered elements from the corresponding 128-bit vector segments of the first and second source vectors and place in elements of the corresponding destination vector segment. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "UZPQ2": + return { + "tooltip": "Concatenate adjacent odd-numbered elements from the corresponding 128-bit vector segments of the first and second source vectors and place in elements of the corresponding destination vector segment. This instruction is unpredicated.", + "html": "

Concatenate adjacent odd-numbered elements from the corresponding 128-bit vector segments of the first and second source vectors and place in elements of the corresponding destination vector segment. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WFE": + return { + "tooltip": "Wait For Event is a hint instruction that indicates that the PE can enter a low-power state and remain there until a wakeup event occurs. Wakeup events include the event signaled as a result of executing the SEV instruction on any PE in the multiprocessor system. For more information, see Wait For Event mechanism and Send event.", + "html": "

Wait For Event is a hint instruction that indicates that the PE can enter a low-power state and remain there until a wakeup event occurs. Wakeup events include the event signaled as a result of executing the SEV instruction on any PE in the multiprocessor system. For more information, see Wait For Event mechanism and Send event.

As described in Wait For Event mechanism and Send event, the execution of a WFE instruction that would otherwise cause entry to a low-power state can be trapped to a higher Exception level.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WFET": + return { + "tooltip": "Wait For Event with Timeout is a hint instruction that indicates that the PE can enter a low-power state and remain there until either a local timeout event or a wakeup event occurs. Wakeup events include the event signaled as a result of executing the SEV instruction on any PE in the multiprocessor system. For more information, see Wait For Event mechanism and Send event.", + "html": "

Wait For Event with Timeout is a hint instruction that indicates that the PE can enter a low-power state and remain there until either a local timeout event or a wakeup event occurs. Wakeup events include the event signaled as a result of executing the SEV instruction on any PE in the multiprocessor system. For more information, see Wait For Event mechanism and Send event.

As described in Wait For Event mechanism and Send event, the execution of a WFET instruction that would otherwise cause entry to a low-power state can be trapped to a higher Exception level.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WFI": + return { + "tooltip": "Wait For Interrupt is a hint instruction that indicates that the PE can enter a low-power state and remain there until a wakeup event occurs. For more information, see Wait For Interrupt.", + "html": "

Wait For Interrupt is a hint instruction that indicates that the PE can enter a low-power state and remain there until a wakeup event occurs. For more information, see Wait For Interrupt.

As described in Wait For Interrupt, the execution of a WFI instruction that would otherwise cause entry to a low-power state can be trapped to a higher Exception level.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WFIT": + return { + "tooltip": "Wait For Interrupt with Timeout is a hint instruction that indicates that the PE can enter a low-power state and remain there until either a local timeout event or a wakeup event occurs. For more information, see Wait For Interrupt.", + "html": "

Wait For Interrupt with Timeout is a hint instruction that indicates that the PE can enter a low-power state and remain there until either a local timeout event or a wakeup event occurs. For more information, see Wait For Interrupt.

As described in Wait For Interrupt, the execution of a WFIT instruction that would otherwise cause entry to a low-power state can be trapped to a higher Exception level.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILEGE": + return { + "tooltip": "Generate a predicate that starting from the highest numbered element is true while the decrementing value of the first, signed scalar operand is greater than or equal to the second scalar operand and false thereafter down to the lowest numbered element.", + "html": "

Generate a predicate that starting from the highest numbered element is true while the decrementing value of the first, signed scalar operand is greater than or equal to the second scalar operand and false thereafter down to the lowest numbered element.

If the second scalar operand is equal to the minimum signed integer value then a condition which includes an equality test can never fail and the result will be an all-true predicate.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is decremented by one for each destination predicate element, irrespective of the predicate result element size. The first general-purpose source register is not itself updated.

The predicate result is placed in the predicate destination register. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILEGE": + return { + "tooltip": "Generate a predicate for a group of two or four vectors that starting from the highest numbered element of the group is true while the decrementing value of the first, signed scalar operand is greater than or equal to the second scalar operand and false thereafter down to the lowest numbered element of the group.", + "html": "

Generate a predicate for a group of two or four vectors that starting from the highest numbered element of the group is true while the decrementing value of the first, signed scalar operand is greater than or equal to the second scalar operand and false thereafter down to the lowest numbered element of the group.

If the second scalar operand is equal to the minimum signed integer value then a condition which includes an equality test can never fail and the result will be an all-true predicate.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is decremented by one for each destination predicate element, irrespective of the predicate result element size.

The predicate result is placed in the predicate destination register using the predicate-as-counter encoding. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILEGE": + return { + "tooltip": "Generate a pair of predicates that starting from the highest numbered element of the pair is true while the decrementing value of the first, signed scalar operand is greater than or equal to the second scalar operand and false thereafter down to the lowest numbered element of the pair.", + "html": "

Generate a pair of predicates that starting from the highest numbered element of the pair is true while the decrementing value of the first, signed scalar operand is greater than or equal to the second scalar operand and false thereafter down to the lowest numbered element of the pair.

If the second scalar operand is equal to the minimum signed integer value then a condition which includes an equality test can never fail and the result will be an all-true predicate.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is decremented by one for each destination predicate element, irrespective of the predicate result element size. The first general-purpose source register is not itself updated.

The lower-numbered elements are placed in the first predicate destination register, and the higher-numbered elements in the second predicate destination register. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILEGT": + return { + "tooltip": "Generate a predicate that starting from the highest numbered element is true while the decrementing value of the first, signed scalar operand is greater than the second scalar operand and false thereafter down to the lowest numbered element.", + "html": "

Generate a predicate that starting from the highest numbered element is true while the decrementing value of the first, signed scalar operand is greater than the second scalar operand and false thereafter down to the lowest numbered element.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is decremented by one for each destination predicate element, irrespective of the predicate result element size. The first general-purpose source register is not itself updated.

The predicate result is placed in the predicate destination register. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILEGT": + return { + "tooltip": "Generate a predicate for a group of two or four vectors that starting from the highest numbered element of the group is true while the decrementing value of the first, signed scalar operand is greater than the second scalar operand and false thereafter down to the lowest numbered element of the group.", + "html": "

Generate a predicate for a group of two or four vectors that starting from the highest numbered element of the group is true while the decrementing value of the first, signed scalar operand is greater than the second scalar operand and false thereafter down to the lowest numbered element of the group.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is decremented by one for each destination predicate element, irrespective of the predicate result element size.

The predicate result is placed in the predicate destination register using the predicate-as-counter encoding. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILEGT": + return { + "tooltip": "Generate a pair of predicates that starting from the highest numbered element of the pair is true while the decrementing value of the first, signed scalar operand is greater than the second scalar operand and false thereafter down to the lowest numbered element of the pair.", + "html": "

Generate a pair of predicates that starting from the highest numbered element of the pair is true while the decrementing value of the first, signed scalar operand is greater than the second scalar operand and false thereafter down to the lowest numbered element of the pair.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is decremented by one for each destination predicate element, irrespective of the predicate result element size. The first general-purpose source register is not itself updated.

The lower-numbered elements are placed in the first predicate destination register, and the higher-numbered elements in the second predicate destination register. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILEHI": + return { + "tooltip": "Generate a predicate that starting from the highest numbered element is true while the decrementing value of the first, unsigned scalar operand is higher than the second scalar operand and false thereafter down to the lowest numbered element.", + "html": "

Generate a predicate that starting from the highest numbered element is true while the decrementing value of the first, unsigned scalar operand is higher than the second scalar operand and false thereafter down to the lowest numbered element.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is decremented by one for each destination predicate element, irrespective of the predicate result element size. The first general-purpose source register is not itself updated.

The predicate result is placed in the predicate destination register. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILEHI": + return { + "tooltip": "Generate a predicate for a group of two or four vectors that starting from the highest numbered element of the group is true while the decrementing value of the first, unsigned scalar operand is higher than the second scalar operand and false thereafter down to the lowest numbered element of the group.", + "html": "

Generate a predicate for a group of two or four vectors that starting from the highest numbered element of the group is true while the decrementing value of the first, unsigned scalar operand is higher than the second scalar operand and false thereafter down to the lowest numbered element of the group.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is decremented by one for each destination predicate element, irrespective of the predicate result element size.

The predicate result is placed in the predicate destination register using the predicate-as-counter encoding. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILEHI": + return { + "tooltip": "Generate a pair of predicates that starting from the highest numbered element of the pair is true while the decrementing value of the first, unsigned scalar operand is higher than the second scalar operand and false thereafter down to the lowest numbered element of the pair.", + "html": "

Generate a pair of predicates that starting from the highest numbered element of the pair is true while the decrementing value of the first, unsigned scalar operand is higher than the second scalar operand and false thereafter down to the lowest numbered element of the pair.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is decremented by one for each destination predicate element, irrespective of the predicate result element size. The first general-purpose source register is not itself updated.

The lower-numbered elements are placed in the first predicate destination register, and the higher-numbered elements in the second predicate destination register. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILEHS": + return { + "tooltip": "Generate a predicate that starting from the highest numbered element is true while the decrementing value of the first, unsigned scalar operand is higher or same as the second scalar operand and false thereafter down to the lowest numbered element.", + "html": "

Generate a predicate that starting from the highest numbered element is true while the decrementing value of the first, unsigned scalar operand is higher or same as the second scalar operand and false thereafter down to the lowest numbered element.

If the second scalar operand is equal to the minimum unsigned integer value then a condition which includes an equality test can never fail and the result will be an all-true predicate.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is decremented by one for each destination predicate element, irrespective of the predicate result element size. The first general-purpose source register is not itself updated.

The predicate result is placed in the predicate destination register. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILEHS": + return { + "tooltip": "Generate a predicate for a group of two or four vectors that starting from the highest numbered element of the group is true while the decrementing value of the first, unsigned scalar operand is higher or same as the second scalar operand and false thereafter down to the lowest numbered element of the group.", + "html": "

Generate a predicate for a group of two or four vectors that starting from the highest numbered element of the group is true while the decrementing value of the first, unsigned scalar operand is higher or same as the second scalar operand and false thereafter down to the lowest numbered element of the group.

If the second scalar operand is equal to the minimum unsigned integer value then a condition which includes an equality test can never fail and the result will be an all-true predicate.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is decremented by one for each destination predicate element, irrespective of the predicate result element size.

The predicate result is placed in the predicate destination register using the predicate-as-counter encoding. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILEHS": + return { + "tooltip": "Generate a pair of predicates that starting from the highest numbered element of the pair is true while the decrementing value of the first, unsigned scalar operand is higher or same as the second scalar operand and false thereafter down to the lowest numbered element of the pair.", + "html": "

Generate a pair of predicates that starting from the highest numbered element of the pair is true while the decrementing value of the first, unsigned scalar operand is higher or same as the second scalar operand and false thereafter down to the lowest numbered element of the pair.

If the second scalar operand is equal to the minimum unsigned integer value then a condition which includes an equality test can never fail and the result will be an all-true predicate.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is decremented by one for each destination predicate element, irrespective of the predicate result element size. The first general-purpose source register is not itself updated.

The lower-numbered elements are placed in the first predicate destination register, and the higher-numbered elements in the second predicate destination register. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILELE": + return { + "tooltip": "Generate a predicate that starting from the lowest numbered element is true while the incrementing value of the first, signed scalar operand is less than or equal to the second scalar operand and false thereafter up to the highest numbered element.", + "html": "

Generate a predicate that starting from the lowest numbered element is true while the incrementing value of the first, signed scalar operand is less than or equal to the second scalar operand and false thereafter up to the highest numbered element.

If the second scalar operand is equal to the maximum signed integer value then a condition which includes an equality test can never fail and the result will be an all-true predicate.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is incremented by one for each destination predicate element, irrespective of the predicate result element size. The first general-purpose source register is not itself updated.

The predicate result is placed in the predicate destination register. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILELE": + return { + "tooltip": "Generate a predicate for a group of two or four vectors that starting from the lowest numbered element of the group is true while the incrementing value of the first, signed scalar operand is less than or equal to the second scalar operand and false thereafter up to the highest numbered element of the group.", + "html": "

Generate a predicate for a group of two or four vectors that starting from the lowest numbered element of the group is true while the incrementing value of the first, signed scalar operand is less than or equal to the second scalar operand and false thereafter up to the highest numbered element of the group.

If the second scalar operand is equal to the maximum signed integer value then a condition which includes an equality test can never fail and the result will be an all-true predicate.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is incremented by one for each destination predicate element, irrespective of the predicate result element size.

The predicate result is placed in the predicate destination register using the predicate-as-counter encoding. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILELE": + return { + "tooltip": "Generate a pair of predicates that starting from the lowest numbered element of the pair is true while the incrementing value of the first, signed scalar operand is less than or equal to the second scalar operand and false thereafter up to the highest numbered element of the pair.", + "html": "

Generate a pair of predicates that starting from the lowest numbered element of the pair is true while the incrementing value of the first, signed scalar operand is less than or equal to the second scalar operand and false thereafter up to the highest numbered element of the pair.

If the second scalar operand is equal to the maximum signed integer value then a condition which includes an equality test can never fail and the result will be an all-true predicate.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is incremented by one for each destination predicate element, irrespective of the predicate result element size. The first general-purpose source register is not itself updated.

The lower-numbered elements are placed in the first predicate destination register, and the higher-numbered elements in the second predicate destination register. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILELO": + return { + "tooltip": "Generate a predicate that starting from the lowest numbered element is true while the incrementing value of the first, unsigned scalar operand is lower than the second scalar operand and false thereafter up to the highest numbered element.", + "html": "

Generate a predicate that starting from the lowest numbered element is true while the incrementing value of the first, unsigned scalar operand is lower than the second scalar operand and false thereafter up to the highest numbered element.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is incremented by one for each destination predicate element, irrespective of the predicate result element size. The first general-purpose source register is not itself updated.

The predicate result is placed in the predicate destination register. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILELO": + return { + "tooltip": "Generate a predicate for a group of two or four vectors that starting from the lowest numbered element of the group is true while the incrementing value of the first, unsigned scalar operand is lower than the second scalar operand and false thereafter up to the highest numbered element of the group.", + "html": "

Generate a predicate for a group of two or four vectors that starting from the lowest numbered element of the group is true while the incrementing value of the first, unsigned scalar operand is lower than the second scalar operand and false thereafter up to the highest numbered element of the group.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is incremented by one for each destination predicate element, irrespective of the predicate result element size.

The predicate result is placed in the predicate destination register using the predicate-as-counter encoding. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILELO": + return { + "tooltip": "Generate a pair of predicates that starting from the lowest numbered element of the pair is true while the incrementing value of the first, unsigned scalar operand is lower than the second scalar operand and false thereafter up to the highest numbered element of the pair.", + "html": "

Generate a pair of predicates that starting from the lowest numbered element of the pair is true while the incrementing value of the first, unsigned scalar operand is lower than the second scalar operand and false thereafter up to the highest numbered element of the pair.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is incremented by one for each destination predicate element, irrespective of the predicate result element size. The first general-purpose source register is not itself updated.

The lower-numbered elements are placed in the first predicate destination register, and the higher-numbered elements in the second predicate destination register. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILELS": + return { + "tooltip": "Generate a predicate that starting from the lowest numbered element is true while the incrementing value of the first, unsigned scalar operand is lower or same as the second scalar operand and false thereafter up to the highest numbered element.", + "html": "

Generate a predicate that starting from the lowest numbered element is true while the incrementing value of the first, unsigned scalar operand is lower or same as the second scalar operand and false thereafter up to the highest numbered element.

If the second scalar operand is equal to the maximum unsigned integer value then a condition which includes an equality test can never fail and the result will be an all-true predicate.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is incremented by one for each destination predicate element, irrespective of the predicate result element size. The first general-purpose source register is not itself updated.

The predicate result is placed in the predicate destination register. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILELS": + return { + "tooltip": "Generate a predicate for a group of two or four vectors that starting from the lowest numbered element of the group is true while the incrementing value of the first, unsigned scalar operand is lower or same as the second scalar operand and false thereafter up to the highest numbered element of the group.", + "html": "

Generate a predicate for a group of two or four vectors that starting from the lowest numbered element of the group is true while the incrementing value of the first, unsigned scalar operand is lower or same as the second scalar operand and false thereafter up to the highest numbered element of the group.

If the second scalar operand is equal to the maximum unsigned integer value then a condition which includes an equality test can never fail and the result will be an all-true predicate.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is incremented by one for each destination predicate element, irrespective of the predicate result element size.

The predicate result is placed in the predicate destination register using the predicate-as-counter encoding. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILELS": + return { + "tooltip": "Generate a pair of predicates that starting from the lowest numbered element of the pair is true while the incrementing value of the first, unsigned scalar operand is lower or same as the second scalar operand and false thereafter up to the highest numbered element of the pair.", + "html": "

Generate a pair of predicates that starting from the lowest numbered element of the pair is true while the incrementing value of the first, unsigned scalar operand is lower or same as the second scalar operand and false thereafter up to the highest numbered element of the pair.

If the second scalar operand is equal to the maximum unsigned integer value then a condition which includes an equality test can never fail and the result will be an all-true predicate.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is incremented by one for each destination predicate element, irrespective of the predicate result element size. The first general-purpose source register is not itself updated.

The lower-numbered elements are placed in the first predicate destination register, and the higher-numbered elements in the second predicate destination register. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILELT": + return { + "tooltip": "Generate a predicate that starting from the lowest numbered element is true while the incrementing value of the first, signed scalar operand is less than the second scalar operand and false thereafter up to the highest numbered element.", + "html": "

Generate a predicate that starting from the lowest numbered element is true while the incrementing value of the first, signed scalar operand is less than the second scalar operand and false thereafter up to the highest numbered element.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is incremented by one for each destination predicate element, irrespective of the predicate result element size. The first general-purpose source register is not itself updated.

The predicate result is placed in the predicate destination register. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILELT": + return { + "tooltip": "Generate a predicate for a group of two or four vectors that starting from the lowest numbered element of the group is true while the incrementing value of the first, signed scalar operand is less than the second scalar operand and false thereafter up to the highest numbered element of the group.", + "html": "

Generate a predicate for a group of two or four vectors that starting from the lowest numbered element of the group is true while the incrementing value of the first, signed scalar operand is less than the second scalar operand and false thereafter up to the highest numbered element of the group.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is incremented by one for each destination predicate element, irrespective of the predicate result element size.

The predicate result is placed in the predicate destination register using the predicate-as-counter encoding. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILELT": + return { + "tooltip": "Generate a pair of predicates that starting from the lowest numbered element of the pair is true while the incrementing value of the first, signed scalar operand is less than the second scalar operand and false thereafter up to the highest numbered element of the pair.", + "html": "

Generate a pair of predicates that starting from the lowest numbered element of the pair is true while the incrementing value of the first, signed scalar operand is less than the second scalar operand and false thereafter up to the highest numbered element of the pair.

The full width of the scalar operands is significant for the purposes of comparison, and the full width first operand is incremented by one for each destination predicate element, irrespective of the predicate result element size. The first general-purpose source register is not itself updated.

The lower-numbered elements are placed in the first predicate destination register, and the higher-numbered elements in the second predicate destination register. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILERW": + return { + "tooltip": "This instruction checks two addresses for a conflict or overlap between address ranges of the form [addr,addr+VL\u00f78), where VL is the accessible vector length in bits, that could result in a loop-carried dependency through memory due to the use of these addresses by contiguous load and store instructions within the same iteration of a loop. Generate a predicate whose elements are true while the addresses cannot conflict within the same iteration, and false thereafter. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

This instruction checks two addresses for a conflict or overlap between address ranges of the form [addr,addr+VL\u00f78), where VL is the accessible vector length in bits, that could result in a loop-carried dependency through memory due to the use of these addresses by contiguous load and store instructions within the same iteration of a loop. Generate a predicate whose elements are true while the addresses cannot conflict within the same iteration, and false thereafter. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WHILEWR": + return { + "tooltip": "This instruction checks two addresses for a conflict or overlap between address ranges of the form [addr,addr+VL\u00f78), where VL is the accessible vector length in bits, that could result in a loop-carried dependency through memory due to the use of these addresses by contiguous load and store instructions within the same iteration of a loop. Generate a predicate whose elements are true while the addresses cannot conflict within the same iteration, and false thereafter. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.", + "html": "

This instruction checks two addresses for a conflict or overlap between address ranges of the form [addr,addr+VL\u00f78), where VL is the accessible vector length in bits, that could result in a loop-carried dependency through memory due to the use of these addresses by contiguous load and store instructions within the same iteration of a loop. Generate a predicate whose elements are true while the addresses cannot conflict within the same iteration, and false thereafter. Sets the First (N), None (Z), !Last (C) condition flags based on the predicate result, and the V flag to zero.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "WRFFR": + return { + "tooltip": "Read the source predicate register and place in the first-fault register (FFR). This instruction is intended to restore a saved FFR and is not recommended for general use by applications.", + "html": "

Read the source predicate register and place in the first-fault register (FFR). This instruction is intended to restore a saved FFR and is not recommended for general use by applications.

This instruction requires that the source predicate contains a monotonic predicate value, in which starting from bit 0 there are zero or more 1 bits, followed only by 0 bits in any remaining bit positions. If the source is not a monotonic predicate value, then the resulting value in the FFR will be UNPREDICTABLE. It is not possible to generate a non-monotonic value in FFR when using SETFFR followed by first-fault or non-fault loads.

This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "XAFLAG": + return { + "tooltip": "Convert floating-point condition flags from external format to Arm format. This instruction converts the state of the PSTATE.{N,Z,C,V} flags from an alternative representation required by some software to a form representing the result of an Arm floating-point scalar compare instruction.", + "html": "

Convert floating-point condition flags from external format to Arm format. This instruction converts the state of the PSTATE.{N,Z,C,V} flags from an alternative representation required by some software to a form representing the result of an Arm floating-point scalar compare instruction.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "XAR": + return { + "tooltip": "Exclusive-OR and Rotate performs a bitwise exclusive-OR of the 128-bit vectors in the two source SIMD&FP registers, rotates each 64-bit element of the resulting 128-bit vector right by the value specified by a 6-bit immediate value, and writes the result to the destination SIMD&FP register.", + "html": "

Exclusive-OR and Rotate performs a bitwise exclusive-OR of the 128-bit vectors in the two source SIMD&FP registers, rotates each 64-bit element of the resulting 128-bit vector right by the value specified by a 6-bit immediate value, and writes the result to the destination SIMD&FP register.

This instruction is implemented only when FEAT_SHA3 is implemented.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "XAR": + return { + "tooltip": "Bitwise exclusive OR the corresponding elements of the first and second source vectors, then rotate each result element right by an immediate amount. The final results are destructively placed in the corresponding elements of the destination and first source vector. This instruction is unpredicated.", + "html": "

Bitwise exclusive OR the corresponding elements of the first and second source vectors, then rotate each result element right by an immediate amount. The final results are destructively placed in the corresponding elements of the destination and first source vector. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "XPACD": + case "XPACI": + case "XPACLRI": + return { + "tooltip": "Strip Pointer Authentication Code. This instruction removes the pointer authentication code from an address. The address is in the specified general-purpose register for XPACI and XPACD, and is in LR for XPACLRI.", + "html": "

Strip Pointer Authentication Code. This instruction removes the pointer authentication code from an address. The address is in the specified general-purpose register for XPACI and XPACD, and is in LR for XPACLRI.

The XPACD instruction is used for data addresses, and XPACI and XPACLRI are used for instruction addresses.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "XTN": + case "XTN2": + return { + "tooltip": "Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.", + "html": "

Extract Narrow. This instruction reads each vector element from the source SIMD&FP register, narrows each value to half the original width, places the result into a vector, and writes the vector to the lower or upper half of the destination SIMD&FP register. The destination vector elements are half as long as the source vector elements.

The XTN instruction writes the vector to the lower half of the destination register and clears the upper half, while the XTN2 instruction writes the vector to the upper half of the destination register without affecting the other bits of the register.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "YIELD": + return { + "tooltip": "YIELD is a hint instruction. Software with a multithreading capability can use a YIELD instruction to indicate to the PE that it is performing a task, for example a spin-lock, that could be swapped out to improve overall system performance. The PE can use this hint to suspend and resume multiple software threads if it supports the capability.", + "html": "

YIELD is a hint instruction. Software with a multithreading capability can use a YIELD instruction to indicate to the PE that it is performing a task, for example a spin-lock, that could be swapped out to improve overall system performance. The PE can use this hint to suspend and resume multiple software threads if it supports the capability.

For more information about the recommended use of this instruction, see The YIELD instruction.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ZERO": + return { + "tooltip": "The instruction zeroes two or four ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.", + "html": "

The instruction zeroes two or four ZA single-vector groups. The vector numbers forming the single-vector group within each half of or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo half or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA single-vector groups respectively.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ZERO": + return { + "tooltip": "The instruction zeroes one, two, or four ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.", + "html": "

The instruction zeroes one, two, or four ZA double-vector groups. The lowest of the two consecutive vector numbers forming the double-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA double-vector groups respectively.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ZERO": + return { + "tooltip": "The instruction zeroes one, two, or four ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.", + "html": "

The instruction zeroes one, two, or four ZA quad-vector groups. The lowest of the four consecutive vector numbers forming the quad-vector group within all of, each half of, or each quarter of the ZA array are selected by the sum of the vector select register and immediate offset, modulo all, half, or quarter the number of ZA array vectors.

The vector group symbol, VGx2 or VGx4, indicates that the ZA operand consists of two or four ZA quad-vector groups respectively.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ZERO": + return { + "tooltip": "Zeroes all bytes within each of the up to eight listed 64-bit element tiles named ZA0.D to ZA7.D, leaving the other 64-bit element tiles unmodified.", + "html": "

Zeroes all bytes within each of the up to eight listed 64-bit element tiles named ZA0.D to ZA7.D, leaving the other 64-bit element tiles unmodified.

This instruction does not require the PE to be in Streaming SVE mode, and it is expected that this instruction will not experience a significant slowdown due to contention with other PEs that are executing in Streaming SVE mode.

For programmer convenience an assembler must also accept the names of 32-bit, 16-bit, and 8-bit element tiles which are converted into the corresponding set of 64-bit element tiles.

In accordance with the architecturally defined mapping between different element size tiles:

The preferred disassembly of this instruction uses the shortest list of tile names that represent the encoded immediate mask.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ZERO": + return { + "tooltip": "Zero all bytes of the ZT0 register.", + "html": "

Zero all bytes of the ZT0 register.

This instruction does not require the PE to be in Streaming SVE mode, and it is expected that this instruction will not experience a significant slowdown due to contention with other PEs that are executing in Streaming SVE mode.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ZIP1": + return { + "tooltip": "Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.", + "html": "

Zip vectors (primary). This instruction reads adjacent vector elements from the lower half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

This instruction can be used with ZIP2 to interleave two vectors.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ZIP1": + case "ZIP2": + return { + "tooltip": "Interleave alternating elements from the lowest or highest halves of the first and second source predicates and place in elements of the destination predicate. This instruction is unpredicated.", + "html": "

Interleave alternating elements from the lowest or highest halves of the first and second source predicates and place in elements of the destination predicate. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ZIP1": + case "ZIP2": + return { + "tooltip": "Interleave alternating elements from the lowest or highest halves of the first and second source vectors and place in elements of the destination vector. This instruction is unpredicated.", + "html": "

Interleave alternating elements from the lowest or highest halves of the first and second source vectors and place in elements of the destination vector. This instruction is unpredicated.

The 128-bit element variant requires that the current vector length is at least 256 bits, and if the current vector length is not an integer multiple of 256 bits then the trailing bits are set to zero. ID_AA64ZFR0_EL1.F64MM indicates whether the 128-bit element variant is implemented. The 128-bit element variant is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ZIP2": + return { + "tooltip": "Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.", + "html": "

Zip vectors (secondary). This instruction reads adjacent vector elements from the upper half of two source SIMD&FP registers as pairs, interleaves the pairs and places them into a vector, and writes the vector to the destination SIMD&FP register. The first pair from the first source register is placed into the two lowest vector elements, with subsequent pairs taken alternately from each source register.

This instruction can be used with ZIP1 to interleave two vectors.

Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ZIP": + return { + "tooltip": "Place the four-way interleaved elements from the four source vectors in the corresponding elements of the four destination vectors.", + "html": "

Place the four-way interleaved elements from the four source vectors in the corresponding elements of the four destination vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ZIP": + return { + "tooltip": "Place the two-way interleaved elements from the first and second source vectors in the corresponding elements of the two destination vectors.", + "html": "

Place the two-way interleaved elements from the first and second source vectors in the corresponding elements of the two destination vectors.

This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ZIPQ1": + return { + "tooltip": "Interleave alternating elements from low halves of the corresponding 128-bit vector segments of the first and second source vectors and place in elements of the corresponding destination vector segment. This instruction is unpredicated.", + "html": "

Interleave alternating elements from low halves of the corresponding 128-bit vector segments of the first and second source vectors and place in elements of the corresponding destination vector segment. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + case "ZIPQ2": + return { + "tooltip": "Interleave alternating elements from high halves of the corresponding 128-bit vector segments of the first and second source vectors and place in elements of the corresponding destination vector segment. This instruction is unpredicated.", + "html": "

Interleave alternating elements from high halves of the corresponding 128-bit vector segments of the first and second source vectors and place in elements of the corresponding destination vector segment. This instruction is unpredicated.

", + "url": "https://developer.arm.com/documentation/ddi0602/latest/Base-Instructions/" + }; + + + } +} \ No newline at end of file diff --git a/src/extension.ts b/src/extension.ts new file mode 100644 index 0000000..b5f3e29 --- /dev/null +++ b/src/extension.ts @@ -0,0 +1,83 @@ +import * as vscode from 'vscode'; +import { getAsmOpcode } from './asm-docs-aarch64.js'; + +export function activate(context: vscode.ExtensionContext) { + const hoverProvider = vscode.languages.registerHoverProvider('arm', { + provideHover(document, position, token) { + const range = document.getWordRangeAtPosition(position); + const word = document.getText(range); + + const info = getAsmOpcode(word.toUpperCase()) + if (info) { + return new vscode.Hover(info.tooltip); + } + + switch (word.toUpperCase()) { + case 'SP': + return new vscode.Hover('Stack Pointer - a register pointing to the last value written to the stack'); + case 'LR': + return new vscode.Hover('Link Register - a pointer to the return address of a subroutine call but can also be used for other purposes'); + case 'PC': + return new vscode.Hover('Program Counter - a pointer to the address of the next instruction to be executed'); + case 'APSR': + return new vscode.Hover('Application Program Status Register - a 32-bit register that holds the current status flags'); + case 'RZR': + return new vscode.Hover('Zero register'); + case 'XZR': + return new vscode.Hover('64-bit zero register'); + case 'WZR': + return new vscode.Hover('32-bit zero register'); + case 'LOC': // TODO: it should actually be .LOC + // TODO: might have more than those 3 + return new vscode.Hover('Debugging directive linking to the source file number, line number and column number'); + } + + const register = word.match(/^([rxwsdq])([0-9]|1[0-9]|2[0-9]|3[0-1])$/i) + if (register) { + const [, srcType, srcNumber] = register; + const type = srcType.toUpperCase(); + function matchCase(registerName: string) { + if (srcType === type) { + return 
registerName.toUpperCase(); + } + return registerName.toLowerCase(); + } + const n = parseInt(srcNumber); + if (type === 'R') { + return new vscode.Hover('General-purpose register'); + } + if (type === 'X') { + return new vscode.Hover('64-bit general-purpose register'); + } + if (type === 'W') { + return new vscode.Hover('32-bit general-purpose register\n\nthe bottom 32 bits of ' + matchCase('X') + n); + } + if (type === 'S') { + if (n % 2 === 0) { + return new vscode.Hover('Single-precision floating-point register\n\nthe least significant 32 bits of ' + matchCase('D') + (n * 2)); + } + return new vscode.Hover('Single-precision floating-point register\n\nthe most significant 32 bits of ' + matchCase('D') + (n * 2 - 1)); + } + if (type === 'D') { + if (n % 2 === 0) { + return new vscode.Hover('Double-precision floating-point register\n\nthe least significant 64 bits of ' + matchCase('Q') + (n * 2)); + } + return new vscode.Hover('Double-precision floating-point register\n\nthe most significant 64 bits of ' + matchCase('Q') + (n * 2 - 1)); + } + if (type === 'Q') { + return new vscode.Hover('Quad-precision floating-point register'); + } + } + + // TODO: immediate offset (#123 #-123 and maybe more?) + // TODO: l_.str.1@PAGE incorrectly says "str" is a label + // TODO: ldr x0, [x8, x9, lsl #3] incorrectly says "lsl" is a label + } + }); + + context.subscriptions.push(hoverProvider); + +} + +// This method is called when your extension is deactivated +export function deactivate() {} diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..6954702 --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,17 @@ +{ + "compilerOptions": { + "module": "Node16", + "target": "ES2022", + "outDir": "out", + "lib": [ + "ES2022" + ], + "sourceMap": true, + "rootDir": "src", + "strict": true /* enable all strict type-checking options */ + /* Additional Checks */ + // "noImplicitReturns": true, /* Report error when not all code paths in function return a value. 
*/ + // "noFallthroughCasesInSwitch": true, /* Report errors for fallthrough cases in switch statement. */ + // "noUnusedParameters": true, /* Report errors on unused parameters. */ + } +} From 5e37840b676a521a03378f8ab7dad744b86ecd35 Mon Sep 17 00:00:00 2001 From: Boris Verkhovskiy Date: Sat, 13 Jan 2024 05:25:58 -0800 Subject: [PATCH 4/4] More registers and clang directives --- language-configuration.json | 4 +- src/extension.ts | 428 +++++++++++++++++++++++++++++++----- 2 files changed, 372 insertions(+), 60 deletions(-) diff --git a/language-configuration.json b/language-configuration.json index f38cd94..d9f7c39 100644 --- a/language-configuration.json +++ b/language-configuration.json @@ -17,5 +17,7 @@ { "open": "'", "close": "'", "notIn": ["string"] }, { "open": "\"", "close": "\"", "notIn": ["string"] }, { "open": "/*", "close": " */", "notIn": ["string"] } - ] + ], + // (binary|octal|hex|hex|decimal)|float|.?word + "wordPattern": "(#?-?(0b[01]+|0[0-7]+|0x[0-9a-fA-F]+|[0x]?[0-9][0-9a-fA-F]*[hH]|[1-9][0-9]*))|(\\d*\\.\\d\\w*)|\\.?([^\\`\\~\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)\\-\\=\\+\\[\\{\\]\\}\\\\\\|\\;\\:\\'\\\"\\,\\.\\<\\>\\/\\?\\s]+)", } \ No newline at end of file diff --git a/src/extension.ts b/src/extension.ts index b5f3e29..1083215 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -1,77 +1,387 @@ import * as vscode from 'vscode'; import { getAsmOpcode } from './asm-docs-aarch64.js'; +// The difference from the one in language-config.json is that +// this treats '.' as part of a word. 
+const WORD_PATTERN = /(#?-?(0b[01]+|0[0-7]+|0x[0-9a-fA-F]+|[0x]?[0-9][0-9a-fA-F]*[hH]|[1-9][0-9]*))|(\d*\.\d\w*)|([^\`\~\!\@\#\$\%\^\&\*\(\)\-\=\+\[\{\]\}\\\|\;\:\'\\"\,\<\>\/\?\s]+)/; + export function activate(context: vscode.ExtensionContext) { - const hoverProvider = vscode.languages.registerHoverProvider('arm', { - provideHover(document, position, token) { - const range = document.getWordRangeAtPosition(position); - const word = document.getText(range); + const getInfo = (word: string): string | undefined => { + const info = getAsmOpcode(word.toUpperCase()) + if (info) { + return info.tooltip; + } - const info = getAsmOpcode(word.toUpperCase()) - if (info) { - return new vscode.Hover(info.tooltip); - } + function matchCase(s: string) { + return word === word.toLowerCase() ? s.toLowerCase() : s.toUpperCase(); + } + switch (word.toLowerCase()) { + case 'sp': + return 'Stack Pointer - a register pointing to the last value written to the stack (' + matchCase('XZR') + ')'; + case 'wsp': + return '32-bit Stack Pointer - a register pointing to the last value written to the stack (' + matchCase('WZR') + ')'; + case 'lr': + return 'Link Register - a pointer to the return address of a subroutine call but can also be used for other purposes (' + matchCase('X30') + ')'; + case 'fp': + return 'Frame Pointer - a pointer to the current stack frame (' + matchCase('X29') + ')'; + case 'ip1': + return 'Second 64-bit intra-procedure-call temporary register (' + matchCase('X17') + ')' + case 'ip0': + return 'First 64-bit intra-procedure-call temporary register (' + matchCase('X16') + ')' + case 'pc': + return 'Program Counter - a pointer to the address of the next instruction to be executed'; + case 'rzr': + return 'Zero register (' + matchCase('SP') + ')'; + case 'xzr': + return '64-bit zero register (' + matchCase('SP') + ')'; + case 'wzr': + return '32-bit zero register (' + matchCase('SP') + ')'; + case 'apsr': + return 'Application Program Status Register - a 32-bit 
register that holds the current status flags'; + case 'fpcr': + return 'Floating Point Control Register - a 32-bit register that controls how floating point operations are performed'; + case 'fpsr': + return 'Floating Point Status Register - a 32-bit register that holds the current status flags for floating point operations'; + case 'pstate': + return 'Processor State register - a 32-bit register that holds the current processor state'; + + // Standard conditions + case 'eq': + return 'Equal'; + case 'ne': + return 'Not equal'; + case 'hs': + return 'Unsigned higher or same'; + case 'cs': + return 'Carry set'; + case 'lo': + return 'Unsigned lower'; + case 'cc': + return 'Carry clear'; + case 'mi': + return 'Negative'; + case 'pl': + return 'Positive or zero'; + case 'vs': + return 'Overflow'; + case 'vc': + return 'No overflow'; + case 'ls': + return 'Unsigned lower or same'; + case 'ge': + return 'Signed greater than or equal'; + case 'lt': + return 'Signed less than'; + case 'gt': + return 'Signed greater than'; + case 'le': + return 'Signed less than or equal'; + case 'hi': + return 'Unsigned higher'; + // "The condition code NV exists only to provide + // a valid disassembly of the ‘1111b’ encoding, and + // otherwise behaves identically to AL." 
+ case 'al': + case 'nv': + return 'Always'; - switch (word.toUpperCase()) { - case 'SP': - return new vscode.Hover('Stack Pointer - a register pointing to the last value written to the stack'); - case 'LR': - return new vscode.Hover('Link Register - a pointer to the return address of a subroutine call but can also be used for other purposes'); - case 'PC': - return new vscode.Hover('Program Counter - a pointer to the address of the next instruction to be executed'); - case 'APSR': - return new vscode.Hover('Application Program Status Register - a 32-bit register that holds the current status flags'); - case 'RZR': - return new vscode.Hover('Zero register'); - case 'XZR': - return new vscode.Hover('64-bit zero register'); - case 'WZR': - return new vscode.Hover('32-bit zero register'); - case 'LOC': // TODO: it should actually be .LOC - // TODO: might have more than those 3 - return new vscode.Hover('Debugging directive linking to the source file number, line number and column number'); - } + // Labels + case '.section': + return 'Section label'; + case '.loc': + // TODO: might have more than those 3 + return 'Debugging directive linking to the source file, line and column numbers'; + case '.set': + case '.equ': + case '.equiv': + case '.ascii': + case '.asciz': + case '.string': + case '.byte': + case '.short': + case '.value': + case '.2byte': + case '.long': + case '.int': + case '.4byte': + case '.quad': + case '.8byte': + case '.octa': + case '.single': + case '.float': + case '.double': + case '.align': + case '.align32': + case '.balign': + case '.balignw': + case '.balignl': + case '.p2align': + case '.p2alignw': + case '.p2alignl': + case '.org': + case '.fill': + case '.zero': + case '.extern': + case '.globl': + case '.global': + case '.lazy_reference': + case '.no_dead_strip': + case '.symbol_resolver': + case '.private_extern': + case '.reference': + case '.weak_definition': + case '.weak_reference': + case '.weak_def_can_be_hidden': + case '.cold': + 
case '.comm': + case '.common': + case '.lcomm': + case '.abort': + case '.include': + case '.incbin': + case '.code16': + case '.code16gcc': + case '.rept': + case '.rep': + case '.irp': + case '.irpc': + case '.endr': + case '.bundle_align_mode': + case '.bundle_lock': + case '.bundle_unlock': + case '.if': + case '.ifeq': + case '.ifge': + case '.ifgt': + case '.ifle': + case '.iflt': + case '.ifne': + case '.ifb': + case '.ifnb': + case '.ifc': + case '.ifeqs': + case '.ifnc': + case '.ifnes': + case '.ifdef': + case '.ifndef': + case '.ifnotdef': + case '.elseif': + case '.else': + case '.end': + case '.endif': + case '.skip': + case '.space': + case '.file': + case '.line': + // case '.loc': + case '.stabs': + case '.cv_file': + case '.cv_func_id': + case '.cv_loc': + case '.cv_linetable': + case '.cv_inline_linetable': + case '.cv_inline_site_id': + case '.cv_def_range': + case '.cv_string': + case '.cv_stringtable': + case '.cv_filechecksums': + case '.cv_filechecksumoffset': + case '.cv_fpo_data': + case '.sleb128': + case '.uleb128': + case '.cfi_sections': + case '.cfi_startproc': + case '.cfi_endproc': + case '.cfi_def_cfa': + case '.cfi_def_cfa_offset': + case '.cfi_adjust_cfa_offset': + case '.cfi_def_cfa_register': + case '.cfi_llvm_def_aspace_cfa': + case '.cfi_offset': + case '.cfi_rel_offset': + case '.cfi_personality': + case '.cfi_lsda': + case '.cfi_remember_state': + case '.cfi_restore_state': + case '.cfi_same_value': + case '.cfi_restore': + case '.cfi_escape': + case '.cfi_return_column': + case '.cfi_signal_frame': + case '.cfi_undefined': + case '.cfi_register': + case '.cfi_window_save': + case '.cfi_b_key_frame': + case '.cfi_mte_tagged_frame': + case '.macros_on': + case '.macros_off': + case '.macro': + case '.exitm': + case '.endm': + case '.endmacro': + case '.purgem': + case '.err': + case '.error': + case '.warning': + case '.altmacro': + case '.noaltmacro': + case '.reloc': + case '.dc': + case '.dc.a': + case '.dc.b': + case 
'.dc.d': + case '.dc.l': + case '.dc.s': + case '.dc.w': + case '.dc.x': + case '.dcb': + case '.dcb.b': + case '.dcb.d': + case '.dcb.l': + case '.dcb.s': + case '.dcb.w': + case '.dcb.x': + case '.ds': + case '.ds.b': + case '.ds.d': + case '.ds.l': + case '.ds.p': + case '.ds.s': + case '.ds.w': + case '.ds.x': + case '.print': + case '.addrsig': + case '.addrsig_sym': + case '.pseudoprobe': + case '.lto_discard': + case '.lto_set_conditional': + case '.memtag': + return 'Directive'; + } - const register = word.match(/^([rxwsdq])([0-9]|1[0-9]|2[0-9]|3[0-1])$/i) - if (register) { - const [, srcType, srcNumber] = register; - const type = srcType.toUpperCase(); - function matchCase(registerName: string) { - if (srcType === type) { - return registerName.toUpperCase(); - } - return registerName.toLowerCase(); + // TODO: SIMD shapes? For example "Vn.16B" (where n is a register number) + const register = word.match(/^([rxwvzbhsdqhb])([0-9]|1[0-9]|2[0-9]|3[0-1])|p([0-9]|1[0-5])$/i) + if (register) { + const [, srcType, srcNumber] = register; + const type = srcType.toLowerCase(); + const n = parseInt(srcNumber); + if (type === 'r') { + if (n === 31) { + return 'Stack pointer or zero register (' + matchCase('SP') + ' or ' + matchCase('RZR') + ')'; + } else if (n === 30) { + return 'Link register (' + matchCase('LR') + ')'; + } else if (n === 29) { + return 'Frame pointer (' + matchCase('FP') + ')'; + } else if (n >= 19) { + return 'Callee-saved register' + } else if (n === 18) { + return 'Platform register or just a temporary register' + } else if (n === 17) { + return 'Second intra-procedure-call temporary register (' + matchCase('IP1') + ')' + } else if (n === 16) { + return 'First intra-procedure-call temporary register (' + matchCase('IP0') + ')' + } else if (n >= 9) { + return 'Temporary register' + } else if (n === 8) { + return 'Indirect result location register' + } else if (n >= 0) { + return 'Parameter/result register' } - const n = parseInt(srcNumber); - if 
(type === 'R') { - return new vscode.Hover('General-purpose register'); + } else if (type === 'x') { + if (n === 31) { + return '64-bit stack pointer or zero register (' + matchCase('SP') + ' or ' + matchCase('XZR') + ')'; + } else if (n === 30) { + return '64-bit link register (' + matchCase('LR') + ')'; + } else if (n === 29) { + return '64-bit frame pointer (' + matchCase('FP') + ')'; + } else if (n >= 19) { + return '64-bit callee-saved register' + } else if (n === 18) { + return '64-bit platform register or just a temporary register' + } else if (n === 17) { + return 'Second 64-bit intra-procedure-call temporary register (' + matchCase('IP1') + ')' + } else if (n === 16) { + return 'First 64-bit intra-procedure-call temporary register (' + matchCase('IP0') + ')' + } else if (n >= 9) { + return '64-bit temporary register' + } else if (n === 8) { + return '64-bit indirect result location register' + } else if (n >= 0) { + return '64-bit parameter/result register' } - if (type === 'X') { - return new vscode.Hover('64-bit general-purpose register'); + } else if (type === 'w') { + let registerHelp = '32-bit register'; + if (n === 31) { + registerHelp = '32-bit stack pointer or zero register'; + } else if (n === 30) { + registerHelp = '32-bit link register'; + } else if (n === 29) { + registerHelp = '32-bit frame pointer'; + } else if (n >= 19) { + registerHelp = '32-bit callee-saved register' + } else if (n === 18) { + registerHelp = '32-bit platform register or just a temporary register' + } else if (n === 17) { + registerHelp = 'Second 32-bit intra-procedure-call temporary register' + } else if (n === 16) { + registerHelp = 'First 32-bit intra-procedure-call temporary register' + } else if (n >= 9) { + registerHelp = '32-bit temporary register' + } else if (n === 8) { + registerHelp = '32-bit indirect result location register' + } else if (n >= 0) { + registerHelp = '32-bit parameter/result register' } - if (type === 'W') { - return new vscode.Hover('32-bit 
general-purpose register\n\nthe bottom 32 bits of ' + matchCase('X') + n); + return registerHelp + '\n\nthe bottom 32 bits of ' + matchCase('X') + n + } else if (type === 'b') { + // TODO: b/h/s/d/q can be floating point or SIMD registers + return '8-bit scalar register'; + } else if (type === 'h') { + return '16-bit scalar or half-precision floating-point register'; + } else if (type === 's') { + if (n % 2 === 0) { + return 'Single-precision floating-point register\n\nthe least significant 32 bits of ' + matchCase('D') + (n * 2); } - if (type === 'S') { - if (n % 2 === 0) { - return new vscode.Hover('Single-precision floating-point register\n\nthe least significant 32 bits of ' + matchCase('D') + (n * 2)); - } - return new vscode.Hover('Single-precision floating-point register\n\nthe most significant 32 bits of ' + matchCase('D') + (n * 2 - 1)); + return 'Single-precision floating-point register\n\nthe most significant 32 bits of ' + matchCase('D') + (n * 2 - 1); + } else if (type === 'd') { + if (n % 2 === 0) { + return 'Double-precision floating-point register\n\nthe least significant 64 bits of ' + matchCase('Q') + (n * 2); } - if (type === 'D') { - if (n % 2 === 0) { - return new vscode.Hover('Double-precision floating-point register\n\nthe least significant 64 bits of ' + matchCase('Q') + (n * 2)); - } - return new vscode.Hover('Double-precision floating-point register\n\nthe most significant 64 bits of ' + matchCase('Q') + (n * 2 - 1)); - } - if (type === 'Q') { - return new vscode.Hover('Quad-precision floating-point register'); + return 'Double-precision floating-point register\n\nthe most significant 64 bits of ' + matchCase('Q') + (n * 2 - 1); + } else if (type === 'q') { + return '128-bit scalar register'; + } + } + + const immediate = word.match(/^#(-?[0-9]+)$/i) + if (immediate) { + // TODO: can be offset + if (word === '#0') { + return 'Zero immediate'; + } + if (word.startsWith('#-')) { + return 'Negative immediate'; + } + return 'Immediate'; + } + 
+ // TODO: immediate offset (#123 #-123 and maybe more?) + // TODO: l_.str.1@PAGE incorrectly says "str" is a label + // TODO: ldr x0, [x8, x9, lsl #3] incorrectly says "lsl" is a label + } + + const hoverProvider = vscode.languages.registerHoverProvider('arm', { + provideHover(document, position, token) { + + const range = document.getWordRangeAtPosition(position); + const word = document.getText(range); + if (word) { + const info = getInfo(word); + if (info) { + return new vscode.Hover(info); } } - // TODO: immediate offset (#123 #-123 and maybe more?) - // TODO: l_.str.1@PAGE incorrectly says "str" is a label - // TODO: ldr x0, [x8, x9, lsl #3] incorrectly says "lsl" is a label } });