summaryrefslogtreecommitdiff
path: root/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl
blob: b4afe085f1c955e0a2aaf0964d4a99b7eb8041c8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/local/bin/perl
# alpha assembler

sub bn_sqr_comba8
	{
	# Builds the routine called $name: an unrolled, column-by-column
	# ("comba") squaring of the 8 words read through parameter 1, with
	# the 16 result words stored through parameter 0.
	#
	# NOTE(review): every helper called here (init_pool, wparam, NR, FR,
	# QWPw, ld, st, mov, mul, muh, sqr_add_c, sqr_add_c2, comment,
	# function_begin, function_end, fin_pool) is defined outside this
	# file.  Remarks below about what they do are inferred from their
	# names and from how they are called -- confirm against their
	# definitions.
	local($name)=@_;
	# @b and $r are localised here but never referenced in this routine.
	local(@a,@b,$r,$c0,$c1,$c2);

	# $cnt, $rp and $ap are plain package variables (not localised).
	$cnt=1;
	&init_pool(2);

	$rp=&wparam(0);
	$ap=&wparam(1);

	&function_begin($name,"");

	&comment("");

	# Read the eight input words, each into its own register obtained
	# from NR; once the last word is read the input pointer is handed
	# back through FR.
	foreach my $word (0 .. 7)
		{
		($a[$word])=&NR(1);
		&ld($a[$word],&QWPw($word,$ap));
		}
	&FR($ap);

	# Three registers that take turns acting as the low, middle and
	# high word of the running column sum.
	($c0,$c1,$c2)=&NR(3);

	# Column 0 is just a[0]*a[0]: mul supplies the low word, muh the
	# high word (presumably the two halves of the full product).
	&mov("zero",$c2);
	&mul($a[0],$a[0],$c0);
	&muh($a[0],$a[0],$c1);

	foreach my $col (1 .. 14)
		{
		# Close the previous column: store its low word, rotate the
		# accumulator roles down by one, and clear the new top word.
		&st($c0,&QWPw($col-1,$rp));
		($c0,$c1,$c2)=($c1,$c2,$c0);
		&mov("zero",$c2);

		# Even columns start with the square term a[col/2].
		my $mid=int($col/2);
		if (($col % 2) == 0)
			{
			&sqr_add_c($a[$mid],$c0,$c1,$c2);
			}

		# Then the cross terms a[hi]*a[col-hi] with hi > col-hi, in
		# ascending hi, never indexing past a[7].
		my $last=($col < 7)? $col: 7;
		foreach my $hi ($mid+1 .. $last)
			{
			&sqr_add_c2($a[$hi],$a[$col-$hi],$c0,$c1,$c2);
			}
		}

	# Column 14 leaves two live words: they become r[14] and r[15].
	&st($c0,&QWPw(14,$rp));
	&st($c1,&QWPw(15,$rp));

	&function_end($name);

	# Kept in the parenthesis-free form: written this way, Perl hands
	# the current @_ on to fin_pool.
	&fin_pool;
	}

# A file loaded with require must finish by evaluating to a true value.
1;