Skip to content

Commit 9f03000

Browse files
gururaj1512, kgryte, and stdlib-bot
authored
feat: add number/float32/base/to-float16
PR-URL: #8938 Co-authored-by: Athan Reines <[email protected]> Reviewed-by: Athan Reines <[email protected]> Co-authored-by: stdlib-bot <[email protected]>
1 parent 4eb40b3 commit 9f03000

38 files changed

+2828
-0
lines changed
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
<!--
2+
3+
@license Apache-2.0
4+
5+
Copyright (c) 2025 The Stdlib Authors.
6+
7+
Licensed under the Apache License, Version 2.0 (the "License");
8+
you may not use this file except in compliance with the License.
9+
You may obtain a copy of the License at
10+
11+
http://www.apache.org/licenses/LICENSE-2.0
12+
13+
Unless required by applicable law or agreed to in writing, software
14+
distributed under the License is distributed on an "AS IS" BASIS,
15+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
See the License for the specific language governing permissions and
17+
limitations under the License.
18+
19+
-->
20+
21+
# toFloat16
22+
23+
> Convert a [single-precision floating-point number][ieee754] to the nearest [half-precision floating-point number][half-precision-floating-point-format].
24+
25+
<section class="usage">
26+
27+
## Usage
28+
29+
```javascript
30+
var float32ToFloat16 = require( '@stdlib/number/float32/base/to-float16' );
31+
```
32+
33+
#### float32ToFloat16( x )
34+
35+
Converts a [single-precision floating-point number][ieee754] to the nearest [half-precision floating-point number][half-precision-floating-point-format].
36+
37+
```javascript
38+
var float64ToFloat32 = require( '@stdlib/number/float64/base/to-float32' );
39+
40+
var y = float32ToFloat16( float64ToFloat32( 1.337 ) );
41+
// returns 1.3369140625
42+
```
43+
44+
</section>
45+
46+
<!-- /.usage -->
47+
48+
<section class="notes">
49+
50+
</section>
51+
52+
<!-- /.notes -->
53+
54+
<section class="examples">
55+
56+
## Examples
57+
58+
<!-- eslint no-undef: "error" -->
59+
60+
```javascript
61+
var uniform = require( '@stdlib/random/array/uniform' );
62+
var pickArguments = require( '@stdlib/utils/pick-arguments' );
63+
var logEachMap = require( '@stdlib/console/log-each-map' );
64+
var float32ToFloat16 = require( '@stdlib/number/float32/base/to-float16' );
65+
66+
// Generate an array of random numbers:
67+
var f32 = uniform( 100, 0.0, 100.0, {
68+
'dtype': 'float32'
69+
});
70+
71+
// Convert each single-precision floating-point number to the nearest half-precision floating-point number:
72+
logEachMap( 'float32: %f => float16: %f', f32, pickArguments( float32ToFloat16, [ 1 ] ) );
73+
```
74+
75+
</section>
76+
77+
<!-- /.examples -->
78+
79+
<!-- C interface documentation. -->
80+
81+
* * *
82+
83+
<section class="c">
84+
85+
## C APIs
86+
87+
<!-- Section to include introductory text. Make sure to keep an empty line after the intro `section` element and another before the `/section` close. -->
88+
89+
<section class="intro">
90+
91+
</section>
92+
93+
<!-- /.intro -->
94+
95+
<!-- C usage documentation. -->
96+
97+
<section class="usage">
98+
99+
### Usage
100+
101+
```c
102+
#include "stdlib/number/float32/base/to_float16.h"
103+
```
104+
105+
#### stdlib_base_float32_to_float16( x )
106+
107+
Converts a [single-precision floating-point number][ieee754] to the nearest [half-precision floating-point number][half-precision-floating-point-format].
108+
109+
```c
110+
#include "stdlib/number/float16/ctor.h"
111+
112+
stdlib_float16_t x = stdlib_base_float32_to_float16( 3.14f );
113+
```
114+
115+
The function accepts the following arguments:
116+
117+
- **x**: `[in] float` input value.
118+
119+
```c
120+
stdlib_float16_t stdlib_base_float32_to_float16( const float x );
121+
```
122+
123+
</section>
124+
125+
<!-- /.usage -->
126+
127+
<!-- C API usage notes. Make sure to keep an empty line after the `section` element and another before the `/section` close. -->
128+
129+
<section class="notes">
130+
131+
</section>
132+
133+
<!-- /.notes -->
134+
135+
<!-- C API usage examples. -->
136+
137+
<section class="examples">
138+
139+
### Examples
140+
141+
```c
142+
#include "stdlib/number/float32/base/to_float16.h"
143+
#include "stdlib/number/float16/ctor.h"
144+
#include <stdint.h>
145+
#include <stdio.h>
146+
147+
int main( void ) {
148+
const float x[] = { 3.14f, -3.14f, 0.0f, 0.0f/0.0f };
149+
150+
stdlib_float16_t v;
151+
int i;
152+
for ( i = 0; i < 4; i++ ) {
153+
v = stdlib_base_float32_to_float16( x[ i ] );
154+
printf( "%f => uint16: %d\n", x[ i ], stdlib_float16_to_bits( v ) );
155+
}
156+
}
157+
```
158+
159+
</section>
160+
161+
<!-- /.examples -->
162+
163+
</section>
164+
165+
<!-- /.c -->
166+
167+
<!-- Section for related `stdlib` packages. Do not manually edit this section, as it is automatically populated. -->
168+
169+
<section class="related">
170+
171+
</section>
172+
173+
<!-- /.related -->
174+
175+
<!-- Section for all links. Make sure to keep an empty line after the `section` element and another before the `/section` close. -->
176+
177+
<section class="links">
178+
179+
[ieee754]: https://en.wikipedia.org/wiki/IEEE_754-1985
180+
181+
[half-precision-floating-point-format]: https://en.wikipedia.org/wiki/Half-precision_floating-point_format
182+
183+
</section>
184+
185+
<!-- /.links -->
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2025 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
'use strict';
20+
21+
// MODULES //
22+
23+
var bench = require( '@stdlib/bench' );
24+
var uniform = require( '@stdlib/random/array/uniform' );
25+
var isnan = require( '@stdlib/math/base/assert/is-nan' );
26+
var pkg = require( './../package.json' ).name;
27+
var float32ToFloat16 = require( './../lib' );
28+
var polyfill = require( './../lib/polyfill.js' );
29+
30+
31+
// VARIABLES //
32+
33+
// Options for the `::builtin` benchmark: skip it unless the runtime exposes a callable `Math.f16round`. Testing for a function (rather than merely "not undefined") ensures the benchmark is skipped, not crashed, if the property exists but cannot be invoked.
var opts = {
	'skip': ( typeof Math.f16round !== 'function' ) // eslint-disable-line stdlib/no-builtin-math
};
36+
37+
38+
// MAIN //
39+
40+
// Benchmark the package's main export.
bench( pkg, function benchmark( b ) {
	var values;
	var len;
	var out;
	var k;

	// Generate 100 single-precision inputs between -5.0e4 and 5.0e4:
	values = uniform( 100, -5.0e4, 5.0e4, {
		'dtype': 'float32'
	});
	len = values.length;

	b.tic();
	for ( k = 0; k < b.iterations; k += 1 ) {
		// Cycle through the inputs, as the iteration count may exceed the number of generated values:
		out = float32ToFloat16( values[ k % len ] );
		if ( isnan( out ) ) {
			b.fail( 'should not return NaN' );
		}
	}
	b.toc();

	// Check the last result once more outside of the timed region:
	if ( isnan( out ) ) {
		b.fail( 'should not return NaN' );
	}
	b.pass( 'benchmark finished' );
	b.end();
});
63+
64+
// Benchmark the polyfill implementation loaded from `./../lib/polyfill.js`.
bench( pkg+'::polyfill', function benchmark( b ) {
	var values;
	var len;
	var out;
	var k;

	// Generate 100 single-precision inputs between -5.0e4 and 5.0e4:
	values = uniform( 100, -5.0e4, 5.0e4, {
		'dtype': 'float32'
	});
	len = values.length;

	b.tic();
	for ( k = 0; k < b.iterations; k += 1 ) {
		// Cycle through the inputs, as the iteration count may exceed the number of generated values:
		out = polyfill( values[ k % len ] );
		if ( isnan( out ) ) {
			b.fail( 'should not return NaN' );
		}
	}
	b.toc();

	// Check the last result once more outside of the timed region:
	if ( isnan( out ) ) {
		b.fail( 'should not return NaN' );
	}
	b.pass( 'benchmark finished' );
	b.end();
});
87+
88+
// Benchmark the runtime's built-in `Math.f16round`; whether this benchmark runs is controlled by `opts.skip`.
bench( pkg+'::builtin', opts, function benchmark( b ) {
	var values;
	var len;
	var out;
	var k;

	// Generate 100 single-precision inputs between -5.0e4 and 5.0e4:
	values = uniform( 100, -5.0e4, 5.0e4, {
		'dtype': 'float32'
	});
	len = values.length;

	b.tic();
	for ( k = 0; k < b.iterations; k += 1 ) {
		// Cycle through the inputs, as the iteration count may exceed the number of generated values:
		out = Math.f16round( values[ k % len ] ); // eslint-disable-line stdlib/no-builtin-math
		if ( isnan( out ) ) {
			b.fail( 'should not return NaN' );
		}
	}
	b.toc();

	// Check the last result once more outside of the timed region:
	if ( isnan( out ) ) {
		b.fail( 'should not return NaN' );
	}
	b.pass( 'benchmark finished' );
	b.end();
});
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2025 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
'use strict';
20+
21+
// MODULES //
22+
23+
var resolve = require( 'path' ).resolve;
24+
var bench = require( '@stdlib/bench' );
25+
var uniform = require( '@stdlib/random/array/uniform' );
26+
var isnan = require( '@stdlib/math/base/assert/is-nan' );
27+
var tryRequire = require( '@stdlib/utils/try-require' );
28+
var pkg = require( './../package.json' ).name;
29+
30+
31+
// VARIABLES //
32+
33+
// Attempt to load the native implementation from `./../lib/native.js`:
var float32ToFloat16 = tryRequire( resolve( __dirname, './../lib/native.js' ) );

// Skip the native benchmark when the load attempt produced an `Error` instead of the implementation:
var opts = {};
opts.skip = ( float32ToFloat16 instanceof Error );
37+
38+
39+
// MAIN //
40+
41+
// Benchmark the native implementation; whether this benchmark runs is controlled by `opts.skip`.
bench( pkg+'::native', opts, function benchmark( b ) {
	var values;
	var len;
	var out;
	var k;

	// Generate 100 single-precision inputs between -5.0e4 and 5.0e4:
	values = uniform( 100, -5.0e4, 5.0e4, {
		'dtype': 'float32'
	});
	len = values.length;

	b.tic();
	for ( k = 0; k < b.iterations; k += 1 ) {
		// Cycle through the inputs, as the iteration count may exceed the number of generated values:
		out = float32ToFloat16( values[ k % len ] );
		if ( isnan( out ) ) {
			b.fail( 'should not return NaN' );
		}
	}
	b.toc();

	// Check the last result once more outside of the timed region:
	if ( isnan( out ) ) {
		b.fail( 'should not return NaN' );
	}
	b.pass( 'benchmark finished' );
	b.end();
});

0 commit comments

Comments
 (0)