Most of computer applications operates on small integers which can be represented by 32 bit integers. However, there are times we’ll need to deal with extremely big integers which can only be represented by hundreds of bits. One of such application is cryptography.
This post gives two simple implementations of the big integer arithmetic. Both implementations use char array to store the big integers. The first one stores the digits of the integer as char, and the second one stores the digits as single digit integer values.
String Representation
We define a big integer as the structure below,
typedef struct {
int sign;
int length;
char dg[MAXDG];
} BN;
For example, given two input integers “111222333444555” and “12345”, we can store them into two BNs a and b, with a.length = 15, a.dg[0] = ‘1’, a.dg[1]=’1’ …, a.dg[14]=’5’, a.sign = 1; b.length=5, b.dg[0]=’1’, b.dg[1]=’2’…a.dg[4]=’5’, b.sign=1.
Addition: We add the two integers digit by digit from right to left. If the sum of the two digits is bigger than ‘9’. Then a carry of 1 is produced for the next position. And the carry is added to the sum of the digits at the next position. We store the digits of the result from left to right and then reverse it after it’s done.
Subtraction: Subtraction is similar to addition. We minus the digits from the two integers from right to left. If the resulted digit is less than 0, we borrow 1 from the next position.
Multiplication: A naive approach is to use addition to handle multiplication. But if both inputs are big, then the algorithm is very slow.
A better approach is to handle a single digit multiplication using addition, then shift the multiplicand by 1 (which is equivalent to x10) and handle the next digit multiplication.
For example, if the two inputs are 12345 and 57, we treat the multiplication as 12345 x 7 + 123450 x 5, then each of the two multiplications can be handled using additions. In this way, the number of additions needed are reduced to the sum of all digits of the multiplier.
Division: Again the naive approach is to use subtraction. A better alternative is trying to subtract the divisor from part of the dividend from right to left. For example, to compute 234 / 11, we start by computing 23 / 11 using subtraction. The quotient is 2 and the remainder is 1. We then compute (1*10 + 4) / 11 using subtraction, the quotient will be 1. So the result is 21.
Below is a C implementation of the algorithms described above,
/**
array implementation of bignum
**/
#include <stdio.h>
#include <string.h>
#define MAXDG 500
#define PLUS 1
#define MINUS -1
typedef struct {
int sign;
int length;
char dg[MAXDG];
} BN;
int cmp(BN a, BN b);
void sub(BN a, BN b, BN *c);
void add(BN a, BN b, BN *c);
void reverse(BN *c);
void rm_tzeros(BN *c);
void rm_lzeros(BN *c);
void init(BN *a);
int cmp(BN a, BN b) {
if (a.sign == PLUS && b.sign == MINUS) {
return 1;
} else if (a.sign == MINUS && b.sign == PLUS) {
return -1;
} else {
//a and b have same sign
if (a.length > b.length) {
return 1*a.sign;
} else if (a.length < b.length) {
return -1*a.sign;
} else {
//a and b have same sign, same length
int i;
for (i = 0; i < a.length; ++i) {
if (a.dg[i] > b.dg[i]) {
return 1*a.sign;
} else if (a.dg[i] < b.dg[i]) {
return -1*a.sign;
}
}
return 0; //equal
}
}
}
void reverse(BN *c) {
char tmp;
int i;
for (i = 0; i < c->length/2; ++i) {
tmp = c->dg[i];
c->dg[i] = c->dg[c->length-i-1];
c->dg[c->length-i-1] = tmp;
}
}
void rm_lzeros(BN *c) {
int i, j;
for (i = 0; i < c->length; ++i) {
if (c->dg[i] != '0') {
break;
}
}
for (j = 0; i < c->length; ++i, ++j) {
c->dg[j] = c->dg[i];
}
c->length = j == 0?1:j;
c->dg[c->length] = '\0';
}
void rm_tzeros(BN *c) {
int i, j = 0;
for (i = c->length-1; i > 0; --i) {
if (c->dg[i] == '0') {
j++;
} else {
break;
}
}
c->length -= j;
c->dg[c->length] = '\0';
}
void sub(BN a, BN b, BN *c) {
int i, j, k;
int borrow = 0;
if (a.sign == MINUS) {
if (b.sign == MINUS) {
sub(b, a, c);
c->sign *= -1;
} else {
b.sign = MINUS;
add(a, b, c);
}
return;
} else {
if (b.sign == MINUS) {
b.sign = PLUS;
add(a, b, c);
return;
}
}
//a.sign == PLUS, b.sign == PLUS
if (cmp(a, b) < 0) {
sub(b, a, c);
c->sign = MINUS;
return;
} else if (cmp(a, b) == 0) {
c->dg[0] = '0';
c->dg[1] = '\0';
c->length = 1;
c->sign = PLUS;
return;
}
//a.sign == PLUS, b.sign == PLUS, a > b
c->sign = PLUS;
for (i = a.length-1, j = b.length-1, k = 0; i >= 0 && j >= 0; --i, --j, ++k) {
c->dg[k] = a.dg[i] - borrow - b.dg[j];
if (c->dg[k] < 0) {
c->dg[k] += 10;
borrow = 1;
} else {
borrow = 0;
}
c->dg[k] += '0';
}
for (; i >= 0; --i, ++k) {
c->dg[k] = a.dg[i] - borrow;
if (c->dg[k] - '0' < 0) {
c->dg[k] += 10;
borrow = 1;
} else {
borrow = 0;
}
}
c->dg[k] = '\0';
c->length = k;
//printf("%s\n", c->dg);
rm_tzeros(c);
//printf("%s\n", c->dg);
reverse(c);
}
void add(BN a, BN b, BN *c) {
int i, j, k;
int carry = 0;
// printf("%d:%d\n", a.length, b.length);
if (a.sign == b.sign) {
c->sign = a.sign;
} else {
if (a.sign == MINUS) {
a.sign = PLUS; //tmp
sub(b, a, c);
} else {
b.sign = PLUS; //tmp
sub(a, b, c);
}
return;
}
for (i = a.length-1, j = b.length-1, k = 0; i >= 0 && j >= 0; --i, --j, ++k) {
c->dg[k] = a.dg[i] -'0' + b.dg[j] - '0' + carry;
//printf("%d %d %c %c\n",k, c->dg[k], a.dg[i], b.dg[j]);
carry = c->dg[k]/10;
c->dg[k] = c->dg[k]%10 + '0';
// printf("%d %c\n", k, (*c->dg[k]);
}
for (; i >= 0; --i, ++k){
if (carry != 0) {
c->dg[k] = carry + a.dg[i] - '0';
carry = c->dg[k]/10;
c->dg[k] = c->dg[k]%10 + '0';
} else {
c->dg[k] = a.dg[i];
}
}
for (; j >= 0; --j, ++k){
if (carry != 0) {
c->dg[k] = carry + b.dg[j] - '0';
carry = c->dg[k]/10;
c->dg[k] = c->dg[k]%10 + '0';
} else {
c->dg[k] = b.dg[j];
}
}
if (carry != 0) {
c->dg[k++] = carry + '0';
}
c->dg[k] = '\0';
c->length = k;
//printf("%d %s\n", c->length, c->dg);
reverse(c);
//printf("%d %s\n", c->length, c->dg);
}
void lshift(BN *a, int num) {
int i;
if ((a->length == 1) && (a->dg[0] == '0')) {
return;
}
for (i = 0; i < num; ++i) {
a->dg[i+a->length] = '0';
}
a->length += num;
a->dg[a->length] = '\0';
}
void mul(BN a, BN b, BN *c) {
int i, j;
init(c);
for (i = a.length - 1; i >= 0; --i) {
for (j = 0; j < a.dg[i]-'0'; ++j) {
//printf("add %s to %s\n", b.dg, c->dg);
add(*c, b, c);
}
lshift(&b, 1);
//printf("%s %s\n", c->dg, b.dg);
}
c->sign = a.sign*b.sign;
}
void div(BN a, BN b, BN *c) {
int i;
BN tmp;
init(c);
init(&tmp);
for (i = 0; i < a.length; ++i) {
//printf("bf: %s\n", tmp.dg);
if (i > 0) {
lshift(&tmp, 1);
} else {
tmp.length = 1;
}
//printf("af: %s\n", tmp.dg);
tmp.dg[tmp.length-1] = a.dg[i];
c->dg[i] = '0';
//printf("%d b: %d:%d:%s %d:%d:%s\n", i, tmp.sign, tmp.length, tmp.dg, b.sign, b.length, b.dg);
rm_lzeros(&tmp);
while (cmp(tmp, b) >= 0) {
//printf("sub: %s %s\n", tmp.dg, b.dg);
sub(tmp, b, &tmp);
c->dg[i]++;
}
}
c->dg[i] = '\0';
c->length = a.length;
//printf("%d %s\n", i, c->dg);
rm_lzeros(c);
//printf("%s\n", c->dg);
}
void init(BN *a) {
a->sign = PLUS;
a->length = 1;
a->dg[0] = '0';
a->dg[1] = '\0';
}
int main(int argc, char** argv) {
BN m, n, rv;
if (argv[1][0] == '-') {
m.sign = MINUS;
sprintf(m.dg, "%s", &(argv[1][1]));
} else {
m.sign = PLUS;
sprintf(m.dg, "%s", argv[1]);
}
m.length = strlen(m.dg);
if (argv[2][0] == '-') {
n.sign = MINUS;
sprintf(n.dg, "%s", &(argv[2][1]));
} else {
n.sign = PLUS;
sprintf(n.dg, "%s", argv[2]);
}
n.length = strlen(n.dg);
printf("m:%c%s\n", m.sign>0?' ':'-', m.dg);
printf("n:%c%s\n", n.sign>0?' ':'-', n.dg);
add(m, n, &rv);
printf("m + n = %c%s\n", rv.sign>0?' ':'-', rv.dg);
sub(m, n, &rv);
printf("m - n = %c%s\n", rv.sign>0?' ':'-', rv.dg);
lshift(&rv, 1);
printf("m << 1 = %c%s\n", rv.sign>0?' ':'-', rv.dg);
mul(m, n, &rv);
printf("m * n = %c%s\n", rv.sign>0?' ':'-', rv.dg);
div(m, n, &rv);
printf("m / n = %c%s\n", rv.sign>0?' ':'-', rv.dg);
}
Save the code to bignumarray.c, and compile the code using the command below,
gcc -o bignumarray bignumarray.c
And here are some simple tests,
./bignumarray 34123432143214321 1342
m: 34123432143214321
n: 1342
m + n = 34123432143215663
m - n = 34123432143212979
m << 1 = 341234321432129790
m * n = 45793645936193618782
m / n = 25427296678997
./bignumarray 332 13424312432
m: 332
n: 13424312432
m + n = 13424312764
m - n = -13424312100
m << 1 = -134243121000
m * n = 4456871727424
m / n = 0
Digit Array Representation
The big integer is still defined as below,
typedef struct {
int sign;
int length;
char dg[MAXDG];
} BN;
But now the dg holds the values for each digit. For example, given two inputs 12345 and 678, then the two big integers are a, b, with a.length = 5, a.sign = 1, and a.dg[0]=5, a.dg[1]=4, a.dg[2]=3, a.dg[3]=2, a.dg[4]=1; b.length=3, b.sign = 1, and b.dg[0]=8, b.dg[1]=7, b.dg[2]=6. Note that the digits are stored in a reverse manner because it is more convenient to manipulate the end of an array than the start of an array (e.g. truncate the array by 1).
The arithmetic operations are similar to the previous implementation. Below is the sample C code.
#include <stdio.h>
#include <string.h>
#define MAXDG 500
#define PLUS 1
#define MINUS -1
typedef struct {
int sign;
int length;
char dg[MAXDG];
} BN;
void add(BN a, BN b, BN *c);
void mul(BN a, BN b, BN *c);
void div(BN a, BN b, BN *c);
void sub(BN a, BN b, BN *c);
void digit_shift(BN *a, int d);
int cmp(BN a, BN b);
void zero_justify(BN* a);
void init(BN *a);
void printbn(BN a);
void printbn(BN a) {
int i;
//printf("length: %d ", a.length);
if (a.sign == MINUS) {
printf("-");
}
for (i = a.length-1; i >= 0; --i) {
printf("%c", '0' + a.dg[i]);
}
printf("\n");
}
void init(BN *a) {
a->length = 1;
memset(a->dg, 0x00, sizeof(char)*MAXDG);
a->sign = PLUS;
}
void zero_justify(BN* a) {
while ((a->length > 1) && (a->dg[a->length-1] == 0)) {
(a->length)--;
}
//if -0, make it +0
if ((a->length == 1) && (a->dg[0] == 0)) {
a->sign = PLUS;
}
}
int cmp(BN a, BN b) {
if (a.sign == PLUS && b.sign == MINUS) {
return 1;
} else if (a.sign == MINUS && b.sign == PLUS) {
return -1;
} else {
//a and b have same sign
if (a.length > b.length) {
return 1*a.sign;
} else if (a.length < b.length) {
return -1*a.sign;
} else {
//a and b have same sign, same length
int i;
for (i = a.length-1; i >= 0; --i) {
if (a.dg[i] > b.dg[i]) {
return 1*a.sign;
} else if (a.dg[i] < b.dg[i]) {
return -1*a.sign;
}
}
return 0; //equal
}
}
}
void digit_shift(BN *a, int d) {
int i;
if ((a->length == 1) && (a->dg[0] == 0)) {
return;
}
for (i = a->length - 1; i >= 0; --i) {
a->dg[i+d] = a->dg[i];
}
for (i = 0; i < d; ++i) {
a->dg[i] = 0;
}
a->length = a->length + d;
}
void add(BN a, BN b, BN *c) {
int i;
int carry = 0;
init(c);
if (a.sign == b.sign) {
c->sign = a.sign;
} else {
if (a.sign == MINUS) {
a.sign = PLUS;
sub(b, a, c);
} else {
b.sign = PLUS;
sub(a, b, c);
}
return;
}
c->length = (a.length > b.length)?a.length+1:b.length+1;
for (i = 0; i < c->length; ++i) {
c->dg[i] = (char)(a.dg[i] + b.dg[i] + carry)%10;
carry = (a.dg[i] + b.dg[i] + carry)/10;
}
zero_justify(c);
}
void sub(BN a, BN b, BN *c) {
int borrow;
int v;
int i;
init(c);
if ((a.sign == MINUS) || (b.sign == MINUS)) {
b.sign = -1*b.sign;
add(a, b, c);
return;
}
if (cmp(a, b) < 0) {
sub(b, a, c);
c->sign = MINUS;
return;
}
//a > b, and both a and b are +
c->length = a.length > b.length?a.length:b.length;
borrow = 0;
for (i = 0; i < c->length; ++i) {
c->dg[i] = (a.dg[i] - borrow - b.dg[i]);
if (c->dg[i] < 0) {
c->dg[i] += 10;
borrow = 1;
} else {
borrow = 0;
}
}
zero_justify(c);
}
void mul(BN a, BN b, BN *c) {
BN row, tmp;
int i,j;
init(c);
for (i = 0; i < b.length; ++i) {
for (j = 0; j < b.dg[i]; ++j) {
add(*c, a, c);
}
digit_shift(&a, 1);
}
zero_justify(c);
}
void div(BN a, BN b, BN *c) {
int i;
BN tmp;
init(c);
init(&tmp);
c->sign = a.sign * b.sign;
a.sign = PLUS;
b.sign = PLUS;
c->length = a.length;
for (i = a.length - 1; i >= 0; --i) {
digit_shift(&tmp, 1);
//printf("%d:%d\n", tmp.length, b.length);
//printbn(tmp);
tmp.dg[0] = a.dg[i];
c->dg[i] = 0;
while (cmp(tmp, b) >= 0) {
(c->dg[i])++;
//printbn(tmp);
sub(tmp, b, &tmp);
//printbn(tmp);
}
//printf("div: %d\n", c->dg[i]);
}
zero_justify(c);
}
void readbn(char *s, BN *a) {
int i, j;
init(a);
// printf("readbn: %d\n", strlen(s));
for (i = strlen(s)-1, j = 0; i >= 0; --i) {
a->dg[j++] = s[i] - '0';
}
a->length = j;
}
int main(int argc, char** argv) {
BN m, n, rv;
if (argv[1][0] == '-') {
m.sign = MINUS;
readbn(&argv[1][1], &m);
} else {
m.sign = PLUS;
readbn(argv[1], &m);
}
if (argv[2][0] == '-') {
n.sign = MINUS;
readbn(&argv[2][1], &n);
} else {
n.sign = PLUS;
readbn(argv[2], &n);
}
printf("m:");
printbn(m);
printf("n:");
printbn(n);
add(m, n, &rv);
printf("m + n = ");
printbn(rv);
sub(m, n, &rv);
printf("m - n = ");
printbn(rv);
digit_shift(&rv, 1);
printf("m << 1 = ");
printbn(rv);
mul(m, n, &rv);
printf("m * n = ");
printbn(rv);
div(m, n, &rv);
printf("m / n = ");
printbn(rv);
}
Save the code to bignumarray2.c, and compile the code using the command below,
gcc -o bignumarray2 bignumarray2.c
And here are some simple tests,
./bignumarray2 34123432143214321 1342
m:34123432143214321
n:1342
m + n = 34123432143215663
m - n = 34123432143212979
m << 1 = 341234321432129790
m * n = 45793645936193618782
m / n = 25427296678997
./bignumarray2 332 13424312432
m:332
n:13424312432
m + n = 13424312764
m - n = -13424312100
m << 1 = -134243121000
m * n = 4456871727424
m / n = 0
Additional Notes
1. Dynamic array is necessary in order to support arbitrary length of big integers.
2. Using a single byte to represent a single digit is a huge waste. There’re better representations.
References:
1. Programming Challenges