#06 (02/05/2024)

Arrays


#include <stdio.h>

/* Declares a 3-element float array and fills it one element at a time. */
int main()
{
    float a[3];    /* valid indices are 0, 1 and 2 */

    a[0] = 1.0;
    a[1] = 2.0;
    a[2] = 5.0;

    return 0;
}

Note that the index in arrays begins with 0 and ends with n-1 where n is the number of components.

#include <stdio.h>

/* Declares a 3-element float array and sets all elements in the declaration itself. */
int main()
{
    float a[3] = { 1.0, 2.0, 3.0 };

    return 0;
}

or simply

#include <stdio.h>

/* Declares a float array whose length (3) is taken from the number of initializers. */
int main()
{
    float a[] = { 1.0, 2.0, 3.0 };

    return 0;
}

Example:

#include <stdio.h>
#define N 5

/* Adds up the N elements of a fixed array and prints the total. */
int main()
{
    float a[N] = { 2.0, -15.0, 12.0, -5.4, 1.9 };
    float sum = 0.0;
    int k;

    /* Visit a[0] .. a[N-1]; N-1 is the last valid index. */
    for (k = 0; k < N; k++) {
        sum += a[k];
    }

    printf("The sum is = %f\n", sum);
    return 0;
}

Standard deviation and variance

The average of a statistical population, {x_1, x_2, x_3, …, x_N}, is the arithmetic mean defined as

$$\bar{X} \equiv \frac{1}{N} \sum_{i=1}^{N} x_i. \tag{1}$$
The variance of the same distribution is defined as

$$s_x^2 \equiv \frac{1}{N-1} \sum_{i=1}^{N} \left(x_i - \bar{X}\right)^2. \tag{2}$$
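Note the factor N−1, instead of N, in Eq. (2). This is a mathematical necessity: the mean X̄ is computed from the same N data points, so only N−1 of the deviations x_i − X̄ are independent, and dividing by N−1 makes s_x² an unbiased estimate of the variance.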
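The sum in Eq. (2) can be rewritten in two different ways as follows:

$$\sum_{i=1}^{N} \left(x_i - \bar{X}\right)^2 = \sum_{i=1}^{N} \left(x_i^2 - 2\bar{X} x_i + \bar{X}^2\right) \tag{3}$$

$$= \sum_{i=1}^{N} x_i^2 - 2\bar{X} \sum_{i=1}^{N} x_i + \bar{X}^2 \sum_{i=1}^{N} 1 \tag{4}$$

$$= \sum_{i=1}^{N} x_i^2 - 2N\bar{X}^2 + N\bar{X}^2 \tag{5}$$

$$= \sum_{i=1}^{N} x_i^2 - N\bar{X}^2 \tag{6}$$

$$= \sum_{i=1}^{N} x_i^2 - \frac{\left(\sum_{i=1}^{N} x_i\right)^2}{N}, \tag{7}$$

where the step from Eq. (4) to Eq. (5) uses $\sum_{i=1}^{N} x_i = N\bar{X}$ and $\sum_{i=1}^{N} 1 = N$,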
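so

$$s_x^2 = \frac{1}{N-1} \left( \sum_{i=1}^{N} x_i^2 - N\bar{X}^2 \right) \tag{8}$$

$$= \frac{1}{N-1} \left( \sum_{i=1}^{N} x_i^2 - \frac{\left(\sum_{i=1}^{N} x_i\right)^2}{N} \right). \tag{9}$$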
Equations (8, 9) are more convenient than Eq. (2) for actual computation of the variance.
The standard deviation, s_x, which has the same dimension as x_i, is defined as

$$s_x \equiv \sqrt{\frac{1}{N-1} \sum_{i=1}^{N} \left(x_i - \bar{X}\right)^2}. \tag{10}$$
Example program to compute average and standard deviation:

/* Computes the average and the standard deviation of 100 data points. */

#include <stdio.h>
#include <math.h>
#define N 100

/*
 * Prints the arithmetic mean of the N values in a[] and their standard
 * deviation computed with the N-1 divisor.
 */
int main()
{
    float a[N] = {
        0.974742, 0.0982212, 0.578671, 0.717988, 0.881543, 0.0771773, 0.910513,
        0.576627, 0.506879, 0.629856, 0.71646, 0.454598, 0.312042, 0.473764,
        0.482425, 0.205726, 0.545685, 0.496812, 0.098855, 0.66501, 0.234723,
        0.774508, 0.779933, 0.747837, 0.259982, 0.676287, 0.201261, 0.0298494,
        0.378439, 0.599109, 0.290748, 0.453223, 0.87156, 0.969254, 0.574289,
        0.998625, 0.559518, 0.49549, 0.091864, 0.792899, 0.0138333, 0.998678,
        0.993009, 0.127889, 0.77911, 0.22417, 0.213076, 0.380052, 0.519128, 0.547883,
        0.011815, 0.350202, 0.14069, 0.948774, 0.721067, 0.896979, 0.26913, 0.97952,
        0.146778, 0.898354, 0.709611, 0.48403, 0.0549138, 0.105455, 0.695778,
        0.485352, 0.0619048, 0.977566, 0.916668, 0.261182, 0.848828, 0.597515,
        0.39754, 0.713299, 0.837013, 0.247313, 0.25685, 0.764525, 0.115947, 0.350333,
        0.98772, 0.785004, 0.969169, 0.451979, 0.278109, 0.300974, 0.914255,
        0.346524, 0.582331, 0.815621, 0.85235, 0.368957, 0.665663, 0.554439,
        0.00352195, 0.771442, 0.268123, 0.84114, 0.166509, 0.52413
    };
    float total = 0;
    float sq_dev_sum = 0;   /* running sum of squared deviations from the mean */
    float mean;
    float std_dev;
    int k;

    /* First pass: the mean must be known before any deviation can be formed. */
    for (k = 0; k < N; k++) {
        total += a[k];
    }
    mean = total / N;

    /* Second pass: accumulate (a[k] - mean)^2. */
    for (k = 0; k < N; k++) {
        sq_dev_sum += pow(a[k] - mean, 2);
    }

    /* N-1.0 forces floating-point division and applies the N-1 divisor. */
    std_dev = sqrt(sq_dev_sum / (N - 1.0));

    printf("Average= %f S.D.=%f\n", mean, std_dev);

    return 0;
}

Multi-dimensional arrays

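A 2 × 5 matrix

$$a = \begin{pmatrix}
\mathtt{a[0][0]} & \mathtt{a[0][1]} & \mathtt{a[0][2]} & \mathtt{a[0][3]} & \mathtt{a[0][4]} \\
\mathtt{a[1][0]} & \mathtt{a[1][1]} & \mathtt{a[1][2]} & \mathtt{a[1][3]} & \mathtt{a[1][4]}
\end{pmatrix}$$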



Note that the index begins with 0, not 1.

$$\mathrm{mat} = \begin{pmatrix}
1.0 & 2.0 & 3.0 & 4.0 & 5.0 \\
6.0 & 7.0 & 8.0 & 9.0 & 10.0
\end{pmatrix}$$



The following program defines a 2 × 5 matrix and prints all the components.

#include <stdio.h>

#define COL 5
#define ROW 2

/* Builds a ROW x COL matrix and prints it, one matrix row per output line. */
int main()
{
    float mat[ROW][COL] = {
        { 1.0, 2.0, 3.0, 4.0, 5.0 },
        { 6.0, 7.0, 8.0, 9.0, 10.0 }
    };
    int r, c;

    for (r = 0; r < ROW; r++) {
        for (c = 0; c < COL; c++) {
            printf("%f ", mat[r][c]);
        }
        printf("\n");   /* finish the line once every column of this row is printed */
    }
    return 0;
}

Summary of parentheses

Regression analysis (curve fitting)

Example

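| x | 1 | 2 | 3 | 4 | 5 |
|---|---|---|---|---|---|
| y | -1 | 5 | 16 | 25 | 50 |

Assume that the data can be approximated by the straight line

$$y = a x + b.$$

Determine a and b.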
regression2.gif
The error is defined as the difference between the measured value and predicted value.
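| x | 1 | 2 | 3 | 4 | 5 |
|---|---|---|---|---|---|
| Actual value | -1 | 5 | 16 | 25 | 50 |
| Predicted value | a+b | 2a+b | 3a+b | 4a+b | 5a+b |
| Difference (predicted − actual) | a+b+1 | 2a+b−5 | 3a+b−16 | 4a+b−25 | 5a+b−50 |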
Total error (the sum of the squared differences):

$$\begin{aligned}
E^2 &= (a+b+1)^2 + (2a+b-5)^2 + (3a+b-16)^2 + (4a+b-25)^2 + (5a+b-50)^2 \\
    &= 3407 - 814a + 55a^2 - 190b + 30ab + 5b^2.
\end{aligned}$$

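The total error is smallest where both partial derivatives vanish:

$$\frac{\partial E^2}{\partial a} = -814 + 110a + 30b = 0, \qquad
\frac{\partial E^2}{\partial b} = -190 + 30a + 10b = 0,$$

that is,

$$\begin{aligned}
110a + 30b &= 814, \\
30a + 10b &= 190,
\end{aligned} \tag{11}$$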
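which can be solved as ( link )

$$a = \frac{\begin{vmatrix} 814 & 30 \\ 190 & 10 \end{vmatrix}}{\begin{vmatrix} 110 & 30 \\ 30 & 10 \end{vmatrix}} = \frac{61}{5} = 12.2,
\qquad
b = \frac{\begin{vmatrix} 110 & 814 \\ 30 & 190 \end{vmatrix}}{\begin{vmatrix} 110 & 30 \\ 30 & 10 \end{vmatrix}} = -\frac{88}{5} = -17.6.$$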

y = 12.2 x − 17.6

Derivation of general formula

| X | x_1 | x_2 | x_3 | ... | x_N |
|---|---|---|---|---|---|
| Y | y_1 | y_2 | y_3 | ... | y_N |
regression.gif

$$y = a x + b. \tag{12}$$

$$E^2 \equiv (a x_1 + b - y_1)^2 + (a x_2 + b - y_2)^2 + \dots + (a x_N + b - y_N)^2 \to \min \tag{13}$$

$$\frac{\partial E^2}{\partial a} = 0, \qquad \frac{\partial E^2}{\partial b} = 0. \tag{14}$$

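$$\begin{aligned}
2(a x_1 + b - y_1)\,x_1 + 2(a x_2 + b - y_2)\,x_2 + \dots + 2(a x_N + b - y_N)\,x_N &= 0, \\
2(a x_1 + b - y_1)(+1) + 2(a x_2 + b - y_2)(+1) + \dots + 2(a x_N + b - y_N)(+1) &= 0,
\end{aligned} \tag{15}$$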
or

$$\begin{aligned}
2 \sum_{i=1}^{N} (a x_i + b - y_i)\,x_i &= 0, \\
2 \sum_{i=1}^{N} (a x_i + b - y_i)(+1) &= 0,
\end{aligned} \tag{16}$$

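$$\begin{aligned}
\left(\sum_{i=1}^{N} x_i^2\right) a + \left(\sum_{i=1}^{N} x_i\right) b &= \sum_{i=1}^{N} x_i y_i, \\
\left(\sum_{i=1}^{N} x_i\right) a + \left(\sum_{i=1}^{N} 1\right) b &= \sum_{i=1}^{N} y_i,
\end{aligned} \tag{17}$$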

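$$a = \frac{\begin{vmatrix} \sum x_i y_i & \sum x_i \\ \sum y_i & N \end{vmatrix}}{\begin{vmatrix} \sum x_i^2 & \sum x_i \\ \sum x_i & N \end{vmatrix}} \tag{18}$$

$$b = \frac{\begin{vmatrix} \sum x_i^2 & \sum x_i y_i \\ \sum x_i & \sum y_i \end{vmatrix}}{\begin{vmatrix} \sum x_i^2 & \sum x_i \\ \sum x_i & N \end{vmatrix}}. \tag{19}$$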

#include <stdio.h>
#define N 10
/*
 * Skeleton of a least-squares fit of the line y = a x + b to N data points.
 * NOTE: the "..." inside the initializers and the "....." right-hand sides
 * are placeholders left for the reader; the listing does not compile as is.
 */
int main(){
/* Data points: x[] and y[] (most values are elided in this listing). */
float x[N]={1,2,3,4,... ,10}, 
 y[N]={-3, 2.9, 0.5, ...., 2.4};

/* Running sums of x*y, x, y and x*x, all starting from zero. */
float xysum=0.0, xsum=0.0, ysum=0.0, x2sum=0.0;
/* Coefficients printed below as "a x + b". */
float a, b;

int i;

/* One pass over the data accumulates all four sums. */
for (i=0; i< N; i++)
{
xsum = xsum + x[i];
ysum = ysum + y[i];
xysum = xysum + x[i]*y[i];
x2sum = x2sum + x[i]*x[i];
}

/* Placeholder: presumably (N*xysum - xsum*ysum) / (N*x2sum - xsum*xsum), per Eq. (18) - TODO confirm. */
a = .....;

/* Placeholder: presumably (x2sum*ysum - xsum*xysum) / (N*x2sum - xsum*xsum), per Eq. (19) - TODO confirm. */
b = .....;

printf("The regression line is %f x + %f.\n", a, b);
return 0;
}

Exercise

If it is more appropriate to assume a second-order curve of the form

$$y = a x^2 + b,$$

what modification is needed?
regression_2nd.gif




File translated from TEX by TTH, version 4.03.
On 14 Feb 2024, 10:45.