平均值、標準差、中數、與眾數

統計資料的計算

 

  實 習 內 容






1. 資料檔案處理

2. 陣列定義、處理、與陣列參數傳遞

3. 平均值與標準差的估算

4. 實作 selection sort 演算法來排序

5. 中數與眾數的估算

6. histogram 計算與呈現

1

請下載下列資料檔案 raw1.dat, raw2.dat, raw3.dat (請注意不要改檔案的副檔名)

範例執行檔案 至同一資料匣, 並且執行

這個程式是一個很標準的資料處理工具程式, main() 函數如下, 你可以看到裡面定義所使用到的資料變數 dataSize 以及 data 陣列, 然後呼叫 readFile 從檔案裡把資料讀進 data 陣列, dataSize 是陣列中實際存放資料的筆數; 其後呼叫 mean_standard_deviation 函數來計算平均值和標準差, 此函數沒有回傳任何資料, 計算出來的數值直接寫到螢幕上; 接下來呼叫 median 函數計算中數, 最後呼叫 mode 函數計算眾數, mode 函數內部的 freq 陣列記錄的是資料陣列中每一個數值出現的次數。

這個程式在設計的時候就是使用很標準的 top-down 設計方法, 把程式的功能分成幾個比較獨立的部份, 用函數來實作每一個獨立的部份功能。

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#define DATASIZE 150

/* Prototypes for every function the program defines, so that each call is
   checked against the real signature regardless of definition order. */
void readFile(int *dataSize, int data[]);
void mean_standard_deviation(const int data[], int dataSize);
void median(const int data[], int dataSize);
void mode(const int data[], int dataSize);
void selectionSort(int data[], int size);
void printArray(const int data[], int size);

/*
 * Program entry point: load the data set named by the user, then report
 * its mean and standard deviation, its median, and its mode.
 */
int main(void)
{
    int dataSize = 0;  /* stays 0 when readFile() does not store a count */
    int data[DATASIZE];

    /* reading file */
    readFile(&dataSize, data);

    /* process responses; none of the statistics is defined for an empty set */
    if (dataSize > 0)
    {
        mean_standard_deviation(data, dataSize);
        median(data, dataSize);
        mode(data, dataSize);
    }
    else
        printf("No data to process.\n");

    system("pause");  /* relies on the command shell providing `pause` */
    return 0;  /* indicates successful termination */
}

2

由檔案中以文字模式讀取資料:

#ifndef DATASIZE
#define DATASIZE 150  /* fallback: capacity of the caller's data array */
#endif

/*
 * Prompt for a file name on standard input and load a data set from that
 * text file.
 *
 * File layout: the first integer is the number of items, followed by the
 * items themselves, all separated by white space.
 *
 * dataSize  out: number of items actually stored in data[]; 0 on any failure
 * data      out: receives the items; at most DATASIZE of them are written
 *
 * Problems (no name entered, file cannot be opened, malformed or truncated
 * contents) are reported on stderr instead of crashing.
 */
void readFile(int *dataSize, int data[])
{
    int i;
    int declared = 0;  /* item count announced by the file */
    int stored = 0;    /* items successfully read so far */
    FILE *fp;
    char filename[30];

    *dataSize = 0;  /* every early return below therefore reports "no data" */

    printf("Input the file name: ");
    /* fgets is bounded by the buffer size, unlike gets */
    if (fgets(filename, sizeof filename, stdin) == NULL)
    {
        fprintf(stderr, "readFile: no file name was entered\n");
        return;
    }

    /* fgets keeps the line terminator; cut the name at the first one */
    for (i = 0; filename[i] != '\0'; i++)
        if (filename[i] == '\n' || filename[i] == '\r')
        {
            filename[i] = '\0';
            break;
        }

    fp = fopen(filename, "r");  /* text mode is the default */
    if (fp == NULL)
    {
        perror(filename);
        return;
    }

    if (fscanf(fp, "%d", &declared) != 1 || declared < 0)
    {
        fprintf(stderr, "readFile: %s does not start with a valid item count\n",
                filename);
        fclose(fp);
        return;
    }

    /* never write past the end of the caller's array */
    if (declared > DATASIZE)
    {
        fprintf(stderr, "readFile: %d items declared, only the first %d are read\n",
                declared, DATASIZE);
        declared = DATASIZE;
    }

    /* stop at the first item that is missing or is not an integer */
    while (stored < declared && fscanf(fp, "%d", &data[stored]) == 1)
        stored++;

    if (stored < declared)
        fprintf(stderr, "readFile: expected %d items but found only %d\n",
                declared, stored);

    fclose(fp);
    *dataSize = stored;
}

3

平均值與標準差的估算:

/* calculate average and standard deviation of all values */
void mean_standard_deviation(const int dataArray[], int arraySize)
{
    int i;
    double sum = 0.0, mean;
    double variance = 0.0, deviation;

    printf("********\n  Mean\n********\n");

    for (i = 0; i < arraySize; i++)
    {
        sum += dataArray[i];
        variance += dataArray[i]*dataArray[i];
    }

    mean = sum / arraySize;
    variance = (variance - mean*mean) / (arraySize-1); /* unbiased estimate */

    /* format and output results */
    printf("The mean is the average value of the data\n"
           "  items. The mean is equal to the sum of\n"
           "  all the data items divided by the number\n"
           "  of data items ( %d ). The mean value for\n"
           "  this run is: %f / %d = %f\n\n", 
           arraySize, sum, arraySize, mean);

    printf("The standard deviation is the square root of the\n"
           "  variance.  The variance is the average of the \n"
           "  square of (data values - mean).  The unbiased estimate\n"
           "  of variance is: %f and the standard deviation is: ",
           "  %f\n\n", 
           variance, sqrt(variance));
}

4

陣列排序中數的計算:

/*
 * Print the data unsorted and sorted, then report the median.
 *
 * The caller's array is declared const, so a private copy is sorted and
 * the original order is left untouched.
 *
 * The value reported is the element at index arraySize/2 of the sorted
 * data. For an even number of items this is the upper of the two middle
 * elements, not their average.
 *
 * dataArray  the data items (not modified)
 * arraySize  number of items; for 0 or fewer only a notice is printed
 */
void median(const int dataArray[], int arraySize)
{
    int i;
    int *sorted;  /* working copy that is allowed to be reordered */

    printf("\n********\n Median\n********\nThe unsorted array of data is");

    printArray(dataArray, arraySize);  /* output unsorted array */

    if (arraySize <= 0)
    {
        printf("\n\nNo data items: the median is undefined.\n\n");
        return;
    }

    sorted = malloc((size_t)arraySize * sizeof *sorted);
    if (sorted == NULL)
    {
        printf("\n\nOut of memory: the median cannot be computed.\n\n");
        return;
    }
    for (i = 0; i < arraySize; i++)
        sorted[i] = dataArray[i];

    selectionSort(sorted, arraySize);  /* sort the copy */

    printf("\n\nThe sorted array is");
    printArray(sorted, arraySize);  /* output sorted array */

    /* display median element */
    printf("\n\nThe median is the %d-th element of\nthe sorted %d"
           " element array.\nFor this run the median is %d\n\n",
           arraySize/2, arraySize, sorted[arraySize / 2]);

    free(sorted);
}

5

Selection Sort 排序演算法:

/*
 * Sort data[0 .. size-1] into ascending order, in place, by selection sort:
 * each pass finds the smallest element of the unsorted tail and moves it to
 * the front of that tail.
 */
void selectionSort(int data[], int size)
{
    int sortedCount;  /* elements already in their final position */

    for (sortedCount = 0; sortedCount + 1 < size; ++sortedCount)
    {
        int smallestAt = sortedCount;  /* index of the smallest value seen */
        int probe = sortedCount;
        int held;

        /* scan the unsorted tail for something smaller */
        while (++probe < size)
        {
            if (data[probe] < data[smallestAt])
                smallestAt = probe;
        }

        /* already in place: nothing to exchange */
        if (smallestAt == sortedCount)
            continue;

        held = data[sortedCount];
        data[sortedCount] = data[smallestAt];
        data[smallestAt] = held;
    }
}

6

眾數的估算與 histogram 在文字介面中的呈現:

/*
 * Count how often each value 1..9 occurs in dataArray[0 .. arraySize-1],
 * print a frequency table with a text histogram (one '*' per occurrence),
 * and report the most frequent value.
 *
 * Values outside 1..9 are not counted (they would index outside the
 * frequency table); how many were skipped is reported after the table.
 * When several values share the highest frequency, the smallest is reported.
 *
 * dataArray  the data items (not modified)
 * arraySize  number of items
 */
void mode(const int dataArray[], int arraySize)
{
    enum { LOWEST = 1, HIGHEST = 9 };  /* range of values that is tallied */

    int j, k;
    int freq[HIGHEST + 1] = {0};  /* freq[v] = occurrences of v; all start at 0 */
    int rating;
    int skipped = 0;    /* items that fell outside LOWEST..HIGHEST */
    int largest = 0;    /* represents largest frequency */
    int modeValue = 0;  /* represents most frequent response */

    printf("\n********\n  Mode\n********\n");

    /* summarize frequencies, refusing to index outside the table */
    for (j = 0; j < arraySize; j++)
    {
        if (dataArray[j] < LOWEST || dataArray[j] > HIGHEST)
            ++skipped;
        else
            ++freq[dataArray[j]];
    }

    /* output headers for result columns */
    printf("Data  Frequency       Histogram\n"
           "                                1    1    2    2\n"
           "                           5    0    5    0    5\n\n");

    /* output results */
    for (rating = LOWEST; rating <= HIGHEST; rating++) {
        printf("%4d%10d         ", rating, freq[rating]);

        /* keep track of mode value and largest frequency value;
           the strict '>' keeps the first of several tied values */
        if (freq[rating] > largest)
        {
            largest = freq[rating];
            modeValue = rating;
        }

        /* output histogram bar representing frequency value */
        for (k = 1; k <= freq[rating]; k++)
            printf("*");
        printf("\n");
    }

    if (skipped > 0)
        printf("%d value(s) outside %d..%d were ignored.\n",
               skipped, LOWEST, HIGHEST);

    /* display the mode value */
    if (largest == 0)
        printf("The mode is the most frequent value.\n"
               "For this run no value in %d..%d occurred, "
               "so there is no mode.\n", LOWEST, HIGHEST);
    else
        printf("The mode is the most frequent value.\n"
               "For this run the mode is %d which "
               "occurred %d times.\n", modeValue, largest);
}

當然, 你也可以用我們先前談到的 BGIm 圖形介面的 bar3D 來繪製 histogram

範例執行程式

#include <graphics.h>
    ....
/*
 * Open a 640x480 graphics window titled "Histogram" and draw data[] as a
 * row of 3-D bars above a pair of axes.
 *
 * size  number of bars to draw
 * data  data[i] is the height of bar i in units; the value is also written
 *       above the bar, and the bar number i+1 is written below the x-axis
 *
 * Bar i uses colour number i+1 for its fill pattern and its value label.
 */
void histogram(int size, const int data[])
{
    int originX, baseY, slot, pixelsPerUnit;
    int bar;
    char label[40];

    initwindow(640, 480, "Histogram", 50, 50);

    /* frame and title */
    setcolor(YELLOW);
    rectangle(0, 30, 639, 450);
    settextstyle(SANS_SERIF_FONT, HORIZ_DIR, 2);
    setcolor(WHITE);
    outtextxy(275, 0, "Histogram");

    setlinestyle(SOLID_LINE, 0, 2);

    /* y-axis with its arrow head */
    line(60, 420, 60, 60);
    line(50, 70, 60, 60);
    line(70, 70, 60, 60);

    /* x-axis with its arrow head */
    line(60, 420, 560, 420);
    line(550, 410, 560, 420);
    line(550, 430, 560, 420);

    /* axis captions */
    outtextxy(55, 35, "#");
    outtextxy(565, 405, "Data");

    originX = 110;       /* x of the first bar's left edge */
    baseY = 419;         /* y of every bar's bottom edge */
    slot = 45;           /* horizontal distance between bar starts */
    pixelsPerUnit = 12;  /* bar height per unit of data */

    for (bar = 0; bar < size; ++bar)
    {
        int barLeft = originX + bar * slot;
        int barTop = baseY - data[bar] * pixelsPerUnit;
        int colour = bar + 1;

        setfillstyle(INTERLEAVE_FILL, colour);
        /* the bar is 15 pixels narrower than its slot, leaving a gap */
        bar3d(barLeft, barTop, barLeft + slot - 15, baseY, 10, 2);

        /* value above the bar, in the bar's own colour */
        setcolor(colour);
        sprintf(label, "%2d", data[bar]);
        outtextxy(barLeft + 10, barTop - 31, label);

        /* bar number below the axis, back in white */
        setcolor(WHITE);
        sprintf(label, "%2d", colour);
        outtextxy(barLeft + 4, baseY + 5, label);
    }
}

7

陣列資料的列印:

陣列資料在標準輸出入函數庫中並沒有特別的函式來列印, 請自行撰寫迴圈來列印, 自行決定每一列需要列印幾筆資料

/*
 * Write data[0 .. size-1] to standard output, 20 values per row.
 * Every row, including the first, is preceded by a newline, and each value
 * is printed in a field at least 2 characters wide with no separator.
 */
void printArray(const int data[], int size)
{
    const int *cursor = data;  /* next value to print */
    int column = 0;            /* values already printed on the current row */
    int remaining;

    for (remaining = size; remaining > 0; --remaining)
    {
        /* a row is opened just before its first value */
        if (column == 0)
            printf("\n");

        printf("%2d", *cursor++);

        if (++column == 20)
            column = 0;
    }
}

計算機程式設計實習 首頁

製作日期: 101/11/16 by 丁培毅 (Pei-yih Ting)
E-mail: pyting@ntu.edu.tw