Vishal desai’s Oracle Blog

December 10, 2012

Calculate Median from Frequency in SQL

Filed under: SQL — vishaldesai @ 11:19 pm

If numbers are stored sequentially then median function can be used to calculate median. But what if you want to calculate median based on frequencies? Either you can convert frequencies into multiple values in rows format and then use median function or calculate median by using frequencies as described by http://www.onlinemathlearning.com/median-frequency-table.html

Here is how I put that into SQL format:

SQL> set feed on
SQL> set echo on
SQL> drop table v_median purge;

Table dropped.

SQL> create table v_median (id number, cnt number);

Table created.

SQL> delete from v_median;

0 rows deleted.

SQL> insert into v_median values (1,2);

1 row created.

SQL> insert into v_median values (2,5);

1 row created.

SQL> insert into v_median values (3,1);

1 row created.

SQL> insert into v_median values (4,3);

1 row created.

SQL> insert into v_median values (5,2);

1 row created.

SQL> commit;

Commit complete.

SQL> WITH qb1 AS
  2    (SELECT id,
  3      cnt,
  4      SUM(cnt) over (order by id rows unbounded preceding) tot
  5    FROM v_median
  6    ORDER BY id
  7    ),
  8    qb2 AS
  9    (SELECT MAX(tot) AS n ,
 10      CASE
 11        WHEN mod(MAX(tot),2) = 0
 12        THEN 'even'
 13        WHEN mod(MAX(tot),2) = 1
 14        THEN 'odd'
 15      END AS oe
 16    FROM
 17      (SELECT id,
 18        cnt,
 19        SUM(cnt) over (order by id rows unbounded preceding) tot
 20      FROM v_median
 21      ORDER BY id
 22      )
 23    )
 24  SELECT median
 25  FROM
 26    (SELECT DISTINCT
 27      CASE
 28        WHEN (qb2.oe    ='odd'
 29        AND (qb2.n+1)/2 = qb1.tot)
 30        THEN qb1.id
 31        WHEN (qb2.oe='even')
 32        THEN
 33          (SELECT median(id)
 34          FROM
 35            ( SELECT DISTINCT id
 36            FROM
 37              (SELECT id
 38              FROM
 39                (SELECT id FROM qb1,qb2 WHERE tot>((qb2.n/2)+1)
 40                )
 41              WHERE rownum=1
 42              UNION
 43              SELECT id
 44              FROM
 45                (SELECT id FROM qb1,qb2 WHERE tot<((qb2.n/2)+1)ORDER BY tot DESC
 46                )
 47              WHERE rownum=1
 48              )
 49            )
 50          )
 51      END AS median
 52    FROM qb1,
 53      qb2
 54    )
 55  WHERE median IS NOT NULL;

    MEDIAN                                                                                                                                                                                              
----------                                                                                                                                                                                              
         2                                                                                                                                                                                              

1 row selected.

SQL> delete from v_median;

5 rows deleted.

SQL> insert into v_median values (0,11);

1 row created.

SQL> insert into v_median values (1,9);

1 row created.

SQL> insert into v_median values (2,5);

1 row created.

SQL> insert into v_median values (3,10);

1 row created.

SQL> insert into v_median values (4,15);

1 row created.

SQL> commit;

Commit complete.

SQL> WITH qb1 AS
  2    (SELECT id,
  3      cnt,
  4      SUM(cnt) over (order by id rows unbounded preceding) tot
  5    FROM v_median
  6    ORDER BY id
  7    ),
  8    qb2 AS
  9    (SELECT MAX(tot) AS n ,
 10      CASE
 11        WHEN mod(MAX(tot),2) = 0
 12        THEN 'even'
 13        WHEN mod(MAX(tot),2) = 1
 14        THEN 'odd'
 15      END AS oe
 16    FROM
 17      (SELECT id,
 18        cnt,
 19        SUM(cnt) over (order by id rows unbounded preceding) tot
 20      FROM v_median
 21      ORDER BY id
 22      )
 23    )
 24  SELECT median
 25  FROM
 26    (SELECT DISTINCT
 27      CASE
 28        WHEN (qb2.oe    ='odd'
 29        AND (qb2.n+1)/2 = qb1.tot)
 30        THEN qb1.id
 31        WHEN (qb2.oe='even')
 32        THEN
 33          (SELECT median(id)
 34          FROM
 35            ( SELECT DISTINCT id
 36            FROM
 37              (SELECT id
 38              FROM
 39                (SELECT id FROM qb1,qb2 WHERE tot>((qb2.n/2)+1)
 40                )
 41              WHERE rownum=1
 42              UNION
 43              SELECT id
 44              FROM
 45                (SELECT id FROM qb1,qb2 WHERE tot<((qb2.n/2)+1)ORDER BY tot DESC
 46                )
 47              WHERE rownum=1
 48              )
 49            )
 50          )
 51      END AS median
 52    FROM qb1,
 53      qb2
 54    )
 55  WHERE median IS NOT NULL;

    MEDIAN                                                                                                                                                                                              
----------                                                                                                                                                                                              
       2.5                                                                                                                                                                                              

1 row selected.

SQL> spool off

Advertisements

Leave a Comment »

No comments yet.

RSS feed for comments on this post. TrackBack URI

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

Blog at WordPress.com.

%d bloggers like this: