#!/usr/bin/env perl
#
# Strictly a filter, this histograms incoming data points into requested bin
# sizes.  Whitespace-separated numeric tokens are read from standard input and
# counted into buckets of the width given with the -interval option.  Buckets
# are aligned on multiples of the interval width, i.e. bucket k covers
# [k*width, (k+1)*width).
#
# NOTE: in this version -interval is required; an automatic default of
# (max-min)/10 is not implemented.
#
# &: Jan 28, 2003
#
use strict;
use warnings;
use POSIX qw(floor ceil);
use Scalar::Util qw(looks_like_number);

my $IntervalWidth = undef;
my $UseStar       = undef;    # Print horiz histogram with hash-marks
my $Scale         = 1;        # If $UseStar, each * represents a count of this much.

# Option parsing.  Options are matched by prefix (-int, -star, -scale); the
# first argument that does not start with '-' ends option processing.
while (defined(my $arg = shift(@ARGV))) {
    if ($arg =~ /^-h(elp)?$/) {
        usage();
    }
    elsif ($arg =~ /^-int/) {
        $IntervalWidth = shift(@ARGV);
    }
    elsif ($arg =~ /^-star/) {
        $UseStar = 1;
    }
    elsif ($arg =~ /^-scale/) {
        $Scale = shift(@ARGV);
    }
    elsif ($arg =~ /^-/) {
        print STDERR "Unknown option: $arg\n";
        usage();
    }
    else {
        last;
    }
}

# The width must be a strictly positive number: it is used as a divisor below,
# so values such as "0.0" or non-numeric text must be rejected here.
die "You need to specify the Interval Width (bucket size) with -int in this version"
    if !defined($IntervalWidth)
    || !looks_like_number($IntervalWidth)
    || $IntervalWidth <= 0;

my ($min, $max);
my ($sum, $sumSq, $num) = (0, 0, 0);
my $skipped = 0;    # Tokens ignored because they are not finite numbers
my %bins;           # Bucket index (floor(value/width)) => count

while (my $line = <STDIN>) {
    # split ' ' discards leading whitespace, so no empty fields are produced.
    foreach my $val (split(' ', $line)) {
        if (!isFiniteNumber($val)) {
            $skipped++;
            next;
        }

        $max = $val if (!defined($max) || $max < $val);
        $min = $val if (!defined($min) || $min > $val);
        $sum   += $val;
        $sumSq += $val * $val;
        $num++;

        # Increment the right bin.  Adding 0 turns a negative zero into plain
        # zero so that the hash key is always "0", never "-0".
        my $bin = floor($val / $IntervalWidth) + 0;
        $bins{$bin}++;

        # Progress indicator for very large inputs.
        print STDERR "$num\r" if ($num % 100000 == 0);
        print STDERR "$num\n" if ($num % 1000000 == 0);
    }
}

print STDERR "Skipped $skipped non-numeric token(s)\n" if $skipped;

# Nothing to report when no usable data point was read.
exit(0) if !defined($min) || !defined($max);

# Misc Stats
my $mean = $num == 0 ? 0 : $sum / $num;
my $var  = $num == 0 ? 0 : $sumSq / $num - $mean * $mean;
$var = 0 if $var < 0;    # Round-off can push a ~0 variance below zero; sqrt would die.
my $sd = sqrt($var);

# A missing, non-numeric or non-positive scale falls back to one count per star.
$Scale = 1 if !defined($Scale) || !looks_like_number($Scale) || $Scale <= 0;

print "# NumSamples = $num; Max = $max; Min = $min\n";
print "# Mean = $mean; Variance = $var; SD = $sd\n";
print "# Each * represents a count of $Scale\n" if ($UseStar);

# Walk every bucket from the one holding the minimum to the one holding the
# maximum (inclusive), using the same index that was used while counting so
# that labels and counts always refer to the same range.
my $firstBin = floor($min / $IntervalWidth) + 0;
my $lastBin  = floor($max / $IntervalWidth) + 0;
for (my $bin = $firstBin; $bin <= $lastBin; $bin++) {
    my $count = exists($bins{$bin}) ? $bins{$bin} : 0;
    my $low   = $bin * $IntervalWidth;
    my $hi    = ($bin + 1) * $IntervalWidth;
    my $tag   = sprintf("%.4f - %.4f", $low, $hi);
    if ($UseStar) {
        printf "%20s [%6d]: %s\n", $tag, $count, getStringOfLength($count / $Scale);
    }
    else {
        printf "%20s: %d\n", $tag, $count;
    }
}

#----------------------------------------------------------------------

# Return true when $token is a number that is neither NaN nor +/-infinity.
sub isFiniteNumber {
    my ($token) = @_;
    return 0 if !defined($token) || !looks_like_number($token);
    return 0 if $token != $token;    # NaN is the only value not equal to itself
    my $inf = 9**9**9;
    return 0 if $token == $inf || $token == -$inf;
    return 1;
}

# Return a string of '*' characters of length $n.  A fractional $n is rounded
# up (2.5 gives three stars); zero or a negative $n gives the empty string.
sub getStringOfLength {
    my ($n) = @_;
    return "" if !defined($n) || $n <= 0;
    return "*" x ceil($n);
}

# Print the usage message and terminate with exit status 1.
sub usage {
    print "Usage: histo [-help] -interval num [-stars [-scale num]]\n";
    print "Strictly a filter, this histograms incoming numerical data points into\n"
        . "requested bin sizes. The bin size must be given with the -interval\n"
        . "option in this version.\n";
    print "Use -stars to print a horizontal bar of stars for each bin\n";
    print "Use -scale num with -stars to make each star represent a count of num\n";
    exit 1;
}