java多元线性回归
2016-06-21 16:43
477 查看
package com.lc.v3.scm.datamining.math; /************************************************************************************* * @author * * * * ******************************************************************************/ /*********************************************************************************** * Desc: The regression analyse is a widely used statistices method ,It was always * used for forecast,cause and effect analyse and so on. * --统计分析-〉回归分析,一般分为线形回归和非线性回归 * All of us familiar with the line regression.In fact,this method can extend to non * linear regression ,But the model built based on the analyser's private experience * and his knowledge of practice. * ************************************************************************************/ public class Regression { public Regression() { } /*************************************************************************************** *模型:多元线性回归 *简要说明:根据一个样本集,计算线性回归分析系数。线性回归分析的模型为:Y=k0+k1X1+k2X2+....+knXn,其中,X1,X2,....,Xn为因变量,Y为变量 * k0,k1,....,kn为回归系数,该函数就是要根据一组样本(Xi1,Xi2,...Xin,Yi)i=1,2,...,m[m个样本],根据最小二乘法得原则,计算出 * 最佳得回归分析系数k0,k1,....,kn,从而得到线性回归分析模型,该模型稍加扩展,就可以推广到非线性回归模型 *输入参数: * @param double[][] X 自变量样本集 * @param double[] Y 变量结果集 * @param double[] K 回归系数 * @param int n 回归变量个数 * @param int m 样本个数 *输出参数: * @return double result 0:失败,其他:成功 ****************************************************************************************/ public static double LineRegression( double[][] X, double[] Y, double[] K, int n, int m ) { double result = 0 ; /* *线性回归问题,最终转换为解一个对称线性方程组的求解问题 *线性方程组的系数矩阵为n+1*n+1,常数矩阵为n+1*1 */ int XLen = n+1; int YLen = 1; int i,j,k; double[][] coeffX = new double[XLen][XLen]; double[][] constY = new double[XLen][1]; double[][] resultK = new double[XLen][1]; /* *根据参数,计算所要求解方程组的系数矩阵、常数矩阵 */ double[][] temp = new double[m+1][n+1]; for(i =0;i<n+1;i++) { temp[0][i] = 1; } for(i =0;i<m+1;i++) { temp[i][0] = 1; } for( i=1;i<m+1;i++) for( j=1;j<n+1;j++) temp[i][j]= X[i-1][j-1]; /* *开始计算每一个系数 */ 
for(i=0;i<n+1;i++) { /* *coeffX的第i行和i列的系数,注意,是对称矩阵 */ for( j= i;j<n+1;j++){ double col = 0 ; for(k=1;k<m+1;k++) col+= ( temp[k][i]*temp[k][j] ); coeffX[i][j] = col; coeffX[j][i] = col; } /* *constY的第i个元素 */ double conTemp =0 ; for(k=1;k<m+1;k++) conTemp+= ( Y[k-1]*temp[k][i]); constY[i][0]=conTemp; } /* *调用Sequation方法,解线性方程组 */ result = Sequation.guassEquation(coeffX,constY,resultK,XLen,1); if(result ==0 ) { //System.out.println("The regression is failed,please check the sample point \n"); return result; }else{ for(i= 0;i<n+1;i++) K[i] = resultK[i][0]; } return result; } /***************************************************************************************** *模型名称:样本自优化线性回归 *简要说明:该模型是对简单线性回归模型的改良,其基本的考虑是所提供的样本数据中可能有些异常点使得模型的精度大大降低,系统通过一个度量函数自动 * 找那种对模型的拟合度最差的样本点,然后去掉该样本点再进行回归运算.本模型用实际值与模型预测值之间的平均偏差作为度量模型准确性的效 * 用函数。 * 如果平均偏差减少,标识模型在去掉噪声数据后模型拟合的效果增加,否则停止继续优化的步骤。另外,无论如何优化,都必须至少保留样本保有率 * 所确定的最少样本个数 *注:该模型尚未得到理论论证,是作者自己的经验总结 *函数参数: * @param double[][] X 自变量样本集 * @param double[] Y 变量结果集 * @param int n 回归变量个数 * @param double[] K 最终回归系数(返回) * @param int m 样本个数 * @param double retainRate 样本最低保有率 * @param int[] LossPoint 被丢弃的样本点(返回) *输出参数: * @return double res -1:失败,1:成功 *******************************************************************************************/ public static double optLineRegression( double[][] X, double[] Y, double[] K, int n, int m, double retainRate , int[] LossPoint){ double res = -1; if(n<1||m<1){ //System.out.println("The parameter is not normal ,please check it\n"); return res; } if(retainRate>=1.0||retainRate<=0.0){ //System.out.println("The retain parameter is not in 0 and 1 \n"); return res; } //必须确保的最小样本个数 Double Dtemp = new Double(m*retainRate); int minsample = Dtemp.intValue(); //被丢弃的样本点的个数 int lossnum =0 ; int[] LossPointTemp = new int[m]; //进行第一次回归 double temp = LineRegression(X,Y, K,n,m); if(temp == 0){ //System.out.println("The regression operation is failed\n"); return res; } //第一次的平均误差 double ErrorStd = avgerror(X, Y, K,n, m); 
double[][] SampleX = X; double[] SampleY = Y; double[] CoeffK = K; int SampleNum = m; /* *记载样本位置的变化情况 */ int[] change = new int[m]; for(int k=0;k<m;k++) change[k]=k; int index_max = -1; for (int i=m;i>minsample ;i-- ) { /* *找当前回归样本中的误差最大的样本index。 */ index_max = maxErrorIndex(SampleX,SampleY,CoeffK,n,SampleNum); if(index_max == -1) return -1; /* *第index_max为误差最大样本,去掉该样本 */ int Loss = change[index_max]; lossnum +=1; SampleNum -=1; double[][] SampleXTemp = SampleX; SampleX = new double[SampleN 4000 um] ; double[] SampleYTemp = SampleY; SampleY = new double[SampleNum]; for(int j= 0;j<index_max;j++){ for(int k=0;k<n;k++) { SampleX[j][k]=SampleXTemp[j+1][k]; } SampleY[j]=SampleYTemp[j]; } for(int j= index_max;j<SampleNum;j++){ for(int k=0;k<n;k++){ SampleX[j][k]=SampleXTemp[j+1][k]; } SampleY[j] = SampleYTemp[j+1]; change[j] = change[j+1]; } /* *利用新的样本进行回归 */ res=LineRegression(SampleX,SampleY,CoeffK,n,SampleNum); /* *比较新的预测模型误差与老模型的误差,如果没有改良,结束,否则,记载相关结果,并继续 */ double ErrorOpt = avgerror(SampleX,SampleY,CoeffK,n,SampleNum); if (ErrorOpt>=ErrorStd)//优化过程没有任何改良 { return 1; } else { /* *记载样本删减情况,记载回归系数 */ LossPoint[m-i] = Loss; K=CoeffK; } } return 1; } /************************************************************************************ *简要说明:计算回归模型的平均误差 *输入参数: * @param double[][] X 自变量样本集 * @param double[] Y 变量结果集 * @param int n 回归变量个数 * @param double[] K 回归系数 * @param int m 样本个数 *输出参数: * @return double Ierror :-1失败 平均误差 *************************************************************************************/ public static double avgerror(double[][] X,double[] Y,double[] K,int n,int m){ double res = -1; /* *YF用于存放模型的预测结果 */ double[] YF = new double[m]; for(int i=0;i<m;i++){ YF[i] = K[0]; for(int j=0;j<n;j++) YF[i]+=X[i][j]*K[j+1]; } /* *计算初始预测与真实值的误差(平均维和距) */ res = Base.dimaddavg(Y,YF); return res; } /************************************************************************************ *简要说明:计算回归模型的误差最大的样本点的index_id *输入参数: * @param double[][] X 自变量样本集 * @param double[] Y 
变量结果集 * @param int n 回归变量个数 * @param double[] K 回归系数 * @param int m 样本个数 *输出参数: * @return int Index_id :-1失败 ************************************************************************************/ public static int maxErrorIndex(double[][] X,double[] Y,double[] K,int n,int m){ int index_id = -1; /* *YF用于存放模型的预测结果 */ double[] YF = new double[m]; for(int i=0;i<m;i++){ YF[i] = K[0]; for(int j=0;j<n;j++) YF[i]+=X[i][j]*K[j+1]; } /* *计算误差最大的样本点的index */ index_id = Base.maxerrordim(Y,YF); return index_id; } }
package com.lc.v3.scm.datamining.math; public class Base { public Base(){ } /* *Desc:get the max value for a nonmultidimensional array */ public static double max( double[] sample){ int n = sample.length; double result =sample[0]; for (int i=1;i<n ;i++ ) { if (sample[i]>result) { result=sample[i]; } } return result; } /* *Desc:get the min value for a nonmultidimensional array **/ public static double min( double[] sample){ int n = sample.length; double result =sample[0]; for (int i=1;i<n ;i++ ) { if (sample[i]<result) { result=sample[i]; } } return result; } /* *Desc:get the average value for a nonmultidimensional array */ public static double avg( double[] sample){ int n = sample.length; double result =sample[0]; for (int i=1;i<n ;i++ ) { result +=sample[i]; } return result/n; } /* *Desc: get the variance (方差)for a nonmultidimensional array * (Inpute)输入参数: * @param double[] sample 样本数组 * (OutPute)输出参数: * @return double if ==-1,样本为空 * (Logic)计算逻辑: * sample - 平均值的平方求和 **/ public static double val( double[] sample){ int n = sample.length; double result = -1; if (n<=0) { return result ; }else{ double avg = avg(sample); result = 0; for(int j=0;j<n;j++) result += (sample[j]-avg)*(sample[j]-avg); } result/=n; return result; } /* *Desc: get the standard variance(标准方差) for a nonmultidimensional array * (Inpute)输入参数: * @param double[] sample 样本数组 * (Output)输出参数: * @return double if ==-1,样本为空 * (Logic)计算逻辑: * sample[]的方差开方 **/ public static double stdval (double[] sample){ int n = sample.length; double result = -1; if(n<=0){ return result; }else { result = val(sample); result = Math.sqrt(result); } return result; } /* *Desc: get the max distance for a nonmultidimensional array * (Inpute)输入参数: * @param double[] sample 样本数组 * (Outpute)输出参数: * @return double if ==-1,样本为空 * (Logic)计算逻辑: * The array of sample 's max value subtract Its * min value [sample[]的最大值-最小值] **/ public static double maxdis (double[] sample){ int n = sample.length; double result = -1; if(n<=0){ return 
result; }else { result = max(sample)-min(sample); } return result; } /* *Desc: get the average warp for a nonmultidimensional array * (Inpute)输入参数: * @param double[] sample 样本数组 * (Outpute)输出参数: * @return double if ==-1,样本为空 * (Logic)计算逻辑: * sample[]的每一个元素-平均值的绝对值的平均值 **/ public static double meddev (double[] sample){ int n = sample.length; double result = -1; if(n<=0){ return result; }else { double avg = avg(sample); result =0; for(int j=0;j<n;j++){ result += Math.abs(sample[j]-avg); } } result /=n; return result; } /* *Desc: get the inside multiplication of two array *(Inpute)输入参数: * @param double[] sample1 * @param double[] sample2 *(OutPute)输出参数: * @return double res; *(Logic)处理逻辑: * Sum the value of two array's corresponding element production [对应元素乘积的和] **/ public static double inmul(double[] sample1,double[] sample2){ double result = 1.0/0.0; int len1 = sample1.length; int len2 = sample2.length; if(len1 != len2){ System.out.println("two vector dose not at the same dimension,please check it\n"); return result ; }else{ result = 0; for (int i=0;i<len1 ;i++ ) { result += sample1[i]*sample2[i]; } } return result; } /* *Desc : get the opposite production of two array to comprise a new array *(Inpute)输入参数: * @param double[] sample1 * @param double[] sample2 *(Outpute)输出参数: * @return double[] result; *(Logic)处理逻辑: * 形成新的向量,元素为对应元素的乘积 **/ public static double[] vecmul(double[] sample1,double[] sample2){ int len1 = sample1.length; int len2 = sample2.length; double[] result = new double[len1]; if(len1 != len2){ System.out.println("two vector dose not at the same dimension,please check it\n"); return result ; }else{ for (int i=0;i<len1 ;i++ ) { result[i]= sample1[i]*sample2[i]; } } return result; } /* *Desc : sort the element of the array into a from great to little order */ public void decorder(double[] sample){ int n = sample.length; for(int i=0;i<n-1;i++){ for(int j=i+1;j<n;j++){ if (sample[i]<sample[j]) { double temp = sample[i]; sample[i]=sample[j]; 
sample[j]=sample[i]; } } } } /* *Desc : sort the element of the array into a from little to great order */ public void ascorder(double[] sample){ int n = sample.length; for(int i=0;i<n-1;i++){ for(int j=i+1;j<n;j++){ if (sample[i]>sample[j]) { double temp = sample[i]; sample[i]=sample[j]; sample[j]=sample[i]; } } } } /* *Desc: get two point distance of standard (Euclid distance) *Inpute: * @param double[] Pfrom * @param double[] Pto * Outpute: * @param double res * Logic: */ public static double eudis(double[] Pfrom ,double[] Pto){ int Lfrom = Pfrom.length; int Lto = Pto.length; if (Lfrom != Lto) { return -1; } double res = 0; for(int i=0;i<Lfrom;i++){ double temp = Pfrom[i]-Pto[i]; temp*=temp; res+=temp; } return Math.sqrt(res); } /* *Desc : get the min value of two array's corresponding element's distance *Inpute: * @param double[] Pfrom * @parma double[] Pto *Outpute: * @return double res *Logic: */ public static double mindis(double[] Pfrom,double[] Pto){ int Lfrom = Pfrom.length; int Lto = Pto.length; if (Lfrom != Lto) { return -1; } double res = Math.abs(Pfrom[0]-Pto[0]); for(int i=1;i<Lfrom;i++){ double temp = Math.abs(Pfrom[i]-Pto[i]); if (temp < res ) { res = temp; } } return res; } /* *Desc : get the max value of two array's corresponding element's distance *Inpute: * @param double[] Pfrom * @parma double[] Pto *Outpute: * @return double res *Logic: */ public static double maxdis(double[] Pfrom,double[] Pto){ int Lfrom = Pfrom.length; int Lto = Pto.length; if (Lfrom != Lto) { return -1; } double res = Math.abs(Pfrom[0]-Pto[0]); for(int i=1;i<Lfrom;i++){ double temp = Math.abs(Pfrom[i]-Pto[i]); if (temp > res ) { res = temp; } } return res; } /* *Desc : get the total sum value of two array's corresponding element's distance *Inpute: * @param double[] Pfrom * @parma double[] Pto *Outpute: * @return double res *Logic: */ public static double sumdis(double[] Pfrom,double[] Pto){ int Lfrom = Pfrom.length; int Lto = Pto.length; if (Lfrom != Lto) { return -1; } 
double res = Math.abs(Pfrom[0]-Pto[0]); for(int i=1;i<Lfrom;i++){ double temp = Math.abs(Pfrom[i]-Pto[i]); res += temp; } return res; } /* *Desc : get the averagedistance of two array's corresponding element's distance *Inpute: * @param double[] Pfrom * @parma double[] Pto *Outpute: * @return double res *Logic: */ public static double dimaddavg(double[] Pfrom,double[] Pto){ int Lfrom = Pfrom.length; int Lto = Pto.length; if (Lfrom != Lto) { return -1; } double res = Math.abs(Pfrom[0]-Pto[0]); for(int i=1;i<Lfrom;i++){ double temp = Math.abs(Pfrom[i]-Pto[i]); res += temp; } return res/Lfrom; } /* *Desc : get the diemnsional id of two array's corresponding element's distance * who has tha max value *Inpute: * @param double[] Pfrom * @parma double[] Pto *Outpute: * @return Int id *Logic: */ public static int maxerrordim(double[] Pfrom,double[] Pto){ int Lfrom = Pfrom.length; int Lto = Pto.length; if (Lfrom != Lto) { return -1; } double maxerror=Math.abs(Pfrom[0]-Pto[0]); int res = 0; for(int i=1;i<Lfrom;i++){ double temp = Math.abs(Pfrom[i]-Pto[i]); if ( temp>maxerror) { res = i; maxerror=temp; } } return res; } }
/**
 * Solvers for systems of linear equations {@code A·X = B}, where {@code A} is the
 * coefficient matrix and {@code B} the constant matrix.
 *
 * <p>Linear systems are used widely, for example in regression analysis and in linear
 * programming for operations management. This class offers basic methods for solving
 * them; the first one provided is Gaussian elimination.
 *
 * @author leitw
 */
public class Sequation {

    public Sequation() {
    }

    /**
     * Solves {@code coeffA · X = constB} by Gaussian elimination with full pivoting.
     *
     * <p>Both {@code coeffA} and {@code constB} are overwritten during elimination.
     *
     * @param coeffA  coefficient matrix, n x n (modified)
     * @param constB  right-hand sides, n x m (modified)
     * @param resultX receives the solution, n x m
     * @param n       order of {@code coeffA}
     * @param m       number of columns of {@code constB}
     * @return the determinant of {@code coeffA}; 0 means the matrix is singular (or
     *         {@code n <= 0}) and no solution was written to {@code resultX} — the
     *         singular case may have infinitely many solutions and is not handled here
     */
    public static double guassEquation(double[][] coeffA, double[][] constB,
            double[][] resultX, int n, int m) {
        if (n <= 0) {
            return 0;
        }
        double det = 1;

        // change[c] is the original index of the unknown currently held in column c;
        // column swaps reorder the unknowns and are undone when the result is written.
        int[] change = new int[n];
        for (int i = 0; i < n; i++) {
            change[i] = i;
        }

        for (int i = 0; i < n - 1; i++) {
            // Find the pivot: the entry of largest magnitude in the remaining sub-matrix.
            int row = i;
            int line = i;
            double max = Math.abs(coeffA[i][i]);
            for (int j = i; j < n; j++) {
                for (int k = i; k < n; k++) {
                    double candidate = Math.abs(coeffA[j][k]);
                    if (candidate > max) {
                        max = candidate;
                        row = j;
                        line = k;
                    }
                }
            }
            // An all-zero sub-matrix means the determinant is 0.
            if (max == 0) {
                return 0;
            }

            // Move the pivot to position (i, i). Each swap flips the determinant's sign.
            if (row != i) {
                for (int k = i; k < n; k++) {
                    double swap = coeffA[i][k];
                    coeffA[i][k] = coeffA[row][k];
                    coeffA[row][k] = swap;
                }
                for (int k = 0; k < m; k++) {
                    double swap = constB[i][k];
                    constB[i][k] = constB[row][k];
                    constB[row][k] = swap;
                }
                det = -det;
            }
            if (line != i) {
                for (int j = 0; j < n; j++) {
                    double swap = coeffA[j][line];
                    coeffA[j][line] = coeffA[j][i];
                    coeffA[j][i] = swap;
                }
                int moved = change[i];
                change[i] = change[line];
                change[line] = moved;
                det = -det;
            }

            // Normalise row i by the pivot, then eliminate column i from the rows below.
            double pivot = coeffA[i][i];
            det *= pivot;
            for (int k = i + 1; k < n; k++) {
                coeffA[i][k] /= pivot;
            }
            for (int k = 0; k < m; k++) {
                constB[i][k] /= pivot;
            }
            coeffA[i][i] = 1;

            for (int j = i + 1; j < n; j++) {
                double factor = coeffA[j][i];
                for (int k = i + 1; k < n; k++) {
                    coeffA[j][k] -= factor * coeffA[i][k];
                }
                for (int k = 0; k < m; k++) {
                    constB[j][k] -= factor * constB[i][k];
                }
                coeffA[j][i] = 0;
            }
        }

        // Check the last pivot before dividing by it, so a singular system returns 0
        // instead of writing NaN/Infinity into the result.
        double last = coeffA[n - 1][n - 1];
        if (last == 0) {
            return 0;
        }
        det *= last;

        // Back substitution.
        for (int k = 0; k < m; k++) {
            constB[n - 1][k] /= last;
            for (int i = n - 2; i >= 0; i--) {
                for (int j = i + 1; j < n; j++) {
                    constB[i][k] -= coeffA[i][j] * constB[j][k];
                }
            }
        }

        // Undo the column permutation to restore the original order of the unknowns.
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < m; j++) {
                resultX[change[i]][j] = constB[i][j];
            }
        }
        return det;
    }
}
相关文章推荐
- [Java学习] Java中Hashtable类与HashMap类的区别详解
- 在springmvc配置文件里面写默认的注解映射的支持的时候:<mvc:annotation-driven />The prefix "mvc" for element "mvc:annotation
- 学习整合hibernate springmvc spring的 心得(2)
- [Java学习] 原来Java中有两个ArrayList
- [Java学习] Java实现验证码具体代码(图片、汉字)
- intellij打开工程在每个java文件上有个红色的无效符的解决办法
- Spring3.2.0之后各个版本完整包下载地址
- Java函数参数类型后添加三点的含义与用法
- Spring注解@Autowired 和@Resource的区别
- 《Spring实战》学习笔记-第六章:web视图解析
- [Java学习] 对Java的面对对象编程中对象和引用以及内部类的理解
- HashMap源码分析
- JAVA反射——给属性赋值
- java多线程(三)-同步工具Semaphore
- Eclipse快捷键大全
- JAVA极光推送
- ssh中org.springframework.orm.hibernate4.support.OpenSessionInViewFilter的作用及配置
- 《Spring实战》学习笔记-第五章:构建Spring web应用
- java工具类——字符串类型的时间格式转换为Timestamp类型
- springMVC文件上传优化