/* Step 1 imports the original data file and includes Principal Component Analysis.
Throughout the file some outputs are suppressed for a faster workflow.  Remove the "noprint" option to print out the results.  These were placed afterwards. */
/* Read the raw sword measurements from the CSV file into All_Sword_Data,
   replacing any existing dataset of that name. */
proc import
  datafile="C:\Users\Kristy gd\Desktop\SAS data\SwordDataStats.csv"
  out=All_Sword_Data
  dbms=csv
  replace;
run;

/* Rescale every variable from Length through StDev_DashDist (dataset column
   order) with METHOD=RANGE, i.e. onto a 0-1 scale, so that linear data and
   the rivet x,y,z values recorded on different scales are comparable.
   NOTE(review): the original note said this is done after clusters are
   determined, but in this script it runs before any clustering step -
   confirm the intended order. */
proc stdize data=All_sword_data out=All_sword_data method=Range;
  var Length--StDev_DashDist;
run;


/* PCs for the blade profile and cross section Fourier transforms were run using the SHAPE princomp and included in the file input above.

The following is a series of principal component analyses.  
These combine various variables based on overall blade shape (blade profile, hilt profile, cross section, and all non-decorative data). 
Each princomp is given its own dataset (so that the different principal components can be examined separately).
Extraneous data is removed.*/
/* Principal components of the hilt coefficients, computed on the covariance
   matrix (COV). Component scores Prin1, Prin2, ... are written to Hilts_PCA;
   printed output is suppressed (NOPRINT).
   The VAR list holds HTP_* and HSP_* coefficients A-D for indices 1-20
   (160 variables) and is kept in its original order.
   NOTE(review): HTP/HSP are presumably two hilt outline coefficient sets -
   confirm against the data dictionary. */
proc princomp
  data=All_Sword_Data
  out=Hilts_PCA
  cov
  noprint;
  var
    HTP_A1  HTP_B1  HTP_C1  HTP_D1    HTP_A2  HTP_B2  HTP_C2  HTP_D2
    HTP_A3  HTP_B3  HTP_C3  HTP_D3    HTP_A4  HTP_B4  HTP_C4  HTP_D4
    HTP_A5  HTP_B5  HTP_C5  HTP_D5    HTP_A6  HTP_B6  HTP_C6  HTP_D6
    HTP_A7  HTP_B7  HTP_C7  HTP_D7    HTP_A8  HTP_B8  HTP_C8  HTP_D8
    HTP_A9  HTP_B9  HTP_C9  HTP_D9    HTP_A10 HTP_B10 HTP_C10 HTP_D10
    HTP_A11 HTP_B11 HTP_C11 HTP_D11   HTP_A12 HTP_B12 HTP_C12 HTP_D12
    HTP_A13 HTP_B13 HTP_C13 HTP_D13   HTP_A14 HTP_B14 HTP_C14 HTP_D14
    HTP_A15 HTP_B15 HTP_C15 HTP_D15   HTP_A16 HTP_B16 HTP_C16 HTP_D16
    HTP_A17 HTP_B17 HTP_C17 HTP_D17   HTP_A18 HTP_B18 HTP_C18 HTP_D18
    HTP_A19 HTP_B19 HTP_C19 HTP_D19   HTP_A20 HTP_B20 HTP_C20 HTP_D20
    HSP_A1  HSP_B1  HSP_C1  HSP_D1    HSP_A2  HSP_B2  HSP_C2  HSP_D2
    HSP_A3  HSP_B3  HSP_C3  HSP_D3    HSP_A4  HSP_B4  HSP_C4  HSP_D4
    HSP_A5  HSP_B5  HSP_C5  HSP_D5    HSP_A6  HSP_B6  HSP_C6  HSP_D6
    HSP_A7  HSP_B7  HSP_C7  HSP_D7    HSP_A8  HSP_B8  HSP_C8  HSP_D8
    HSP_A9  HSP_B9  HSP_C9  HSP_D9    HSP_A10 HSP_B10 HSP_C10 HSP_D10
    HSP_A11 HSP_B11 HSP_C11 HSP_D11   HSP_A12 HSP_B12 HSP_C12 HSP_D12
    HSP_A13 HSP_B13 HSP_C13 HSP_D13   HSP_A14 HSP_B14 HSP_C14 HSP_D14
    HSP_A15 HSP_B15 HSP_C15 HSP_D15   HSP_A16 HSP_B16 HSP_C16 HSP_D16
    HSP_A17 HSP_B17 HSP_C17 HSP_D17   HSP_A18 HSP_B18 HSP_C18 HSP_D18
    HSP_A19 HSP_B19 HSP_C19 HSP_D19   HSP_A20 HSP_B20 HSP_C20 HSP_D20
  ;
run;
	/* Principal components of the six rivet coordinates (R1 and R2, x/y/z),
	   computed on the covariance matrix. Scores go to Rivets_PCA; printed
	   output is left on for this step. */
	proc princomp data=All_Sword_Data out=Rivets_PCA cov;
	  var R1x R1y R1z
	      R2x R2y R2z;
	run;




/* Remove unnecessary variables and add the new principal components.
   1. Drop the raw hilt coefficient columns from the master table.
   2. From each PCA output keep only the leading components, renamed with a
      prefix (HPrin* for hilts, RPrin* for rivets) so they do not collide.
   3. Sort all three tables by Number and merge them back together. */
data All_sword_data;
  set All_sword_data(drop=HTP_A1--HSP_D20);
run;

/* Hilts: keep Prin1-Prin11 as HPrin1-HPrin11.
   A single DROP= list and a single RENAME= list are used; the earlier form
   repeated each option many times and so depended on how SAS resolves
   duplicate data set options.
   NOTE(review): drop=style--HSP_D20 is positional - it assumes Number is
   stored outside that column range; confirm against the CSV layout. */
data Hilts_PCA2;
  set Hilts_PCA(
    drop=style--HSP_D20 Prin12-Prin160
    rename=(Prin1=HPrin1 Prin2=HPrin2 Prin3=HPrin3 Prin4=HPrin4
            Prin5=HPrin5 Prin6=HPrin6 Prin7=HPrin7 Prin8=HPrin8
            Prin9=HPrin9 Prin10=HPrin10 Prin11=HPrin11));
run;

/* Rivets: keep Prin1-Prin5 as RPrin1-RPrin5 (Prin6 is discarded).
   The explicit RUN closes this step instead of relying on the next PROC. */
data Rivets_pca2;
  set Rivets_PCA(
    drop=style--HSP_D20 Prin6
    rename=(Prin1=RPrin1 Prin2=RPrin2 Prin3=RPrin3 Prin4=RPrin4
            Prin5=RPrin5));
run;

/* MERGE ... BY requires every input to be sorted by the key. */
proc sort data=All_sword_data out=All_sword_data; by Number; run;
proc sort data=Hilts_PCA2 out=Hilts_PCA2; by Number; run;
proc sort data=Rivets_pca2 out=Rivets_pca2; by Number; run;

data All_sword_data;
  merge All_sword_data Hilts_PCA2 Rivets_pca2;
  by Number;
run;

/* Step 2: Cluster Analysis for groupings.
There are 6 of these: Location, Blades, Cross Sections, Hilts, Rivets, and No Decorative Data

The first set of cluster analyses relates to Location.
	First, an ACECLUS transform is used on the relevant variables, since the groups are not assumed to be equal.
This creates a set of canonical variables to use for cluster analysis in the next step.*/
/* ACECLUS on the variables Lat through Lon; the canonical variables
   (can1, can2) are written to Location_Ace. */
proc aceclus data=All_sword_data out=Location_Ace p=.03 noprint;
  var Lat--Lon;
run;

/* Ward clustering on the canonical variables. CCC and PSEUDO request the
   statistics used below to pick the number of clusters. */
ods graphics on;
proc cluster data=Location_Ace method=ward ccc pseudo print=15
             outtree=Location_Tree;
  var can1 can2;
  id number;
  format number;
run;
ods graphics off;

/* Graphs of CCC, pseudo F and pseudo t-squared suggest possible groupings at
   3, 6 and 10 clusters. Each cut gets its own work table and a scatter plot
   to check for outliers. */
goptions vsize=9in hsize=6.4in htext=.9pct htitle=3pct;
axis1 order=(0 to 1 by 0.2);

proc tree data=Location_Tree out=Location__3 nclusters=3
          haxis=axis1 horizontal;
  height _rsq_;
  copy can1 can2;
  id number;
run;
proc tree data=Location_Tree out=Location__6 nclusters=6
          haxis=axis1 horizontal noprint;
  height _rsq_;
  copy can1 can2;
  id number;
run;
proc tree data=Location_Tree out=Location__10 nclusters=10
          haxis=axis1 horizontal noprint;
  height _rsq_;
  copy can1 can2;
  id number;
run;

proc sgplot data=Location__3;
  scatter x=can1 y=can2 / group=cluster;
run;
proc sgplot data=Location__6;
  scatter x=can1 y=can2 / group=cluster;
run;
proc sgplot data=Location__10;
  scatter x=can1 y=can2 / group=cluster;
run;

/* No outliers detected. Rename each CLUSTER column, drop the helper columns,
   and merge the cluster assignments into the master table by Number. */
data Location__3;
  set Location__3(drop=Can1 Can2 CLUSNAME rename=(CLUSTER=LocCluster3));
run;
data Location__6;
  set Location__6(drop=Can1 Can2 CLUSNAME rename=(CLUSTER=LocCluster6));
run;
data Location__10;
  set Location__10(drop=Can1 Can2 CLUSNAME rename=(CLUSTER=LocCluster10));
run;

proc sort data=Location__3 out=Location__3; by Number; run;
proc sort data=Location__6 out=Location__6; by Number; run;
proc sort data=Location__10 out=Location__10; by Number; run;

data All_sword_data;
  merge All_sword_data Location__3 Location__6 Location__10;
  by Number;
run;

/*The second set of cluster analyses relates to blades.*/

/*I know 09.005 is an outlier since it's a dagger, so I'll remove it right now, since it won't cluster.*/
/* Working copy of the master table for blade clustering, without 09.005. */
data Blades_cluster;
  set All_sword_data;
  if number='09.005' then delete;
run;

/* ACECLUS on BPrin1 through BPrin3, then Ward clustering on the resulting
   canonical variables can1 through can2. */
proc aceclus data=Blades_cluster out=Blades_Ace p=.03 noprint;
  var BPrin1--BPrin3;
run;
ods graphics on;
proc cluster data=Blades_Ace method=ward ccc pseudo print=15
             outtree=Blades_Tree;
  var can1--can2;
  id number;
  format number;
run;
ods graphics off;

/* 09.015 is also an outlier: remove it and repeat both steps, overwriting
   Blades_Ace and Blades_Tree. */
data Blades_cluster;
  set Blades_cluster;
  if number='09.015' then delete;
run;
proc aceclus data=Blades_cluster out=Blades_Ace p=.03 noprint;
  var BPrin1--BPrin3;
run;
ods graphics on;
proc cluster data=Blades_Ace method=ward ccc pseudo print=15
             outtree=Blades_Tree;
  var can1--can2;
  id number;
  format number;
run;
ods graphics off;

/* Graphs of CCC, pseudo F and pseudo t-squared suggest possible groupings at
   6 and 12 clusters. Each cut gets its own work table and a scatter plot to
   check for outliers. */
goptions vsize=9in hsize=6.4in htext=.9pct htitle=3pct;
axis1 order=(0 to 1 by 0.2);

proc tree data=Blades_Tree out=Blades__6 nclusters=6
          haxis=axis1 horizontal noprint;
  height _rsq_;
  copy can1 can2;
  id number;
run;
proc tree data=Blades_Tree out=Blades__12 nclusters=12
          haxis=axis1 horizontal noprint;
  height _rsq_;
  copy can1 can2;
  id number;
run;
proc sgplot data=Blades__6;
  scatter x=can1 y=can2 / group=cluster;
run;
proc sgplot data=Blades__12;
  scatter x=can1 y=can2 / group=cluster;
run;

/* Remove three further outliers and repeat the ACECLUS + Ward clustering. */
data Blades_cluster;
  set Blades_cluster;
  if number='11.005' or number='15.490' or number='09.175' then delete;
run;
proc aceclus data=Blades_cluster out=Blades_Ace p=.03 noprint;
  var BPrin1--BPrin3;
run;
ods graphics on;
proc cluster data=Blades_Ace method=ward ccc pseudo print=15
             outtree=Blades_Tree;
  var can1--can2;
  id number;
  format number;
run;
ods graphics off;

/* After the removals the tree is cut at 12 and at 7 clusters. Both cuts are
   plotted, and Blades__12 is then ordered by CLUSTER. */
proc tree data=Blades_Tree out=Blades__12 nclusters=12
          haxis=axis1 horizontal;
  height _rsq_;
  copy can1 can2;
  id number;
run;
proc tree data=Blades_Tree out=Blades__7 nclusters=7
          haxis=axis1 horizontal;
  height _rsq_;
  copy can1 can2;
  id number;
run;
proc sgplot data=Blades__7;
  scatter x=can1 y=can2 / group=cluster;
run;
proc sgplot data=Blades__12;
  scatter x=can1 y=can2 / group=cluster;
run;
proc sort data=Blades__12 out=Blades__12;
  by CLUSTER;
run;

/* No outliers detected. */

	/* Rename each CLUSTER column, drop the helper columns, and merge the blade
	   cluster assignments into the master table by Number. */
	data Blades__7;
	  set Blades__7(drop=Can1 Can2 CLUSNAME rename=(CLUSTER=BladesCluster7));
	run;
	data Blades__12;
	  set Blades__12(drop=Can1 Can2 CLUSNAME rename=(CLUSTER=BladesCluster12));
	run;

	proc sort data=Blades__7 out=Blades__7; by Number; run;
	proc sort data=Blades__12 out=Blades__12; by Number; run;

	data All_sword_data;
	  merge All_sword_data Blades__7 Blades__12;
	  by Number;
	run;



/*The third set of cluster analyses relates to cross sections.

	First, an ACECLUS transform is used on the relevant variables, since the groups are not assumed to be equal.
This creates a set of canonical variables to use for cluster analysis in the next step.*/
	/* ACECLUS on CSPrin1 through CSPrin5, then Ward clustering on the
	   canonical variables can1 through can5. */
	proc aceclus data=All_sword_data out=Cross_section_Ace p=.03 noprint;
	  var CSPrin1--CSPrin5;
	run;
	ods graphics on;
	proc cluster data=Cross_section_Ace method=ward ccc pseudo print=15
	             outtree=Cross_section_Tree;
	  var can1--can5;
	  id number;
	  format number;
	run;
	ods graphics off;

	/* The tree is cut at 8 and at 12 clusters. Each cut gets its own work
	   table and a scatter plot of can1 vs can2 to check for outliers. */
	goptions vsize=9in hsize=6.4in htext=.9pct htitle=3pct;
	axis1 order=(0 to 1 by 0.2);

	proc tree data=Cross_section_Tree out=Cross_section__8 nclusters=8
	          haxis=axis1 horizontal;
	  height _rsq_;
	  copy can1 can2;
	  id number;
	run;
	proc tree data=Cross_section_Tree out=Cross_section__12 nclusters=12
	          haxis=axis1 horizontal;
	  height _rsq_;
	  copy can1 can2;
	  id number;
	run;
	proc sgplot data=Cross_section__8;
	  scatter x=can1 y=can2 / group=cluster;
	run;
	proc sgplot data=Cross_section__12;
	  scatter x=can1 y=can2 / group=cluster;
	run;

	/* No outliers detected. Rename each CLUSTER column, drop the helper
	   columns, and merge the assignments into the master table by Number. */
	data Cross_section__8;
	  set Cross_section__8(drop=Can1 Can2 CLUSNAME
	                       rename=(CLUSTER=CrossSectionCluster8));
	run;
	data Cross_section__12;
	  set Cross_section__12(drop=Can1 Can2 CLUSNAME
	                        rename=(CLUSTER=CrossSectionCluster12));
	run;

	proc sort data=Cross_section__8 out=Cross_section__8; by Number; run;
	proc sort data=Cross_section__12 out=Cross_section__12; by Number; run;

	data All_sword_data;
	  merge All_sword_data Cross_section__8 Cross_section__12;
	  by Number;
	run;

/*The fourth set of cluster analyses relates to hilts.

	First, an ACECLUS transform is used on the relevant variables, since the groups are not assumed to be equal.
This creates a set of canonical variables to use for cluster analysis in the next step.*/
	/* Hilt clustering, first pass: ACECLUS on Prin1 through Prin11 of
	   Hilts_pca (the un-renamed hilt components), then Ward clustering on the
	   canonical variables can1 through can11. */
	proc aceclus data=Hilts_pca out=Hilts_Ace p=.03 noprint;
	  var Prin1--Prin11;
	run;
	ods graphics on;
	proc cluster data=Hilts_Ace method=ward ccc pseudo print=15
	             outtree=Hilts_Tree;
	  var can1--can11;
	  id number;
	  format number;
	run;
	ods graphics off;

	/* First-pass cuts at 9 and 14 clusters, each given its own work table and
	   plotted to check for outliers.
	   NOTE(review): the earlier comment here named 3 and 6 groups, which did
	   not match the cuts actually made - confirm which counts were intended. */
	goptions vsize=9in hsize=6.4in htext=.9pct htitle=3pct;
	axis1 order=(0 to 1 by 0.2);
	proc tree data=Hilts_Tree out=Hilts__9 nclusters=9
	          haxis=axis1 horizontal noprint;
	  height _rsq_;
	  copy can1 can2;
	  id number;
	run;
	proc tree data=Hilts_Tree out=Hilts__14 nclusters=14
	          haxis=axis1 horizontal noprint;
	  height _rsq_;
	  copy can1 can2;
	  id number;
	run;
	proc sgplot data=Hilts__9;
	  scatter y=can2 x=can1 / group=cluster;
	run;
	proc sgplot data=Hilts__14;
	  scatter y=can2 x=can1 / group=cluster;
	run;

	/* Two outliers detected: remove them and re-run the clustering.
	   Every other ID literal in this program is zero-padded ('09.005',
	   '09.015', '09.175'). If Number is a character variable, the unpadded
	   literals '9.232' / '9.006' would match nothing and the outliers would
	   silently stay in. Both spellings are tested so the rows are removed
	   either way; if Number is numeric the two spellings are equivalent.
	   NOTE(review): once the type of Number is confirmed, keep one spelling. */
	data hilt_cluster;
	  set Hilts_pca;
	  if number='09.232' or number='9.232'
	     or number='09.006' or number='9.006' then delete;
	run;
	proc aceclus data=hilt_cluster out=Hilts_Ace p=.03 noprint;
	  var Prin1--Prin11;
	run;
	ods graphics on;
	proc cluster data=Hilts_Ace method=ward ccc pseudo print=15
	             outtree=Hilts_Tree;
	  var can1--can11;
	  id number;
	  format number;
	run;
	ods graphics off;

	/* Second-pass cuts at 10 and 6 clusters; these are the ones kept. */
	goptions vsize=9in hsize=6.4in htext=.9pct htitle=3pct;
	axis1 order=(0 to 1 by 0.2);
	proc tree data=Hilts_Tree out=Hilts__10 nclusters=10
	          haxis=axis1 horizontal;
	  height _rsq_;
	  copy can1 can2;
	  id number;
	run;
	proc tree data=Hilts_Tree out=Hilts__6 nclusters=6
	          haxis=axis1 horizontal noprint;
	  height _rsq_;
	  copy can1 can2;
	  id number;
	run;
	proc sgplot data=Hilts__10;
	  scatter y=can2 x=can1 / group=cluster;
	run;
	proc sgplot data=Hilts__6;
	  scatter y=can2 x=can1 / group=cluster;
	run;

	/* Rename each CLUSTER column, drop the helper columns, and merge the hilt
	   cluster assignments into the master table by Number. */
	data Hilts__6;
	  set Hilts__6(rename=(CLUSTER=HiltCluster6) drop=Can1 Can2 CLUSNAME);
	run;
	data Hilts__10;
	  set Hilts__10(rename=(CLUSTER=HiltCluster10) drop=Can1 Can2 CLUSNAME);
	run;
	proc sort data=Hilts__6 out=Hilts__6; by Number; run;
	proc sort data=Hilts__10 out=Hilts__10; by Number; run;
	data All_sword_data;
	  merge All_sword_data Hilts__6 Hilts__10;
	  by Number;
	run;

/*The fifth set of cluster analyses relates to rivets.

	First, an ACECLUS transform is used on the relevant variables, since the groups are not assumed to be equal.
This creates a set of canonical variables to use for cluster analysis in the next step.*/
	/* Rivet clustering, first pass: ACECLUS on RPrin1 through RPrin5, then
	   Ward clustering on the canonical variables can1 through can5. */
	proc aceclus data=All_sword_data out=Rivets_Ace p=.03 noprint;
	  var RPrin1--RPrin5;
	run;
	ods graphics on;
	proc cluster data=Rivets_Ace method=ward ccc pseudo print=15
	             outtree=Rivets_Tree;
	  var can1--can5;
	  id number;
	  format number;
	run;
	ods graphics off;

	/* First-pass cut at 11 clusters, written to its own work table and
	   plotted to check for outliers. */
	goptions vsize=9in hsize=6.4in htext=.9pct htitle=3pct;
	axis1 order=(0 to 1 by 0.2);
	proc tree data=Rivets_Tree out=Rivets__11 nclusters=11
	          haxis=axis1 horizontal;
	  height _rsq_;
	  copy can1 can2;
	  id number;
	run;
	proc sgplot data=Rivets__11;
	  scatter x=can1 y=can2 / group=cluster;
	run;

	/* Remove the outlier 17.186 and repeat the ACECLUS + Ward clustering. */
	data Rivets_Cluster;
	  set All_sword_data;
	  if number='17.186' then delete;
	run;
	proc aceclus data=Rivets_Cluster out=Rivets_Ace p=.03 noprint;
	  var RPrin1--RPrin5;
	run;
	ods graphics on;
	proc cluster data=Rivets_Ace method=ward ccc pseudo print=15
	             outtree=Rivets_Tree;
	  var can1--can5;
	  id number;
	  format number;
	run;
	ods graphics off;

	/* Second-pass cut at 15 clusters, again plotted to check for outliers. */
	goptions vsize=9in hsize=6.4in htext=.9pct htitle=3pct;
	axis1 order=(0 to 1 by 0.2);
	proc tree data=Rivets_Tree out=Rivets__15 nclusters=15
	          haxis=axis1 horizontal;
	  height _rsq_;
	  copy can1 can2;
	  id number;
	run;
	proc sgplot data=Rivets__15;
	  scatter x=can1 y=can2 / group=cluster;
	run;

	/* No major outliers detected; the clustering remains intact. These
	   clusters are kept, since any outliers would be split off as a group of
	   one anyway. Rename the CLUSTER column, drop the helper columns, and
	   merge the assignments into the master table by Number. */
	data Rivets__15;
	  set Rivets__15(drop=Can1 Can2 CLUSNAME rename=(CLUSTER=RivetsCluster15));
	run;
	proc sort data=Rivets__15 out=Rivets__15; by Number; run;
	data All_sword_data;
	  merge All_sword_data Rivets__15;
	  by Number;
	run;
 




/*Step 3: Check to find significance of groups.  Groups that are significant have had an added output for the distance matrix between groups.

The first series of ANOVA / MANOVA computations are to test Hypothesis 1, which is based on Style.  */
/* Hypothesis 1, grouping variable Style.
   Multi-response models use /NOUNI plus a MANOVA statement; single-response
   models are plain one-way ANOVAs. MEANS ... / TUKEY is requested only on
   selected models. Each group of PROC GLM steps is closed by one RUN; QUIT;. */

/* Decoration measurements. */
proc glm data=All_sword_data;
  class Style;
  model CircleDist Radius CircleLines = Style / nouni;
  manova h=Style;
  means Style / tukey;
proc glm data=All_sword_data;
  class Style;
  model DashL DashDist = Style / nouni;
  manova h=Style;
proc glm data=All_sword_data;
  class Style;
  model CurvedDist = Style;
proc glm data=All_sword_data;
  class Style;
  model StraightDist = Style;
  means Style / tukey;
proc glm data=All_sword_data;
  class Style;
  model WaveH WaveW WaveDist = Style / nouni;
  manova h=Style;
run;
quit;


/* StDev_* counterparts of the decoration measurements. */
proc glm data=All_sword_data;
  class Style;
  model StDev_CircleDist StDev_Radius StDev_CircleLines = Style / nouni;
  manova h=Style;
  means Style / tukey;
proc glm data=All_sword_data;
  class Style;
  model StDev_DashL StDev_DashDist = Style / nouni;
  manova h=Style;
proc glm data=All_sword_data;
  class Style;
  model StDev_CurvedDist = Style;
  means Style / tukey;
proc glm data=All_sword_data;
  class Style;
  model StDev_Straight = Style;
  means Style / tukey;
proc glm data=All_sword_data;
  class Style;
  model StDev_WaveH StDev_WaveW StDev_WaveDist = Style / nouni;
  manova h=Style;
run;
quit;

/* Principal components, Length and TangHeight. */
proc glm data=All_sword_data;
  class Style;
  model BPrin1--BPrin3 = Style / nouni;
  manova h=Style;
  means Style / tukey;
proc glm data=All_sword_data;
  class Style;
  model CSPrin1--CSPrin5 = Style / nouni;
  manova h=Style;
  means Style / tukey;
proc glm data=All_sword_data;
  class Style;
  model HPrin1--HPrin11 = Style / nouni;
  manova h=Style;
  means Style / tukey;
proc glm data=All_sword_data;
  class Style;
  model Length = Style;
proc glm data=All_sword_data;
  class Style;
  model RPrin1--RPrin5 = Style / nouni;
  manova h=Style;
  means Style / tukey;
proc glm data=All_sword_data;
  class Style;
  model TangHeight = Style;
run;
quit;

/* Location variables (lat through Lon). */
proc glm data=All_sword_data;
  class Style;
  model lat--Lon = Style / nouni;
  manova h=Style;
run;
quit;

/* The first set of tests also includes a chi-squared test of association
   between Style and Find type. The N, NMISS, Pearson and likelihood-ratio
   chi-square statistics are saved to ChiSqData. */
proc freq data=All_sword_data order=data;
   tables Style*Find / expected cellchi2 norow nocol chisq;
   output out=ChiSqData n nmiss pchi lrchi;
   title 'Chi-Square Tests for Style and Find Type';
run; quit;
/* TITLE stays in effect until replaced or cleared; reset it so this heading
   is not printed on the output of the PROC GLM steps that follow. */
title;

/* The second series of ANOVA computations are to test Hypothesis 2, which is based on find category.  */
/* Hypothesis 2, grouping variable Find. Same response sets as the Style
   series; no Tukey comparisons are requested here. */

/* Decoration measurements. */
proc glm data=All_sword_data;
  class Find;
  model CircleDist Radius CircleLines = Find / nouni;
  manova h=Find;
proc glm data=All_sword_data;
  class Find;
  model DashL DashDist = Find / nouni;
  manova h=Find;
proc glm data=All_sword_data;
  class Find;
  model CurvedDist = Find;
proc glm data=All_sword_data;
  class Find;
  model StraightDist = Find;
proc glm data=All_sword_data;
  class Find;
  model WaveH WaveW WaveDist = Find / nouni;
  manova h=Find;
run;
quit;

/* StDev_* counterparts of the decoration measurements. */
proc glm data=All_sword_data;
  class Find;
  model StDev_CircleDist StDev_Radius StDev_CircleLines = Find / nouni;
  manova h=Find;
proc glm data=All_sword_data;
  class Find;
  model StDev_DashL StDev_DashDist = Find / nouni;
  manova h=Find;
proc glm data=All_sword_data;
  class Find;
  model StDev_CurvedDist = Find;
proc glm data=All_sword_data;
  class Find;
  model StDev_Straight = Find;
proc glm data=All_sword_data;
  class Find;
  model StDev_WaveH StDev_WaveW StDev_WaveDist = Find / nouni;
  manova h=Find;
run;
quit;

/* Principal components, Length and TangHeight. */
proc glm data=All_sword_data;
  class Find;
  model BPrin1--BPrin3 = Find / nouni;
  manova h=Find;
proc glm data=All_sword_data;
  class Find;
  model CSPrin1--CSPrin5 = Find / nouni;
  manova h=Find;
proc glm data=All_sword_data;
  class Find;
  model HPrin1--HPrin11 = Find / nouni;
  manova h=Find;
proc glm data=All_sword_data;
  class Find;
  model Length = Find;
proc glm data=All_sword_data;
  class Find;
  model RPrin1--RPrin5 = Find / nouni;
  manova h=Find;
proc glm data=All_sword_data;
  class Find;
  model TangHeight = Find;
run;
quit;

/* Location variables (lat through Lon). */
proc glm data=All_sword_data;
  class find;
  model lat--Lon = find / nouni;
  manova h=find;
run;
quit;


/* The third series of ANOVA computations are to test Hypothesis 3, which is based on Location.
The series is for 6 clusters.*/

/* Hypothesis 3, grouping variable LocCluster6 (the 6-cluster location cut).
   Tukey comparisons are requested only on selected models. */

/* Decoration measurements. */
proc glm data=All_sword_data;
  class LocCluster6;
  model CircleDist Radius CircleLines = LocCluster6 / nouni;
  manova h=LocCluster6;
proc glm data=All_sword_data;
  class LocCluster6;
  model DashL DashDist = LocCluster6 / nouni;
  manova h=LocCluster6;
  means LocCluster6 / tukey;
proc glm data=All_sword_data;
  class LocCluster6;
  model CurvedDist = LocCluster6;
proc glm data=All_sword_data;
  class LocCluster6;
  model StraightDist = LocCluster6;
proc glm data=All_sword_data;
  class LocCluster6;
  model WaveH WaveW WaveDist = LocCluster6 / nouni;
  manova h=LocCluster6;
run;
quit;

/* StDev_* counterparts of the decoration measurements. */
proc glm data=All_sword_data;
  class LocCluster6;
  model StDev_CircleDist StDev_Radius StDev_CircleLines = LocCluster6 / nouni;
  manova h=LocCluster6;
proc glm data=All_sword_data;
  class LocCluster6;
  model StDev_DashL StDev_DashDist = LocCluster6 / nouni;
  manova h=LocCluster6;
proc glm data=All_sword_data;
  class LocCluster6;
  model StDev_CurvedDist = LocCluster6;
proc glm data=All_sword_data;
  class LocCluster6;
  model StDev_Straight = LocCluster6;
proc glm data=All_sword_data;
  class LocCluster6;
  model StDev_WaveH StDev_WaveW StDev_WaveDist = LocCluster6 / nouni;
  manova h=LocCluster6;
run;
quit;

/* Principal components, Length and TangHeight. */
proc glm data=All_sword_data;
  class LocCluster6;
  model BPrin1--BPrin3 = LocCluster6 / nouni;
  manova h=LocCluster6;
  means LocCluster6 / tukey;
proc glm data=All_sword_data;
  class LocCluster6;
  model CSPrin1--CSPrin5 = LocCluster6 / nouni;
  manova h=LocCluster6;
  means LocCluster6 / tukey;
proc glm data=All_sword_data;
  class LocCluster6;
  model HPrin1--HPrin11 = LocCluster6 / nouni;
  manova h=LocCluster6;
  means LocCluster6 / tukey;
proc glm data=All_sword_data;
  class LocCluster6;
  model Length = LocCluster6;
proc glm data=All_sword_data;
  class LocCluster6;
  model RPrin1--RPrin5 = LocCluster6 / nouni;
  manova h=LocCluster6;
  means LocCluster6 / tukey;
proc glm data=All_sword_data;
  class LocCluster6;
  model TangHeight = LocCluster6;
run;
quit;

/* Chi-square test of association between the 6-cluster location grouping and
   Find type; statistics are saved to LocCluster6ChiSqData. */
proc freq data=All_sword_data order=data;
   tables LocCluster6*Find / expected cellchi2 norow nocol chisq;
   output out=LocCluster6ChiSqData n nmiss pchi lrchi;
   title 'Chi-Square Tests for Location and Find Type';
run; quit;

/* Same test against Style. The title now names Style: it previously repeated
   "Find Type", mislabelling this table's output.
   NOTE(review): the output name LocCluster3aChiSqData says "3" although the
   table uses LocCluster6; it is left unchanged in case later code reads it. */
proc freq data=All_sword_data order=data;
   tables LocCluster6*Style / expected cellchi2 norow nocol chisq;
   output out=LocCluster3aChiSqData n nmiss pchi lrchi;
   title 'Chi-Square Tests for Location and Style';
run; quit;
/* TITLE stays in effect until replaced or cleared; reset it so this heading
   is not printed on the output of the PROC GLM steps that follow. */
title;

/* The third series of ANOVA computations are to test Hypothesis 3, which is based on Location.
The series is for 10 clusters.*/

/* Hypothesis 3, grouping variable LocCluster10 (the 10-cluster location
   cut). No Tukey comparisons are requested in this series. */

/* Decoration measurements. */
proc glm data=All_sword_data;
  class LocCluster10;
  model CircleDist Radius CircleLines = LocCluster10 / nouni;
  manova h=LocCluster10;
proc glm data=All_sword_data;
  class LocCluster10;
  model DashL DashDist = LocCluster10 / nouni;
  manova h=LocCluster10;
proc glm data=All_sword_data;
  class LocCluster10;
  model CurvedDist = LocCluster10;
proc glm data=All_sword_data;
  class LocCluster10;
  model StraightDist = LocCluster10;
proc glm data=All_sword_data;
  class LocCluster10;
  model WaveH WaveW WaveDist = LocCluster10 / nouni;
  manova h=LocCluster10;
run;
quit;

/* StDev_* counterparts of the decoration measurements. */
proc glm data=All_sword_data;
  class LocCluster10;
  model StDev_CircleDist StDev_Radius StDev_CircleLines = LocCluster10 / nouni;
  manova h=LocCluster10;
proc glm data=All_sword_data;
  class LocCluster10;
  model StDev_DashL StDev_DashDist = LocCluster10 / nouni;
  manova h=LocCluster10;
proc glm data=All_sword_data;
  class LocCluster10;
  model StDev_CurvedDist = LocCluster10;
proc glm data=All_sword_data;
  class LocCluster10;
  model StDev_Straight = LocCluster10;
proc glm data=All_sword_data;
  class LocCluster10;
  model StDev_WaveH StDev_WaveW StDev_WaveDist = LocCluster10 / nouni;
  manova h=LocCluster10;
run;
quit;


/* Principal components, Length and TangHeight. */
proc glm data=All_sword_data;
  class LocCluster10;
  model BPrin1--BPrin3 = LocCluster10 / nouni;
  manova h=LocCluster10;
proc glm data=All_sword_data;
  class LocCluster10;
  model CSPrin1--CSPrin5 = LocCluster10 / nouni;
  manova h=LocCluster10;
proc glm data=All_sword_data;
  class LocCluster10;
  model HPrin1--HPrin11 = LocCluster10 / nouni;
  manova h=LocCluster10;
proc glm data=All_sword_data;
  class LocCluster10;
  model Length = LocCluster10;
proc glm data=All_sword_data;
  class LocCluster10;
  model RPrin1--RPrin5 = LocCluster10 / nouni;
  manova h=LocCluster10;
proc glm data=All_sword_data;
  class LocCluster10;
  model TangHeight = LocCluster10;
run;
quit;


/* Chi-square test of association between the 10-cluster location grouping
   and Find type; statistics are saved to LocCluster10ChiSqData. */
proc freq data=All_sword_data order=data;
   tables LocCluster10*Find / expected cellchi2 norow nocol chisq;
   output out=LocCluster10ChiSqData n nmiss pchi lrchi;
   title 'Chi-Square Tests for Location and Find Type';
run; quit;

/* Same test against Style. The title now names Style: it previously repeated
   "Find Type", mislabelling this table's output. */
proc freq data=All_sword_data order=data;
   tables LocCluster10*Style / expected cellchi2 norow nocol chisq;
   output out=LocCluster10aChiSqData n nmiss pchi lrchi;
   title 'Chi-Square Tests for Location and Style';
run; quit;
/* TITLE stays in effect until replaced or cleared; reset it so this heading
   is not printed on the output of the PROC GLM steps that follow. */
title;



/* The fourth series of ANOVA computations are to test Hypothesis 4, which is based on manufacture choice.
The series is 7 blade clusters.*/

/* Hypothesis 4, grouping variable BladesCluster7. The blade components
   (BPrin*) are not modelled in this series. */

/* Decoration measurements. */
proc glm data=All_sword_data;
  class BladesCluster7;
  model CircleDist Radius CircleLines = BladesCluster7 / nouni;
  manova h=BladesCluster7;
proc glm data=All_sword_data;
  class BladesCluster7;
  model DashL DashDist = BladesCluster7 / nouni;
  manova h=BladesCluster7;
proc glm data=All_sword_data;
  class BladesCluster7;
  model CurvedDist = BladesCluster7;
proc glm data=All_sword_data;
  class BladesCluster7;
  model StraightDist = BladesCluster7;
proc glm data=All_sword_data;
  class BladesCluster7;
  model WaveH WaveW WaveDist = BladesCluster7 / nouni;
  manova h=BladesCluster7;
run;
quit;

/* StDev_* counterparts of the decoration measurements. */
proc glm data=All_sword_data;
  class BladesCluster7;
  model StDev_CircleDist StDev_Radius StDev_CircleLines = BladesCluster7 / nouni;
  manova h=BladesCluster7;
proc glm data=All_sword_data;
  class BladesCluster7;
  model StDev_DashL StDev_DashDist = BladesCluster7 / nouni;
  manova h=BladesCluster7;
proc glm data=All_sword_data;
  class BladesCluster7;
  model StDev_CurvedDist = BladesCluster7;
proc glm data=All_sword_data;
  class BladesCluster7;
  model StDev_Straight = BladesCluster7;
proc glm data=All_sword_data;
  class BladesCluster7;
  model StDev_WaveH StDev_WaveW StDev_WaveDist = BladesCluster7 / nouni;
  manova h=BladesCluster7;
run;
quit;

/* Remaining principal components, Length and TangHeight. */
proc glm data=All_sword_data;
  class BladesCluster7;
  model CSPrin1--CSPrin5 = BladesCluster7 / nouni;
  manova h=BladesCluster7;
proc glm data=All_sword_data;
  class BladesCluster7;
  model HPrin1--HPrin11 = BladesCluster7 / nouni;
  manova h=BladesCluster7;
proc glm data=All_sword_data;
  class BladesCluster7;
  model Length = BladesCluster7;
proc glm data=All_sword_data;
  class BladesCluster7;
  model RPrin1--RPrin5 = BladesCluster7 / nouni;
  manova h=BladesCluster7;
proc glm data=All_sword_data;
  class BladesCluster7;
  model TangHeight = BladesCluster7;
run;
quit;



/* The fourth series of ANOVA computations are to test Hypothesis 4, which is based on manufacture choice.
The series is 12 blade clusters.*/

/* Hypothesis 4, grouping variable BladesCluster12. The blade components
   (BPrin*) are not modelled in this series; Tukey comparisons are requested
   only on selected models. */

/* Decoration measurements. */
proc glm data=All_sword_data;
  class BladesCluster12;
  model CircleDist Radius CircleLines = BladesCluster12 / nouni;
  manova h=BladesCluster12;
proc glm data=All_sword_data;
  class BladesCluster12;
  model DashL DashDist = BladesCluster12 / nouni;
  manova h=BladesCluster12;
proc glm data=All_sword_data;
  class BladesCluster12;
  model CurvedDist = BladesCluster12;
proc glm data=All_sword_data;
  class BladesCluster12;
  model StraightDist = BladesCluster12;
proc glm data=All_sword_data;
  class BladesCluster12;
  model WaveH WaveW WaveDist = BladesCluster12 / nouni;
  manova h=BladesCluster12;
run;
quit;

/* StDev_* counterparts of the decoration measurements. */
proc glm data=All_sword_data;
  class BladesCluster12;
  model StDev_CircleDist StDev_Radius StDev_CircleLines = BladesCluster12 / nouni;
  manova h=BladesCluster12;
proc glm data=All_sword_data;
  class BladesCluster12;
  model StDev_DashL StDev_DashDist = BladesCluster12 / nouni;
  manova h=BladesCluster12;
proc glm data=All_sword_data;
  class BladesCluster12;
  model StDev_CurvedDist = BladesCluster12;
proc glm data=All_sword_data;
  class BladesCluster12;
  model StDev_Straight = BladesCluster12;
proc glm data=All_sword_data;
  class BladesCluster12;
  model StDev_WaveH StDev_WaveW StDev_WaveDist = BladesCluster12 / nouni;
  manova h=BladesCluster12;
run;
quit;

/* Remaining principal components, Length and TangHeight. */
proc glm data=All_sword_data;
  class BladesCluster12;
  model CSPrin1--CSPrin5 = BladesCluster12 / nouni;
  manova h=BladesCluster12;
  means BladesCluster12 / tukey;
proc glm data=All_sword_data;
  class BladesCluster12;
  model HPrin1--HPrin11 = BladesCluster12 / nouni;
  manova h=BladesCluster12;
proc glm data=All_sword_data;
  class BladesCluster12;
  model Length = BladesCluster12;
  means BladesCluster12 / tukey;
proc glm data=All_sword_data;
  class BladesCluster12;
  model RPrin1--RPrin5 = BladesCluster12 / nouni;
  manova h=BladesCluster12;
proc glm data=All_sword_data;
  class BladesCluster12;
  model TangHeight = BladesCluster12;
  means BladesCluster12 / tukey;
run;
quit;

/* The fourth series of ANOVA computations are to test Hypothesis 4, which is based on manufacture choice.
The series is 8 cross section clusters.*/

/* Hypothesis 4, grouping variable CrossSectionCluster8. The cross-section
   components (CSPrin*) are not modelled in this series. */

/* Decoration measurements. */
proc glm data=All_sword_data;
  class CrossSectionCluster8;
  model CircleDist Radius CircleLines = CrossSectionCluster8 / nouni;
  manova h=CrossSectionCluster8;
proc glm data=All_sword_data;
  class CrossSectionCluster8;
  model DashL DashDist = CrossSectionCluster8 / nouni;
  manova h=CrossSectionCluster8;
proc glm data=All_sword_data;
  class CrossSectionCluster8;
  model CurvedDist = CrossSectionCluster8;
proc glm data=All_sword_data;
  class CrossSectionCluster8;
  model StraightDist = CrossSectionCluster8;
proc glm data=All_sword_data;
  class CrossSectionCluster8;
  model WaveH WaveW WaveDist = CrossSectionCluster8 / nouni;
  manova h=CrossSectionCluster8;
run;
quit;

/* StDev_* counterparts of the decoration measurements. */
proc glm data=All_sword_data;
  class CrossSectionCluster8;
  model StDev_CircleDist StDev_Radius StDev_CircleLines = CrossSectionCluster8 / nouni;
  manova h=CrossSectionCluster8;
proc glm data=All_sword_data;
  class CrossSectionCluster8;
  model StDev_DashL StDev_DashDist = CrossSectionCluster8 / nouni;
  manova h=CrossSectionCluster8;
proc glm data=All_sword_data;
  class CrossSectionCluster8;
  model StDev_CurvedDist = CrossSectionCluster8;
proc glm data=All_sword_data;
  class CrossSectionCluster8;
  model StDev_Straight = CrossSectionCluster8;
proc glm data=All_sword_data;
  class CrossSectionCluster8;
  model StDev_WaveH StDev_WaveW StDev_WaveDist = CrossSectionCluster8 / nouni;
  manova h=CrossSectionCluster8;
run;
quit;

/* Remaining principal components, Length and TangHeight. */
proc glm data=All_sword_data;
  class CrossSectionCluster8;
  model BPrin1--BPrin3 = CrossSectionCluster8 / nouni;
  manova h=CrossSectionCluster8;
proc glm data=All_sword_data;
  class CrossSectionCluster8;
  model HPrin1--HPrin11 = CrossSectionCluster8 / nouni;
  manova h=CrossSectionCluster8;
proc glm data=All_sword_data;
  class CrossSectionCluster8;
  model Length = CrossSectionCluster8;
proc glm data=All_sword_data;
  class CrossSectionCluster8;
  model RPrin1--RPrin5 = CrossSectionCluster8 / nouni;
  manova h=CrossSectionCluster8;
proc glm data=All_sword_data;
  class CrossSectionCluster8;
  model TangHeight = CrossSectionCluster8;
run;
quit;

/* The fourth series of ANOVA computations are to test Hypothesis 4, which is based on manufacture choice.
The series is 12 cross section clusters.*/

/* Hypothesis 4, grouping variable CrossSectionCluster12. The cross-section
   components (CSPrin*) are not modelled in this series. */

/* Decoration measurements. */
proc glm data=All_sword_data;
  class CrossSectionCluster12;
  model CircleDist Radius CircleLines = CrossSectionCluster12 / nouni;
  manova h=CrossSectionCluster12;
proc glm data=All_sword_data;
  class CrossSectionCluster12;
  model DashL DashDist = CrossSectionCluster12 / nouni;
  manova h=CrossSectionCluster12;
proc glm data=All_sword_data;
  class CrossSectionCluster12;
  model CurvedDist = CrossSectionCluster12;
proc glm data=All_sword_data;
  class CrossSectionCluster12;
  model StraightDist = CrossSectionCluster12;
proc glm data=All_sword_data;
  class CrossSectionCluster12;
  model WaveH WaveW WaveDist = CrossSectionCluster12 / nouni;
  manova h=CrossSectionCluster12;
run;
quit;

/* StDev_* counterparts of the decoration measurements. */
proc glm data=All_sword_data;
  class CrossSectionCluster12;
  model StDev_CircleDist StDev_Radius StDev_CircleLines = CrossSectionCluster12 / nouni;
  manova h=CrossSectionCluster12;
proc glm data=All_sword_data;
  class CrossSectionCluster12;
  model StDev_DashL StDev_DashDist = CrossSectionCluster12 / nouni;
  manova h=CrossSectionCluster12;
proc glm data=All_sword_data;
  class CrossSectionCluster12;
  model StDev_CurvedDist = CrossSectionCluster12;
proc glm data=All_sword_data;
  class CrossSectionCluster12;
  model StDev_Straight = CrossSectionCluster12;
proc glm data=All_sword_data;
  class CrossSectionCluster12;
  model StDev_WaveH StDev_WaveW StDev_WaveDist = CrossSectionCluster12 / nouni;
  manova h=CrossSectionCluster12;
run;
quit;

/* Remaining principal components, Length and TangHeight. */
proc glm data=All_sword_data;
  class CrossSectionCluster12;
  model BPrin1--BPrin3 = CrossSectionCluster12 / nouni;
  manova h=CrossSectionCluster12;
proc glm data=All_sword_data;
  class CrossSectionCluster12;
  model HPrin1--HPrin11 = CrossSectionCluster12 / nouni;
  manova h=CrossSectionCluster12;
proc glm data=All_sword_data;
  class CrossSectionCluster12;
  model Length = CrossSectionCluster12;
proc glm data=All_sword_data;
  class CrossSectionCluster12;
  model RPrin1--RPrin5 = CrossSectionCluster12 / nouni;
  manova h=CrossSectionCluster12;
proc glm data=All_sword_data;
  class CrossSectionCluster12;
  model TangHeight = CrossSectionCluster12;
run;
quit;



/* The fourth series of ANOVA computations are to test Hypothesis 4, which is based on manufacture choice.
The series is 6 hilt clusters.*/
/* Every step below fits HiltCluster6 as the only class effect.
   Multi-response models use NOUNI (skip the univariate tables) plus
   MANOVA H= to test the effect on the responses jointly. */

/* Group 1: decoration measurements. */
proc glm data=All_sword_data;
  class HiltCluster6;
  model CircleDist Radius CircleLines = HiltCluster6 / nouni;
  manova h=HiltCluster6;
proc glm data=All_sword_data;
  class HiltCluster6;
  model DashL DashDist = HiltCluster6 / nouni;
  manova h=HiltCluster6;
proc glm data=All_sword_data;
  class HiltCluster6;
  model CurvedDist = HiltCluster6;
proc glm data=All_sword_data;
  class HiltCluster6;
  model StraightDist = HiltCluster6;
proc glm data=All_sword_data;
  class HiltCluster6;
  model WaveH WaveW WaveDist = HiltCluster6 / nouni;
  manova h=HiltCluster6;
run;
quit;

/* Group 2: the StDev_* counterparts of the decoration measurements. */
proc glm data=All_sword_data;
  class HiltCluster6;
  model StDev_CircleDist StDev_Radius StDev_CircleLines = HiltCluster6 / nouni;
  manova h=HiltCluster6;
proc glm data=All_sword_data;
  class HiltCluster6;
  model StDev_DashL StDev_DashDist = HiltCluster6 / nouni;
  manova h=HiltCluster6;
proc glm data=All_sword_data;
  class HiltCluster6;
  model StDev_CurvedDist = HiltCluster6;
proc glm data=All_sword_data;
  class HiltCluster6;
  model StDev_Straight = HiltCluster6;
proc glm data=All_sword_data;
  class HiltCluster6;
  model StDev_WaveH StDev_WaveW StDev_WaveDist = HiltCluster6 / nouni;
  manova h=HiltCluster6;
run;
quit;

/* Group 3: principal-component scores, Length and TangHeight.
   "--" is a positional variable range (dataset column order). */
proc glm data=All_sword_data;
  class HiltCluster6;
  model BPrin1--BPrin3 = HiltCluster6 / nouni;
  manova h=HiltCluster6;
proc glm data=All_sword_data;
  class HiltCluster6;
  model CSPrin1--CSPrin5 = HiltCluster6 / nouni;
  manova h=HiltCluster6;
proc glm data=All_sword_data;
  class HiltCluster6;
  model Length = HiltCluster6;
proc glm data=All_sword_data;
  class HiltCluster6;
  model RPrin1--RPrin5 = HiltCluster6 / nouni;
  manova h=HiltCluster6;
proc glm data=All_sword_data;
  class HiltCluster6;
  model TangHeight = HiltCluster6;
run;
quit;

/* The fourth series of ANOVA computations are to test Hypothesis 4, which is based on manufacture choice.
The series is 10 hilt clusters.*/

/* Every step below fits HiltCluster10 as the only class effect.
   Multi-response models use NOUNI (skip the univariate tables) plus
   MANOVA H= to test the effect on the responses jointly.
   Steps that carry a MEANS ... / TUKEY statement also print Tukey
   pairwise comparisons of the cluster means. */

/* Group 1: decoration measurements. */
proc glm data=All_sword_data;
  class HiltCluster10;
  model CircleDist Radius CircleLines = HiltCluster10 / nouni;
  manova h=HiltCluster10;
proc glm data=All_sword_data;
  class HiltCluster10;
  model DashL DashDist = HiltCluster10 / nouni;
  manova h=HiltCluster10;
proc glm data=All_sword_data;
  class HiltCluster10;
  model CurvedDist = HiltCluster10;
proc glm data=All_sword_data;
  class HiltCluster10;
  model StraightDist = HiltCluster10;
  /* The Tukey request was listed twice here, which printed the same
     comparison table twice; one statement is enough. */
  means HiltCluster10 / tukey;
proc glm data=All_sword_data;
  class HiltCluster10;
  model WaveH WaveW WaveDist = HiltCluster10 / nouni;
  manova h=HiltCluster10;
run;
quit;

/* Group 2: the StDev_* counterparts of the decoration measurements. */
proc glm data=All_sword_data;
  class HiltCluster10;
  model StDev_CircleDist StDev_Radius StDev_CircleLines = HiltCluster10 / nouni;
  manova h=HiltCluster10;
proc glm data=All_sword_data;
  class HiltCluster10;
  model StDev_DashL StDev_DashDist = HiltCluster10 / nouni;
  manova h=HiltCluster10;
proc glm data=All_sword_data;
  class HiltCluster10;
  model StDev_CurvedDist = HiltCluster10;
proc glm data=All_sword_data;
  class HiltCluster10;
  model StDev_Straight = HiltCluster10;
proc glm data=All_sword_data;
  class HiltCluster10;
  model StDev_WaveH StDev_WaveW StDev_WaveDist = HiltCluster10 / nouni;
  manova h=HiltCluster10;
run;
quit;

/* Group 3: principal-component scores, Length and TangHeight.
   "--" is a positional variable range (dataset column order). */
proc glm data=All_sword_data;
  class HiltCluster10;
  model BPrin1--BPrin3 = HiltCluster10 / nouni;
  manova h=HiltCluster10;
  means HiltCluster10 / tukey;
proc glm data=All_sword_data;
  class HiltCluster10;
  model CSPrin1--CSPrin5 = HiltCluster10 / nouni;
  manova h=HiltCluster10;
proc glm data=All_sword_data;
  class HiltCluster10;
  model Length = HiltCluster10;
  means HiltCluster10 / tukey;
proc glm data=All_sword_data;
  class HiltCluster10;
  model RPrin1--RPrin5 = HiltCluster10 / nouni;
  manova h=HiltCluster10;
  means HiltCluster10 / tukey;
proc glm data=All_sword_data;
  class HiltCluster10;
  model TangHeight = HiltCluster10;
run;
quit;

/* The fourth series of ANOVA computations are to test Hypothesis 4, which is based on manufacture choice.
The series is 15 rivet clusters.*/
/* Every step below fits RivetsCluster15 as the only class effect.
   Multi-response models use NOUNI (skip the univariate tables) plus
   MANOVA H= to test the effect on the responses jointly. */

/* Group 1: decoration measurements. */
proc glm data=All_sword_data;
  class RivetsCluster15;
  model CircleDist Radius CircleLines = RivetsCluster15 / nouni;
  manova h=RivetsCluster15;
proc glm data=All_sword_data;
  class RivetsCluster15;
  model DashL DashDist = RivetsCluster15 / nouni;
  manova h=RivetsCluster15;
proc glm data=All_sword_data;
  class RivetsCluster15;
  model CurvedDist = RivetsCluster15;
proc glm data=All_sword_data;
  class RivetsCluster15;
  model StraightDist = RivetsCluster15;
proc glm data=All_sword_data;
  class RivetsCluster15;
  model WaveH WaveW WaveDist = RivetsCluster15 / nouni;
  manova h=RivetsCluster15;
run;
quit;

/* Group 2: the StDev_* counterparts of the decoration measurements. */
proc glm data=All_sword_data;
  class RivetsCluster15;
  model StDev_CircleDist StDev_Radius StDev_CircleLines = RivetsCluster15 / nouni;
  manova h=RivetsCluster15;
proc glm data=All_sword_data;
  class RivetsCluster15;
  model StDev_DashL StDev_DashDist = RivetsCluster15 / nouni;
  manova h=RivetsCluster15;
proc glm data=All_sword_data;
  class RivetsCluster15;
  model StDev_CurvedDist = RivetsCluster15;
proc glm data=All_sword_data;
  class RivetsCluster15;
  model StDev_Straight = RivetsCluster15;
proc glm data=All_sword_data;
  class RivetsCluster15;
  model StDev_WaveH StDev_WaveW StDev_WaveDist = RivetsCluster15 / nouni;
  manova h=RivetsCluster15;
run;
quit;

/* Group 3: principal-component scores, Length and TangHeight.
   "--" is a positional variable range (dataset column order). */
proc glm data=All_sword_data;
  class RivetsCluster15;
  model BPrin1--BPrin3 = RivetsCluster15 / nouni;
  manova h=RivetsCluster15;
proc glm data=All_sword_data;
  class RivetsCluster15;
  model CSPrin1--CSPrin5 = RivetsCluster15 / nouni;
  manova h=RivetsCluster15;
proc glm data=All_sword_data;
  class RivetsCluster15;
  model HPrin1--HPrin11 = RivetsCluster15 / nouni;
  manova h=RivetsCluster15;
proc glm data=All_sword_data;
  class RivetsCluster15;
  model Length = RivetsCluster15;
proc glm data=All_sword_data;
  class RivetsCluster15;
  model TangHeight = RivetsCluster15;
run;
quit;



/*MANOVAs for manufacture vs. Location.*/
/* One MANOVA per cluster variable: the responses are the positional range
   lat--Lon, the cluster variable is the only class effect, and the TITLE
   labels the output with the cluster variable's name. */
proc glm data=All_sword_data;
  class BladesCluster7;
  model lat--Lon = BladesCluster7 / nouni;
  manova h=BladesCluster7;
  title 'BladesCluster7';
proc glm data=All_sword_data;
  class BladesCluster12;
  model lat--Lon = BladesCluster12 / nouni;
  manova h=BladesCluster12;
  title 'BladesCluster12';
proc glm data=All_sword_data;
  class CrossSectionCluster8;
  model lat--Lon = CrossSectionCluster8 / nouni;
  manova h=CrossSectionCluster8;
  title 'CrossSectionCluster8';
proc glm data=All_sword_data;
  class CrossSectionCluster12;
  model lat--Lon = CrossSectionCluster12 / nouni;
  manova h=CrossSectionCluster12;
  title 'CrossSectionCluster12';
proc glm data=All_sword_data;
  class HiltCluster6;
  model lat--Lon = HiltCluster6 / nouni;
  manova h=HiltCluster6;
  title 'HiltCluster6';
proc glm data=All_sword_data;
  class HiltCluster10;
  model lat--Lon = HiltCluster10 / nouni;
  manova h=HiltCluster10;
  title 'HiltCluster10';
proc glm data=All_sword_data;
  class RivetsCluster15;
  model lat--Lon = RivetsCluster15 / nouni;
  manova h=RivetsCluster15;
  title 'RivetsCluster15';
run;
quit;


/*Chi Squared tests for style and find types. */

/* Cross-tabulate each cluster variable against Find. Each step prints
   expected counts and per-cell chi-square contributions (row and column
   percentages suppressed), runs the chi-square tests, and saves N, NMISS,
   the Pearson and the likelihood-ratio chi-square to its own dataset. */
proc freq data=All_sword_data order=data;
  tables BladesCluster7*Find / expected cellchi2 norow nocol chisq;
  output out=ChiSqDataBladesCluster7Find n nmiss pchi lrchi;
  title 'BladesCluster7';
proc freq data=All_sword_data order=data;
  tables BladesCluster12*Find / expected cellchi2 norow nocol chisq;
  output out=ChiSqBladesCluster12Find n nmiss pchi lrchi;
  title 'BladesCluster12';
proc freq data=All_sword_data order=data;
  tables CrossSectionCluster8*Find / expected cellchi2 norow nocol chisq;
  output out=ChiSqDataCSCluster8Find n nmiss pchi lrchi;
  title 'CrossSectionCluster8';
proc freq data=All_sword_data order=data;
  tables CrossSectionCluster12*Find / expected cellchi2 norow nocol chisq;
  output out=ChiSqDataCSCluster12Find n nmiss pchi lrchi;
  title 'CrossSectionCluster12';
proc freq data=All_sword_data order=data;
  tables HiltCluster6*Find / expected cellchi2 norow nocol chisq;
  output out=ChiSqDataHiltCluster6Find n nmiss pchi lrchi;
  title 'HiltCluster6';
proc freq data=All_sword_data order=data;
  tables HiltCluster10*Find / expected cellchi2 norow nocol chisq;
  output out=ChiSqDataHiltCluster10Find n nmiss pchi lrchi;
  title 'HiltCluster10';
proc freq data=All_sword_data order=data;
  tables RivetsCluster15*Find / expected cellchi2 norow nocol chisq;
  output out=ChiSqDataRivetsCluster15Find n nmiss pchi lrchi;
  title 'RivetsCluster15';
run;
quit;

/* Cross-tabulate each cluster variable against Style. Each step prints
   expected counts and per-cell chi-square contributions (row and column
   percentages suppressed), runs the chi-square tests, and saves N, NMISS,
   the Pearson and the likelihood-ratio chi-square to its own dataset.
   The OUT= datasets end in "Style": the previous names ended in "Find" and
   therefore overwrote the statistics saved by the cluster-by-Find tests. */
proc freq data=All_sword_data order=data;
  tables BladesCluster7*Style / expected cellchi2 norow nocol chisq;
  output out=ChiSqDataBladesCluster7Style n nmiss pchi lrchi;
  title 'BladesCluster7';
proc freq data=All_sword_data order=data;
  tables BladesCluster12*Style / expected cellchi2 norow nocol chisq;
  output out=ChiSqDataBladesCluster12Style n nmiss pchi lrchi;
  title 'BladesCluster12';
proc freq data=All_sword_data order=data;
  tables CrossSectionCluster8*Style / expected cellchi2 norow nocol chisq;
  output out=ChiSqDataCSCluster8Style n nmiss pchi lrchi;
  title 'CrossSectionCluster8';
proc freq data=All_sword_data order=data;
  tables CrossSectionCluster12*Style / expected cellchi2 norow nocol chisq;
  output out=ChiSqDataCSCluster12Style n nmiss pchi lrchi;
  title 'CrossSectionCluster12';
proc freq data=All_sword_data order=data;
  tables HiltCluster6*Style / expected cellchi2 norow nocol chisq;
  output out=ChiSqDataHiltCluster6Style n nmiss pchi lrchi;
  title 'HiltCluster6';
proc freq data=All_sword_data order=data;
  tables HiltCluster10*Style / expected cellchi2 norow nocol chisq;
  output out=ChiSqDataHiltCluster10Style n nmiss pchi lrchi;
  title 'HiltCluster10';
proc freq data=All_sword_data order=data;
  tables RivetsCluster15*Style / expected cellchi2 norow nocol chisq;
  output out=ChiSqDataRivetsCluster15Style n nmiss pchi lrchi;
  title 'RivetsCluster15';
run;
quit;


/*Collect distance data from ANOVAs in step 5 for MSP analysis.
First, data from the means above is collected.  If a variable shows significance between at least 2 groups, the means for that variable are recorded.
Negative distances are removed (since it is just the inverse of an existing positive difference in the matrix).
These data are collected and filtered manually, then imported below. 

A second MSP is also done where the distance is 0 if there is no significant value and 1 if there is a significant value between variable means.
Those counts are added between variables and only one msp is created per grouping.  As above, these counts are collected manually from the print outs above.*/

/*Step 4: Create minimum spanning tree based on significant differences as calculated in Step6. */


/*Min Span Trees based on Style category */ 
/* Link list for the style groups: one row per pair (from, to, weight),
   followed by a minimum spanning tree over those links and a listing of
   the tree. */
data Style_Meanscount;
  /* List input with a bare $ creates 8-byte character variables, which cut
     names such as "Schalenknaufschwerter" down to 8 characters. Declare the
     length first so the node names are stored and printed in full. */
  length from to $ 24;
  input from $ to $ weight @@;
  datalines;
Achtkantschwerter Driewulschwerter 7
Achtkantschwerter Moerigenschwerter 4
Achtkantschwerter Riegseeschwerter 5
Achtkantschwerter Schalenknaufschwerter 8
Driewulschwerter Moerigenschwerter 4
Driewulschwerter Riegseeschwerter 6
Driewulschwerter Schalenknaufschwerter 10
Moerigenschwerter Riegseeschwerter 3
Moerigenschwerter Schalenknaufschwerter 7
Riegseeschwerter Schalenknaufschwerter 3
;
proc optnet data_links = Style_Meanscount; minspantree out= Style_Meanscount_MinSpanTree; run;
proc print data= Style_Meanscount_MinSpanTree ;run;

/* Weighted link list for the style groups: one row per pair
   (from, to, weight), followed by a minimum spanning tree over those links
   and a listing of the tree. */
data Style_Weighted;
  /* List input with a bare $ creates 8-byte character variables, which cut
     names such as "Schalenknaufschwerter" down to 8 characters. Declare the
     length first so the node names are stored and printed in full. */
  length from to $ 24;
  input from $ to $ weight @@;
  datalines;
Achtkantschwerter Driewulschwerter 2.139017
Achtkantschwerter Moerigenschwerter 1.088844
Achtkantschwerter Riegseeschwerter 1.434397
Achtkantschwerter Schalenknaufschwerter 1.65439
Driewulschwerter Moerigenschwerter 0.870197
Driewulschwerter Riegseeschwerter 1.913983
Driewulschwerter Schalenknaufschwerter 2.84987
Moerigenschwerter Riegseeschwerter 1.304093
Moerigenschwerter Schalenknaufschwerter 1.110226
Riegseeschwerter Schalenknaufschwerter 1.941732
;
proc optnet data_links = Style_Weighted; minspantree out= Style_Weighted_MinSpanTree; run;
proc print data= Style_Weighted_MinSpanTree ;run;




/*Min Span Trees based on 6 Location Clusters  - means count only  */ 
/* Link list for the six location groups (from, to, weight), then the
   minimum spanning tree over those links and a listing of the tree. */
data Loc6_Means_Count;
  input from $ to $ weight @@;
  datalines;
Loc1 Loc2 2
Loc1 Loc3 2
Loc1 Loc4 2
Loc1 Loc5 2
Loc1 Loc6 0
Loc2 Loc3 2
Loc2 Loc4 4
Loc2 Loc5 2
Loc2 Loc6 0
Loc3 Loc4 1
Loc3 Loc5 1
Loc3 Loc6 2
Loc4 Loc5 1
Loc4 Loc6 2
Loc5 Loc6 1
;
proc optnet data_links=Loc6_Means_Count;
  minspantree out=Loc6_Means_Count_MinSpanTree;
run;
proc print data=Loc6_Means_Count_MinSpanTree;
run;




/*Min Span Trees based on 6 Location Clusters  - weighted by distance between means  */ 
/* Weighted link list for the six location groups (from, to, weight), then
   the minimum spanning tree over those links and a listing of the tree. */
data Loc6_Weighted;
  input from $ to $ weight @@;
  datalines;
Loc1 Loc2 0.29879
Loc1 Loc3 0.3346
Loc1 Loc4 0.85652
Loc1 Loc5 0.91938
Loc1 Loc6 1.24066
Loc2 Loc3 0.14179
Loc2 Loc4 0.6751
Loc2 Loc5 0.74165
Loc2 Loc6 1.12047
Loc3 Loc4 0.62479
Loc3 Loc5 0.62254
Loc3 Loc6 0.36613
Loc4 Loc5 0.99309
Loc4 Loc6 0.66047
Loc5 Loc6 0.73241
;
proc optnet data_links=Loc6_Weighted;
  minspantree out=Loc6_WeightedMinSpanTree;
run;
proc print data=Loc6_WeightedMinSpanTree;
run;





/*Min Span Trees based on 12 Blade Clusters - means count */ 
/* Link list for the twelve blade groups (from, to, weight), then the
   minimum spanning tree over those links and a listing of the tree. */
data B12_MeansCount;
  input from $ to $ weight @@;
  datalines;
Blade1 Blade2 2
Blade1 Blade3 0
Blade1 Blade4 0
Blade1 Blade5 1
Blade1 Blade6 0
Blade1 Blade7 0
Blade1 Blade8 0
Blade1 Blade9 2
Blade1 Blade10 0
Blade1 Blade11 0
Blade1 Blade12 0
Blade2 Blade3 0
Blade2 Blade4 2
Blade2 Blade5 3
Blade2 Blade6 2
Blade2 Blade7 2
Blade2 Blade8 0
Blade2 Blade9 0
Blade2 Blade10 0
Blade2 Blade11 0
Blade2 Blade12 0
Blade3 Blade4 0
Blade3 Blade5 1
Blade3 Blade6 0
Blade3 Blade7 0
Blade3 Blade8 0
Blade3 Blade9 0
Blade3 Blade10 0
Blade3 Blade11 0
Blade3 Blade12 0
Blade4 Blade5 1
Blade4 Blade6 0
Blade4 Blade7 0
Blade4 Blade8 0
Blade4 Blade9 0
Blade4 Blade10 0
Blade4 Blade11 0
Blade4 Blade12 0
Blade5 Blade6 1
Blade5 Blade7 1
Blade5 Blade8 1
Blade5 Blade9 1
Blade5 Blade10 1
Blade5 Blade11 1
Blade5 Blade12 0
Blade6 Blade7 0
Blade6 Blade8 0
Blade6 Blade9 0
Blade6 Blade10 0
Blade6 Blade11 0
Blade6 Blade12 0
Blade7 Blade8 0
Blade7 Blade9 2
Blade7 Blade10 0
Blade7 Blade11 0
Blade7 Blade12 0
Blade8 Blade9 0
Blade8 Blade10 0
Blade8 Blade11 0
Blade8 Blade12 0
Blade9 Blade10 0
Blade9 Blade11 0
Blade9 Blade12 0
Blade10 Blade11 0
Blade10 Blade12 0
Blade11 Blade12 0
;
proc optnet data_links=B12_MeansCount;
  minspantree out=B12_MeansCount_MinSpanTree;
run;
proc print data=B12_MeansCount_MinSpanTree;
run;






/*Min Span Trees based on 12 Blade Clusters - weighted */ 
/* Weighted link list for the twelve blade groups (from, to, weight), then
   the minimum spanning tree over those links and a listing of the tree. */
data B12_Weighted;
  input from $ to $ weight @@;
  datalines;
Blade1 Blade2 0.14372
Blade1 Blade3 0.119136
Blade1 Blade4 0.276725
Blade1 Blade5 0.737236
Blade1 Blade6 0.37774
Blade1 Blade7 0.218087
Blade1 Blade8 0.104628
Blade1 Blade9 0.235712
Blade1 Blade10 0.331636
Blade1 Blade11 0.18539
Blade1 Blade12 0.153907
Blade2 Blade3 0.163432
Blade2 Blade4 0.315799
Blade2 Blade5 0.756965
Blade2 Blade6 0.397439
Blade2 Blade7 0.243212
Blade2 Blade8 0.068512
Blade2 Blade9 0.278516
Blade2 Blade10 0.296114
Blade2 Blade11 0.23939
Blade2 Blade12 0.070903
Blade3 Blade4 0.164559
Blade3 Blade5 0.618987
Blade3 Blade6 0.277874
Blade3 Blade7 0.126718
Blade3 Blade8 0.113901
Blade3 Blade9 0.163136
Blade3 Blade10 0.219788
Blade3 Blade11 0.088952
Blade3 Blade12 0.184046
Blade4 Blade5 0.337679
Blade4 Blade6 0.43596
Blade4 Blade7 0.044016
Blade4 Blade8 0.129869
Blade4 Blade9 0.288276
Blade4 Blade10 0.121084
Blade4 Blade11 0.097041
Blade4 Blade12 0.314233
Blade5 Blade6 0.391566
Blade5 Blade7 0.538747
Blade5 Blade8 0.688473
Blade5 Blade9 0.659466
Blade5 Blade10 0.469932
Blade5 Blade11 0.557531
Blade5 Blade12 0.450699
Blade6 Blade7 0.230042
Blade6 Blade8 0.347907
Blade6 Blade9 0.293677
Blade6 Blade10 0.101325
Blade6 Blade11 0.192362
Blade6 Blade12 0.66812
Blade7 Blade8 0.174701
Blade7 Blade9 0.171703
Blade7 Blade10 0.160799
Blade7 Blade11 0.053836
Blade7 Blade12 0.215336
Blade8 Blade9 0.241837
Blade8 Blade10 0.255057
Blade8 Blade11 0.189879
Blade8 Blade12 0.065312
Blade9 Blade10 0.227934
Blade9 Blade11 0.119508
Blade9 Blade12 0.299695
Blade10 Blade11 0.146257
Blade10 Blade12 0.318714
Blade11 Blade12 0.249598
;
proc optnet data_links=B12_Weighted;
  minspantree out=B12_Weighted_MinSpanTree;
run;
proc print data=B12_Weighted_MinSpanTree;
run;




/*Min Span Trees based on 10 Hilt Clusters - means count */ 
/* Link list for the ten hilt groups (from, to, weight), then the minimum
   spanning tree over those links and a listing of the tree. */
data H10_Means_Count;
  input from $ to $ weight @@;
  datalines;
Hilt1 Hilt2 0
Hilt1 Hilt3 0
Hilt1 Hilt4 2
Hilt1 Hilt5 0
Hilt1 Hilt6 1
Hilt1 Hilt7 1
Hilt1 Hilt8 3
Hilt1 Hilt9 0
Hilt1 Hilt10 0
Hilt2 Hilt3 0
Hilt2 Hilt4 1
Hilt2 Hilt5 0
Hilt2 Hilt6 1
Hilt2 Hilt7 1
Hilt2 Hilt8 3
Hilt2 Hilt9 0
Hilt2 Hilt10 0
Hilt3 Hilt4 0
Hilt3 Hilt5 0
Hilt3 Hilt6 1
Hilt3 Hilt7 0
Hilt3 Hilt8 3
Hilt3 Hilt9 1
Hilt3 Hilt10 0
Hilt4 Hilt5 0
Hilt4 Hilt6 1
Hilt4 Hilt7 0
Hilt4 Hilt8 2
Hilt4 Hilt9 1
Hilt4 Hilt10 0
Hilt5 Hilt6 1
Hilt5 Hilt7 0
Hilt5 Hilt8 3
Hilt5 Hilt9 0
Hilt5 Hilt10 0
Hilt6 Hilt7 1
Hilt6 Hilt8 1
Hilt6 Hilt9 0
Hilt6 Hilt10 0
Hilt7 Hilt8 2
Hilt7 Hilt9 1
Hilt7 Hilt10 0
Hilt8 Hilt9 1
Hilt8 Hilt10 0
Hilt9 Hilt10 0
;
proc optnet data_links=H10_Means_Count;
  minspantree out=H10_Means_Count_MinSpanTree;
run;
proc print data=H10_Means_Count_MinSpanTree;
run;

/*Min Span Trees based on 10 Hilt Clusters - weighted */ 
/* Weighted link list for the ten hilt groups (from, to, weight), then the
   minimum spanning tree over those links and a listing of the tree.
   NOTE(review): the Hilt8-Hilt10 weight is exactly 0 while every other
   Hilt8 link is above 1.2 - confirm this is a real distance and not a
   missing value, since a zero-weight link is always picked for the tree. */
data H10_Weighted;
  input from $ to $ weight @@;
  datalines;
Hilt1 Hilt2 0.147011
Hilt1 Hilt3 0.329399
Hilt1 Hilt4 0.639201
Hilt1 Hilt5 0.130389
Hilt1 Hilt6 0.722854
Hilt1 Hilt7 0.216911
Hilt1 Hilt8 1.85732
Hilt1 Hilt9 0.702369
Hilt1 Hilt10 0.18636
Hilt2 Hilt3 0.2623
Hilt2 Hilt4 0.564842
Hilt2 Hilt5 0.25792
Hilt2 Hilt6 0.730736
Hilt2 Hilt7 0.185202
Hilt2 Hilt8 1.821
Hilt2 Hilt9 0.69301
Hilt2 Hilt10 0.127901
Hilt3 Hilt4 0.371298
Hilt3 Hilt5 0.36439
Hilt3 Hilt6 0.957274
Hilt3 Hilt7 0.232152
Hilt3 Hilt8 1.91686
Hilt3 Hilt9 0.58197
Hilt3 Hilt10 0.038539
Hilt4 Hilt5 0.657188
Hilt4 Hilt6 1.096966
Hilt4 Hilt7 0.45531
Hilt4 Hilt8 1.83599
Hilt4 Hilt9 0.645008
Hilt4 Hilt10 0.160331
Hilt5 Hilt6 0.852874
Hilt5 Hilt7 0.251892
Hilt5 Hilt8 1.89726
Hilt5 Hilt9 0.7561
Hilt5 Hilt10 0.170779
Hilt6 Hilt7 0.827236
Hilt6 Hilt8 1.22947
Hilt6 Hilt9 0.780594
Hilt6 Hilt10 0.214435
Hilt7 Hilt8 1.87706
Hilt7 Hilt9 0.754342
Hilt7 Hilt10 0.145181
Hilt8 Hilt9 1.53563
Hilt8 Hilt10 0
Hilt9 Hilt10 0.190259
;
proc optnet data_links=H10_Weighted;
  minspantree out=H10_Weighted_MinSpanTree;
run;
proc print data=H10_Weighted_MinSpanTree;
run;






/*Step 5: A network graph is created in Gephi using the SAS data three ways:
Graph 1: In each method of grouping, add up the weights between points where they show up in the above MSPs to weight the links between groups.  
In this graph, variables that have a larger difference between groups affect the linkage weight more.
Graph 2: For each link on a MSP from above, add 1 to the weight of the link between groups.
In this graph, all variables have equal weight in the network linkage.
Graph 3: Weights for links are determined by the MSP done with a 0/1 significance count.
The weights on this graph are determined by the presence or absence of significant differences between groups rather than the distance between the means.
*/


/*Other Analysis:  the code below was used for cluster analysis of decoration and regression of location to hilt shape. */


/* circles */
proc aceclus data=All_sword_data out=Circle_Ace p=.03 noprint;
  var CircleDist Radius CircleLines;
run;

/* Ward clustering on the two canonical variables written by ACECLUS;
   the CCC and pseudo statistics are requested to help pick a cut. */
ods graphics on;
proc cluster data=Circle_Ace method=ward ccc pseudo print=15 outtree=Circle_Tree;
  var can1 can2;
  id number;
  format number;
run;
ods graphics off;

/* Graphs of CCC, Pseudo F, and Pseudo T-squared suggest possible groups at 3 and 5.
   Each is given its own work table and graphed to check for outliers. */
goptions vsize=9in hsize=6.4in htext=.9pct htitle=3pct;
axis1 order=(0 to 1 by 0.2);
proc tree data=Circle_Tree out=Circle__3 nclusters=3 haxis=axis1 horizontal;
  height _rsq_;
  copy can1 can2;
  id number;
run;
proc tree data=Circle_Tree out=Circle__5 nclusters=5 haxis=axis1 horizontal noprint;
  height _rsq_;
  copy can1 can2;
  id number;
run;
proc sgplot data=Circle__3;
  scatter y=can2 x=can1 / group=cluster;
run;
proc sgplot data=Circle__5;
  scatter y=can2 x=can1 / group=cluster;
run;

/* Keep only the id and the 3-cluster assignment, renamed to CircleClus. */
data Circle__3;
  set Circle__3(rename=(CLUSTER=CircleClus) drop=Can1 Can2 CLUSNAME);
run;
/*Use 3 - 6 clusters makes groups that are too small */


/*dashes */
	proc aceclus data=All_sword_data out=Dash_Ace p=.03 noprint;
		var DashL DashDist;
	run;

	/* Ward clustering on the two canonical variables written by ACECLUS;
	   the CCC and pseudo statistics are requested to help pick a cut. */
	ods graphics on;
	proc cluster data=Dash_Ace method=ward ccc pseudo print=15 outtree=Dash_Tree;
		var can1 can2;
		id number;
		format number;
	run;
	ods graphics off;

	/* Graphs of CCC, Pseudo F, and Pseudo T-squared suggest possible groups at 5.
	   The result is given its own work table and graphed to check for outliers. */
	goptions vsize=9in hsize=6.4in htext=.9pct htitle=3pct;
	axis1 order=(0 to 1 by 0.2);
	proc tree data=Dash_Tree out=Dash__5 nclusters=5 haxis=axis1 horizontal;
		height _rsq_;
		copy can1 can2;
		id number;
	run;
	proc sgplot data=Dash__5;
		scatter y=can2 x=can1 / group=cluster;
	run;

	/* Keep only the id and the 5-cluster assignment, renamed to DashClus. */
	data Dash__5;
		set Dash__5(rename=(CLUSTER=DashClus) drop=Can1 Can2 CLUSNAME);
	run;

/*Use 5 clusters makes groups.  Only 1 outlier.*/


	/*parallel curves - only 1 variable, no aceclus transform */
	/* Centroid clustering directly on CurvedDist; the CCC and pseudo
	   statistics are requested to help pick a cut. */
	ods graphics on;
	proc cluster data=All_sword_data method=centroid ccc pseudo print=15 outtree=Curve_Tree;
		var CurvedDist;
		id number;
		format number;
	run;
	ods graphics off;

	/* Graphs of CCC, Pseudo F, and Pseudo T-squared suggest possible groups at 4.
	   The result is given its own work table. */
	goptions vsize=9in hsize=6.4in htext=.9pct htitle=3pct;
	axis1 order=(0 to 1 by 0.2);
	proc tree data=Curve_Tree out=Curve__4 nclusters=4 haxis=axis1 horizontal;
		height _rsq_;
		id number;
	run;

	/* Keep only the id and the 4-cluster assignment, renamed to CurveClus. */
	data Curve__4;
		set Curve__4(rename=(CLUSTER=CurveClus) drop=CLUSNAME);
	run;
/*Use 4 clusters makes groups.  Only 1 outlier.*/

	/*parallel straights */
	/* Ward clustering directly on StraightDist; the CCC and pseudo
	   statistics are requested to help pick a cut. */
	ods graphics on;
	proc cluster data=All_sword_data method=ward ccc pseudo print=15 outtree=Straight_Tree;
		var StraightDist;
		id number;
		format number;
	run;
	ods graphics off;

	/* Graphs of CCC, Pseudo F, and Pseudo T-squared suggest possible groups at 8.
	   The result is given its own work table. */
	goptions vsize=9in hsize=6.4in htext=.9pct htitle=3pct;
	axis1 order=(0 to 1 by 0.2);
	/* NCLUSTERS= is the PROC TREE option that sets the number of clusters
	   written to OUT=; this step previously said "clusters=8", unlike the
	   circle, dash and curve steps. */
	proc tree data=Straight_Tree out=Straight__8 nclusters=8 haxis=axis1 horizontal;
		height _rsq_;
		id number;
	run;

	/* Keep only the id and the 8-cluster assignment.
	   NOTE(review): "StraigthClus" looks like a misspelling of "StraightClus";
	   it is left unchanged because the name becomes a column of
	   All_sword_data when the cluster tables are merged. */
	data Straight__8;
		set Straight__8(rename=(CLUSTER=StraigthClus) drop=CLUSNAME);
	run;

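/*Use 8 clusters, matching the cut made above.  (The earlier note read "4 clusters ... Only 1 outlier"; the outlier count for the 8-cluster cut should be re-checked.)*/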

/* add cluster data to all data*/
	/* Sort the master table and the four cluster tables in place by Number,
	   then match-merge the cluster assignments onto the master table. */
	proc sort data=All_sword_data;
		by Number;
	run;
	proc sort data=Circle__3;
		by Number;
	run;
	proc sort data=Dash__5;
		by Number;
	run;
	proc sort data=Curve__4;
		by Number;
	run;
	proc sort data=Straight__8;
		by Number;
	run;
	data All_sword_data;
		merge All_sword_data Circle__3 Dash__5 Curve__4 Straight__8;
		by Number;
	run;

	/*discrim test */
/* Canonical discriminant analysis of prin1-prin5 from Hilts_pca with
   cross-validated classification listing; scores are written to canout.
   NOTE(review): lat is the CLASS variable, so every distinct lat value is
   treated as its own group - confirm this is intended. */
proc discrim data=Hilts_pca can crosslist out=canout;
  class lat;
  var prin1-prin5;
run;



/* Subset of All_sword_data used for the blade/hilt cross-tabulation.
   The three column ranges are dropped through ONE drop= list: the step
   previously repeated drop= three times inside the same option list, which
   leaves it to SAS how the repeats are resolved.
   Rows with a missing BladesCluster12 or HiltCluster10 are removed; CMISS
   returns the number of missing arguments, so any non-zero count deletes. */
data All_Blades;
  set All_sword_data(drop=Find--LocCluster10
                          CrossSectionCluster8--CrossSectionCluster12
                          RivetsCluster15);
  if cmiss(BladesCluster12, HiltCluster10) then delete;
run;


/* Chi-square test of BladesCluster7 against HiltCluster6 on the rows kept
   in All_Blades; statistics are saved to ChiSqDataBlades. */
proc freq data=All_Blades order=data;
  tables BladesCluster7*HiltCluster6 / expected cellchi2 norow nocol chisq;
  output out=ChiSqDataBlades n nmiss pchi lrchi;
  title 'BladesHilts';
run;

/* Chi-square test of BladesCluster7 against CrossSectionCluster12 on the
   full table. This step used to reuse out=ChiSqDataBlades and the title
   'BladesHilts', which overwrote the blade/hilt statistics and mislabelled
   the output; it now has its own dataset and title. */
proc freq data=All_sword_data order=data;
  tables BladesCluster7*CrossSectionCluster12 / expected cellchi2 norow nocol chisq;
  output out=ChiSqDataBladesCS n nmiss pchi lrchi;
  title 'BladesCrossSections';
run;



/* Regress Lat, then Lon, on the positional range HPrin1--HPrin11.
   Each fit writes its estimates to est1 and its SSCP matrix to sscp1, and
   both tables are printed before the next fit reuses the same names. */
proc reg data=All_sword_data outest=est1 outsscp=sscp1 rsquare;
  eq1: model Lat = HPrin1--HPrin11;
proc print data=sscp1;
  title2 'SSCP type data set';
proc print data=est1;
  title2 'EST type data set';

proc reg data=All_sword_data outest=est1 outsscp=sscp1 rsquare;
  eq1: model Lon = HPrin1--HPrin11;
proc print data=sscp1;
  title2 'SSCP type data set';
proc print data=est1;
  title2 'EST type data set';
run;

   /* Build Est_Location from All_sword_data and add Lat2 / Lon2, linear
      combinations of HPRIN1-HPRIN11 with hard-coded coefficients
      (presumably copied from the Lat and Lon regressions on the hilt PCs -
      verify against the est1 printouts before reuse).
      The unwanted columns are dropped through ONE drop= list: the step
      previously repeated drop= three times inside the same option list,
      which leaves it to SAS how the repeats are resolved. */
   data Est_Location;
      set All_sword_data(drop=style Find--CSPrin5 RPrin1--RivetsCluster15);
      Lat2 = 48.31197
           -  3.13234*HPRIN1  +  8.63701*HPRIN2  - 10.03307*HPRIN3
           +  2.88708*HPRIN4  -  7.02584*HPRIN5  +  4.62069*HPRIN6
           + 14.51688*HPRIN7  -  9.39206*HPRIN8  + 24.01061*HPRIN9
           - 12.70149*HPRIN10 +  7.26966*HPRIN11;
      Lon2 = 13.78801
           +  9.35802*HPRIN1  - 21.78429*HPRIN2  + 18.06027*HPRIN3
           + 26.10363*HPRIN4  - 27.50622*HPRIN5  +  3.90810*HPRIN6
           + 14.97578*HPRIN7  - 53.59689*HPRIN8  - 33.28697*HPRIN9
           + 23.31321*HPRIN10 -  4.56811*HPRIN11;
   run;


/* Canonical discriminant analysis of BPrin1-Bprin3 by HiltCluster9 with
   Mahalanobis distances between class means; scores go to canout (the same
   dataset name the PROC DISCRIM step writes).
   NOTE(review): the rest of this section only uses HiltCluster6 and
   HiltCluster10 - confirm that HiltCluster9 exists in All_sword_data. */
proc candisc data=All_sword_data mahalanobis out=canout;
  class HiltCluster9;
  var BPrin1-Bprin3;
run;






