options formdlim='=' nocenter nodate nonumber ls=256; Title 'CPO';title; proc import DATAFILE = "F:\scottdavis\scottdavis.csv" OUT=scott DBMS=csv replace; delimiter = ';'; guessingrows = 500; getnames = yes; run; proc contents data=scott;run; /********************************************************************************/ /* */ /* DATA SET CREATION */ /* 1) FULL TABLE OF 6 VARIABLES */ /* 2) AGGREGATED SET */ /* 3.a) LOW CELL COUNT */ /* 3.b) CELL COUNTS > 5 */ /* 4) CUSTOMER/BRAND/CHANNEL */ /* 5.a) NEW SET */ /* 5.b) RECONDITIONED SET */ /* 6.a) CORDLESS SET */ /* 6.b) NOT CORDLESS SETS */ /* */ /********************************************************************************/ /* A) FULL TABLE OF 6 VARIABLES */ data fulltable_1 (compress=yes); set scott(keep= cposku branddesc saleschanneldesc salesconditiondesc cordless incrange genderdesc); where cposku ~= 'DOESNOTEXIST' and branddesc in("Porter Cable", "Bosch", "Makita", "Milwaukee", "Campbell Hausfeld", "Bostitch", "Delta", "Festool", "Metabo", "Skil", "Karcher", "Powermatic", "Dewalt", "Ryobi", "Homelite", "Jet", "Ridgid", "Senco", "Black & Decker", "Fein", "Panasonic") and saleschanneldesc ~= 'Does Not Exist' and salesconditiondesc ~= 'Unknown' and incrange ~= 'UNK'; /* Removes instant water heaters and truck boxes. */ pws = substr(cposku,1,3); if pws = 'PWS' and branddesc = 'Bosch' then delete; dtb = substr(cposku,1,3); if dtb = 'DTB' and branddesc = 'Delta' then delete; if genderdesc in('Probably Male' 'Male') then genderdesc = 'Male'; if genderdesc in('Probably Female' 'Female') then genderdesc = 'Female'; if genderdesc in('Ambiguous' 'Unknown') then genderdesc = 'Not Known'; if saleschanneldesc in('Club CPO: Phone','Club CPO: Web','Other','Outlets: Phone','Outlets: Web', 'Press.Wash: Pho','Press.Wash: Web','Rec Tools: Web','Recon Tools:Phn', 'Tool Crib: Phon','Tool Crib: Web','Wholesale') then saleschanneldesc = 'agg_channel'; if incrange in('-10','10-15','15-20','20-25','25-30') then incrange = 'Low'; if incrange in('30-35','35-40','40-45','45-50','50-60','60-75') then incrange = 'Middle'; if incrange in('75-100','100-125','125-150','150-200','200-') then incrange = 'High'; run; /* 1.a) AGGREGATED SET */ libname tble1agg "F:\scottdavis\Aggregated Data Sets, txt and csv"; proc sql; create table tble1agg.fulltable_1agg (compress=yes) as ( select distinct branddesc, saleschanneldesc, salesconditiondesc, incrange, genderdesc, cordless, count(*)as freq from fulltable_1 group by branddesc, saleschanneldesc, salesconditiondesc, incrange, genderdesc, cordless ); quit; proc print data=tble1agg.fulltable_1agg; run; libname tble1agg "F:\scottdavis\Aggregated Data Sets, txt and csv";