We solved 4 problems in total and I solved 2 hard problems alone! Both problems have difficulty rating higher than 7.5!
Most dramatically, my solution to one problems was accepted in the very last minute! This made our team beat big insitutions like UK or UL, even some teams from UIUC or Uchicago!
Update: We received a congradulation letter from President Rush!
Letter from president
###Update: Our School have reported our achievement on its main website.
I am learning parallel computing this semester, and my project is to find the optimal solution of travelling
salesman problem in parallel. Since it is a NP problem, improving the efficiency in algorithm will be extremely hard. In order to increase the performance, we make the computations in parallel. I will demonstrate two ways to implement it in parallel, one with openmp and one with MPI.
Due to the embarassingly parallel nature of this problem, linear speed up can be achieved through both methods.
Input: table of 13 cities and traveling costs, seperated by space
#include <stdlib.h>
#include <stdio.h>
#include <iostream>
#include <algorithm>
#include <omp.h>
#define NUM_THREADS 8
int**Cities;// the table of cost
char*CityNames;// a list of city names
int*CityNumbers;int*bestPath;// a array indicating best path
intlowPrice=0;// the lowest price possible
intnumCities;int*remains;int**eachPerm;// permutation for each thread
int*Convert(intnumToConvert)// the digits of the array and the nth permutation you want
{remains=(int*)malloc(numCities*sizeof(int));int*CNList=(int*)malloc(numCities*sizeof(int));for(inti=0;i<numCities;i++){inta=CityNumbers[i];CNList[i]=a;}for(inti=0;i<numCities;i++){remains[i]=0;}intnumToDiv=1;intrem;intindex=0;while(numToConvert>0)//get the array of grab sequence
{rem=numToConvert%numToDiv;remains[index]=rem;index++;numToConvert=int(numToConvert/numToDiv);numToDiv++;}intlen=numCities-1;int*result=(int*)malloc(numCities*sizeof(int));intk=0;//grab
for(inti=len;i>=0;i--){// a = position to get from CNList
inta=remains[i];intget=CNList[a];//printf("get: %d", get);
for(intj=a;j<len;j++){//leave no holes
CNList[j]=CNList[j+1];}result[k]=get;k++;}returnresult;}intfind(intid,inttoWork){inttotal=0;intchecked=0;for(intq=0;q<toWork;q++){for(inti=0;i<numCities;i++){intfrom=eachPerm[id][i];intdestination=eachPerm[id][(i+1)%numCities];//return to the first city when get to the last one
inttrip=Cities[from][destination];total+=trip;//calculate the total price
}checked++;// printf("CPU %d is investigating the No.%d posible route: ", id, checked);
// for (int i =0; i<numCities; i++)
// {
// printf("%d", eachPerm[id][i]);
// }
// printf(", this route cost: %d, current low cost: %d. \n", total, lowPrice);
if(total<lowPrice){//update the lowprice and the sequence if finds a better solution
#pragma omp critical
{lowPrice=total;for(inti=0;i<numCities;i++){inta=eachPerm[id][i];bestPath[i]=a;}}#pragma omp end critical
}total=0;std::next_permutation(eachPerm[id],eachPerm[id]+numCities);}//while(std::next_permutation(CityNumbers+1, CityNumbers+numCities));// the first city is fixed
}intmain(){FILE*file=fopen("cityTable.txt","r");charbuff[25555];fscanf(file,"%s",buff);//get number of cities
numCities=atoi(buff);printf("total %d cities to go.\n",numCities);Cities=(int**)malloc(numCities*sizeof(int*));CityNames=(char*)malloc(numCities*sizeof(char));CityNumbers=(int*)malloc(numCities*sizeof(int));bestPath=(int*)malloc(numCities*sizeof(int));for(inti=0;i<numCities;i++){bestPath[i]=i;//give bestPath an initial value
}for(inti=0;i<numCities;i++){//get the array of city names
fscanf(file,"%s",buff);CityNames[i]=buff[0];printf("%c ",CityNames[i]);}printf("\n");for(inti=0;i<numCities;i++){CityNumbers[i]=i;}for(inti=0;i<numCities;i++){fscanf(file,"%s",buff);//waste a move
Cities[i]=(int*)malloc(numCities*sizeof(int));for(intk=0;k<numCities;k++){fscanf(file,"%s",buff);Cities[i][k]=atoi(buff);//get the price tables for the cities
}}for(inti=0;i<numCities;i++){for(intk=0;k<numCities;k++){printf("%d ",Cities[i][k]);}printf("\n");}for(inti=0;i<numCities;i++){intdestination=(i+1)%numCities;lowPrice+=Cities[i][destination];//give lowprice an initial value;
}// calculate number of permutations each thread need to go through
intnumTotalPerm,numEachPerm;inthold=1;for(inti=(numCities-1);i>0;i--){hold=hold*i;}numTotalPerm=hold;numEachPerm=numTotalPerm/NUM_THREADS;printf("TotalPerm: %d, NumEach: %d \n",numTotalPerm,numEachPerm);// start to split work for each thread
intstartPos=0;eachPerm=(int**)malloc(NUM_THREADS*sizeof(int*));for(inti=0;i<NUM_THREADS;i++){int*ptr;eachPerm[i]=(int*)malloc(numCities*sizeof(int));ptr=Convert(startPos);printf("This Thread start: ");for(inte=0;e<numCities;e++){printf("%d",ptr[e]);}printf("\n");eachPerm[i]=ptr;startPos+=numEachPerm;}//printf("Great 3\n");
//int checked=0;
intid;omp_set_num_threads(NUM_THREADS);#pragma omp parallel private(id)
{//printf("Great 4\n");
id=omp_get_thread_num();printf("I'm thread: %d\n",id);find(id,numEachPerm);}//after analysing all possibilities
printf("The lowest cost possible is: %d, if using the best route: ",lowPrice);for(inti=0;i<numCities;i++){printf("-%d",bestPath[i]);}printf("\n");//clean up and exit
for(inti=0;i<numCities;i++){free(Cities[i]);}free(CityNames);free(Cities);free(bestPath);free(CityNumbers);fclose(file);}
MPI Version
the MPI version is slitly different since the threads are all seperate
I am only posting the different part in main(), in which one thread has to gather the results of
other threads and find the best route among them.
The code of find() and Convert() should have same functionalities.
//---------------------------------------------------------------------------------
//Parallel part
intnumtasks,rank,len,rc;charhostName[MPI_MAX_PROCESSOR_NAME];intinit;MPI_Initialized(&init);if(init==0);//if not been intialized
{rc=MPI_Init(&argc,&argv);if(rc!=MPI_SUCCESS){printf("Error.\n");MPI_Abort(MPI_COMM_WORLD,rc);}}MPI_Comm_size(MPI_COMM_WORLD,&numtasks);MPI_Comm_rank(MPI_COMM_WORLD,&rank);MPI_Get_processor_name(hostName,&len);//printf("Rank: %d Running on %s \n", rank, hostName);
// calculate work for each thread
intstartPos=0;startPos+=numEachPerm*rank;Convert(startPos,CityNumbers);printf("Thread %d start: ",rank);for(inte=0;e<numCities;e++){printf("%d",eachPerm[e]);}printf("\n");//start to find best route within each thread
intthisLow=0;thisLow=find(numEachPerm);//Gather all results and find the best one among them
MPI_Barrier(MPI_COMM_WORLD);intworldLow;MPI_Reduce(&thisLow,&worldLow,1,MPI_INT,MPI_MIN,1,MPI_COMM_WORLD);MPI_Barrier(MPI_COMM_WORLD);intgatherLow[numCities];MPI_Gather(&thisLow,1,MPI_INT,gatherLow,1,MPI_INT,1,MPI_COMM_WORLD);if(rank==1){printf("Gathered Low Costs: ");for(inti=0;i<numtasks;i++){printf("%d, ",gatherLow[i]);}printf("\n");}MPI_Barrier(MPI_COMM_WORLD);intgatherPath[numtasks][numCities];MPI_Gather(bestPath,numCities,MPI_INT,gatherPath,numCities,MPI_INT,1,MPI_COMM_WORLD);if(rank==1){printf("Gathered best Paths: ");for(inti=0;i<numtasks;i++){for(intk=0;k<numCities;k++){printf("%d, ",gatherPath[i][k]);}printf("\n");}}//after analysing all possibilities
MPI_Barrier(MPI_COMM_WORLD);if(rank==1){inttotal=0;intchecked=0;intLP=99999999;for(intq=0;q<numtasks;q++){for(inti=0;i<numCities;i++){intfrom=gatherPath[q][i];intdestination=gatherPath[q][(i+1)%numCities];//return to the first city when get to the last one
intcost=Cities[from][destination];//printf("from: %d, destination: %d, cost: %d \n", from, destination,cost);
total+=cost;//calculate the total price
}checked++;if(total<LP){//update the lowprice and the sequence if finds a better solution
{LP=total;for(inti=0;i<numCities;i++){inta=gatherPath[q][i];finalPath[i]=a;}}}total=0;}printf("The lowest cost possible is: %d, if using the best route: ",worldLow);for(inti=0;i<numCities;i++){printf("-%d",finalPath[i]);}printf("\n");}MPI_Finalize();
I am practicing for the ACM contest next month. Some of the problems are real tricky. My best record so far
is a problem of 8.2 difficulty rating. But even for the comparatively simple problem, there are some elegent
solutions instead of brute force.
My sample java solution for the lexicography problem
The key of this problem is an algorithm to find nth permutation
importjava.util.*;publicclassLexico{staticArrayList<Character>info=newArrayList<Character>();publicstaticvoidmain(String[]args){Scannersc=newScanner(System.in);Stringword;//char[] info = new char[16];intpos;word=sc.next();pos=sc.nextInt();while(!(word.equals("#")&&(pos==0))){char[]hold=word.toCharArray();Arrays.sort(hold);for(inti=0;i<word.length();i++){info.add(hold[i]);//System.out.println("hold: "+hold[i]);}intdiv=1;inttarget=pos-1;intrem[]=newint[16];for(inti=0;i<16;i++){rem[i]=0;//fill rem[] with 0}intindex=0;while(target>0)//get grab seq{inta=target%div;System.out.print(a);rem[index]=a;target=(int)(target/div);div++;index++;}//grab and printcharperm[]=newchar[index];for(inti=hold.length-1;i>=0;i--){charget=hold[rem[i]];for(intk=rem[i];k<hold.length-1;k++){hold[k]=hold[k+1];}//perm[i] = get;System.out.print(get);}System.out.print("\n");//finishing one answer, reset and move to nextinfo.clear();word=sc.next();pos=sc.nextInt();}sc.close();}}
My sample java solution for the amalgamated problem
The input will be contants for a function. and the last input is number of auto-incremented x values.
The program is deisgned to find the largest deline of values among all points efficiently.
I have learned a lot about how the computer make decisions and discretions with bit-level operations.
Also, although brain-burning, using Bitwise operations in computation intensive-programming can substancially boost performance and efficiency.
So I think these stuff could be useful for my programming contests.
Here I have collected the labs problems and other puzzles I solved.
Bit Cowboy Lab – Integars
intbitAnd(intx,inty){/* get ~(x&y) using ~x|~y */intc=~x|~y;return~c;}/*
* getByte - Extract byte n from word x
* Bytes numbered from 0 (LSB) to 3 (MSB)
* Examples: getByte(0x12345678,1) = 0x56
* Legal ops: ! ~ & ^ | + << >>
* Max ops: 6
* Rating: 2
*/intgetByte(intx,intn){/* shift the word right and leave the disired two digits at the right end, the get using mask */intmask=0xff;n=n<<3;x=x>>(n);returnx&mask;}/*
* logicalShift - shift x to the right by n, using a logical shift
* Can assume that 0 <= n <= 31
* Examples: logicalShift(0x87654321,4) = 0x08765432
* Legal ops: ~ & ^ | + << >>
* Max ops: 20
* Rating: 3
*/intlogicalShift(intx,intn){/* shift the word arithmatically, then use mask to clear the left-end bytes */intmask=(1<<31)>>n;//shift only 31 to prevent overflow
mask=mask<<1;mask=~mask;x=x>>n;returnx&mask;}/*
* bitCount - returns count of number of 1's in word
* Examples: bitCount(5) = 2, bitCount(7) = 3
* Legal ops: ! ~ & ^ | + << >>
* Max ops: 40
* Rating: 4
*/intbitCount(intx){/* use mask to check bits , since have operator limits, I count every 4 bits,
then shift them rightward step by step, then add them together in the end*/intmask1,mask2,mask3,sum;mask1=0x11|(0x11<<8);//cover 16 bits, 1 for every 4 bits
mask2=mask1|(mask1<<16);//cover 32 bits, just twice as long as mask1
mask3=0xF|(0xF<<8);// mask first 8 bits
sum=x&mask2;// count 1's
sum=sum+((x>>1)&mask2);sum=sum+((x>>2)&mask2);sum=sum+((x>>3)&mask2);//sum calculates the 1's is first 4 bits and other unwanted value
sum=sum+(sum>>16);// get rid of unwanted value
sum=(sum&mask3)+((sum>>4)&mask3);// limit maximum to 6 bits, because largest decimal value is 32.
return((sum+(sum>>8))&0x3F);}/*
* bang - Compute !x without using !
* Examples: bang(3) = 0, bang(0) = 1
* Legal ops: ~ & ^ | + << >>
* Max ops: 12
* Rating: 4
*/intbang(intx){/* x or negative x will have 1 as thier first digit, but 0 will not*/intmask=0x1;intnegative=~x+1;inta=((x>>31)&mask);intb=((negative>>31)&mask);// one of a or b will be 1 if x is not 0
return(a|b)^mask;}/*
* tmin - return minimum two's complement integer
* Legal ops: ! ~ & ^ | + << >>
* Max ops: 4
* Rating: 1
*/inttmin(void){/*juest need to get 0x100000.....*/return1<<31;}/*
* fitsBits - return 1 if x can be represented as an
* n-bit, two's complement integer.
* 1 <= n <= 32
* Examples: fitsBits(5,3) = 0, fitsBits(-4,3) = 1
* Legal ops: ! ~ & ^ | + << >>
* Max ops: 15
* Rating: 2
*/intfitsBits(intx,intn){/*if the bits from the left end till n are all same, all 0 or all 1, then the leftside
digits can be abadoned and express the same value with only n bits, so the key is to
determine wether the first 32 - n bits are all same*/inthigh=32+(~n+1);intshift=(x<<high)>>high;// this changes all hige order bits to 1 or 0;
return!(shift^x);// check whether x is still same
}/*
* divpwr2 - Compute x/(2^n), for 0 <= n <= 30
* Round toward zero
* Examples: divpwr2(15,1) = 7, divpwr2(-33,4) = -2
* Legal ops: ! ~ & ^ | + << >>
* Max ops: 15
* Rating: 2
*/intdivpwr2(intx,intn){/*needs to round toward 0, so need to know whether x is positive*/intmask=(1<<n)+~0;intbias=(x>>31)&mask;return(x+bias)>>n;}/*
* negate - return -x
* Example: negate(1) = -1.
* Legal ops: ! ~ & ^ | + << >>
* Max ops: 5
* Rating: 2
*/intnegate(intx){/* ~x equals -(2max-abs(x)), thus needs to plus 1*/return~x+1;}/*
* isPositive - return 1 if x > 0, return 0 otherwise
* Example: isPositive(-1) = 0.
* Legal ops: ! ~ & ^ | + << >>
* Max ops: 8
* Rating: 3
*/intisPositive(intx){/* if is negative, the first digit is 1, other wise is 0*///then just get the first digit;
intmask=0x1<<31;intfirstdigit=x&(mask);// first digit is 0 if x is negative , 1 if x is positive or 0
return!(firstdigit>>31|!x);//using |!x to count for 0
}/*
* isLessOrEqual - if x <= y then return 1, else return 0
* Example: isLessOrEqual(4,5) = 1.
* Legal ops: ! ~ & ^ | + << >>
* Max ops: 24
* Rating: 3
*/intisLessOrEqual(intx,inty){/* if x and y has same sign, check the sign of thier differnece, if x and y has different signs
return 0 if x is positive and if x is negative;*/intsign,isLess,dif,equal,isLessorEqual;sign=x^y;isLess=~sign;dif=y+(~x+1);equal=!dif;sign=sign&x;dif=~dif;isLess=isLess&dif;isLess=isLess|sign;isLess=isLess&(1<<31);isLessorEqual=(!!isLess)|(equal);returnisLessorEqual;}/*
* ilog2 - return floor(log base 2 of x), where x > 0
* Example: ilog2(16) = 4
* Legal ops: ! ~ & ^ | + << >>
* Max ops: 90
* Rating: 4
*/intilog2(intx){/* the value of log2 in binary is actually represented by how many digits are used, the
answer shoudld be the number of digits used minus one. I use a method similar to binary search
to find the most significant 1*/intout=0;inta=(!!(x>>16))<<31>>31;// if most sig is in right, a is false, if in left 16 digits a is true;
out+=a&16;x=x>>(a&16);//shift 16 if is on left side,
a=(!!(x>>8))<<31>>31;out+=a&8;x=x>>(a&8);a=(!!(x>>4))<<31>>31;out+=a&4;x=x>>(a&4);a=(!!(x>>2))<<31>>31;out+=a&2;x=x>>(a&2);a=(!!(x>>1))<<31>>31;out+=a&1;returnout;}
Bit Cowboy Lab – Floating numbers
/*
* float_neg - Return bit-level equivalent of expression -f for
* floating point argument f.
* Both the argument and result are passed as unsigned int's, but
* they are to be interpreted as the bit-level representations of
* single-precision floating point values.
* When argument is NaN, return argument.
* Legal ops: Any integer/unsigned operations incl. ||, &&. also if, while
* Max ops: 10
* Rating: 2
*/unsignedfloat_neg(unsigneduf){/*need to negate the first digit since it represents sign, and keep every thing else;
use mask to detect special case NaN, and return original
*/unsignedexp=uf>>23&0xFF;unsignedfra=uf<<9;unsigneda=0x1<<31;if((exp==0xFF)&&(!!fra)){returnuf;}returnuf^a;}/*
* float_i2f - Return bit-level equivalent of expression (float) x
* Result is returned as unsigned int, but
* it is to be interpreted as the bit-level representation of a
* single-precision floating point values.
* Legal ops: Any integer/unsigned operations incl. ||, &&. also if, while
* Max ops: 30
* Rating: 4
*/unsignedfloat_i2f(intx){return2;}/*
* float_twice - Return bit-level equivalent of expression 2*f for
* floating point argument f.
* Both the argument and result are passed as unsigned int's, but
* they are to be interpreted as the bit-level representation of
* single-precision floating point values.
* When argument is NaN, return argument
* Legal ops: Any integer/unsigned operations incl. ||, &&. also if, while
* Max ops: 30
* Rating: 4
*/unsignedfloat_twice(unsigneduf){/* The challenege here is to handle differnet situations , we need to return
the number for Nan and Infi, add exp by one for normal form and add 1 to the number for
denormalized form. I seperate the three parts of the float, make the necessary changes, and
combine them to return*/unsignedsign=uf>>31<<31;unsignedexp=(((uf<<1)>>24)<<23);unsignedfra=(uf<<9)>>9;if(exp==0x7F800000){returnuf;//inifinity or NaN
}elseif(exp==0){return((uf<<1)|sign);// if in denormal form, shift left by 1 and add sign
}else{exp=(exp+(1<<23));//exp + 1 for normal
}return(sign|exp|fra);
I spend a lot of time research, build, and custom this site, which is based on the hypster theme.
Differnent from using blogs provided by other companies, I have absolute freedom here.
I will update this as a programming blog, and write :
Things I learned in class
Projects I have done. Alone or with others.
Other cool things I find.
One picture shot in SF this winter.
I don’t know how often I could update this because I am going to be very busy this year, but let’s see!