Hi, I am trying to do something in SQL that I have done in PHP, but I am trying to have the operation performed as a stored procedure because of the sheer number of inserts I have to perform in PHP. I could really use some help!
I'm not how to assign variables based on subqueries, or how to even structure the nested queries...
I have a table with the following fields
scores
userId INT
testId INT
score INT
and I would like to correlate all of the users in another table 'correlations',
correlations
user1 (FK scores)
user2 (FK scores)
total_delta (the total of the difference in test scores between user1 and user2)
num_common_test s (number of tests both users have taken in common)
avg_delta (total_delta/num_common_test s)
in PHP i'm doing the following:
[PHP]
/**
* Loads all test scores into an array from the database, ordered by userId
* Array has the form [userId,[index,testId,sc ore]]
* ...this takes very little time
*/
function get_user_scores (){
global $user_scores,$u sers,$num_users ,$con,$table,$q uery_limit;
$sql = "select userId, testId, score from ".$table." order by userId limit ".$query_li mit;
$rs = mysql_query($sq l) or die(mysql_error ()."<br>".$sql) ; //pointer to resultset from database
while ($row = mysql_fetch_ass oc($rs)) {
$testId= intval($row['testId']);
$score= intval($row['score']);
$userId = intval($row['userId']);
if(array_key_ex ists($userId,$u ser_scores)){
array_push($use r_scores[$userId],array($testId, $score));
} else {
$user_ratings[$userId] = array();
array_push($use r_scores[$userId],array($testId, $score));
}
}
$users = array_keys($use r_scores);
$num_users = count($users);
}
/**
* Builds the correlation array. For each user, compares other users who have at least one shared test.
* This builds an N choose 2 array with the form [index,user1,use r2,cummulative delta,num common tests,avg delta]
* ...very slow when users > 100
*/
function build_correlati ons(){
global $num_users,$use rs,$correlation _array,$correla tion_array_inde x;
for ($i=0; $i < $num_users; $i++){
$user1 = $users[$i];
for ($k = $i+1; $k < $num_users; $k++){
$user2 = $users[$k];
$user1_correlat ion = correlate_users ($user1,$user2) ;
if($user1_corre lation!=null){
$correlation_ar ray[$correlation_ar ray_index] = $user1_correlat ion;
$correlation_ar ray_index++;
}
}
}
}
/**
* Calculates the correlation between 2 users
*/
function correlate_users ($user1,$user2) {
global $correlation_ar ray,$user_score s;
$user1_scores= $user_scores[$user1];
$user2_scores= $user_scores[$user2];
$total_delta = 0;
$num_common_tes ts = 0;
foreach ($user1_scores as &$value1){
$test1 = $value1[0];
$user1_score = $value1[1];
foreach ($user2_scores as &$value2){
$test2 = $value2[0];
if ($test1 == $test2){ //shared test, we can calculate a correlation
$user2_score = $value2[1];
$delta = abs($user2_scor e - $user1_score);
$total_delta += $delta;
$num_common_tes ts++;
}
}
unset($value2);
}
unset($value1);
if($num_common_ tests > 0){ //add to correlations array
$avg_delta = $total_delta/$num_common_tes ts;
return array($user1,$u ser2,$total_del ta,$num_common_ tests,$avg_delt a);
}
return null;
}
/**
* Saves contents of correlation array to the database
*/
function save_correlatio ns(){
global $con,$correlati on_array;
for($i=0; $i < count($correlat ion_array); $i++){
$sql = "insert into correlations (user1, user2, total_delta, num_common_test s, avg_delta) values (". $correlation_ar ray[$i][0] .",". $correlation_ar ray[$i][1] .",". $correlation_ar ray[$i][2] .",". $correlation_ar ray[$i][3] .",". $correlation_ar ray[$i][4] .")";
$rs = mysql_query($sq l) or die(mysql_error ()."<br>".$sql) ;
}
print("Correlat ions saved<br>\n");
}
[/PHP]
I'm not how to assign variables based on subqueries, or how to even structure the nested queries...
I have a table with the following fields
scores
userId INT
testId INT
score INT
and I would like to correlate all of the users in another table 'correlations',
correlations
user1 (FK scores)
user2 (FK scores)
total_delta (the total of the difference in test scores between user1 and user2)
num_common_test s (number of tests both users have taken in common)
avg_delta (total_delta/num_common_test s)
in PHP i'm doing the following:
[PHP]
/**
* Loads all test scores into an array from the database, ordered by userId
* Array has the form [userId,[index,testId,sc ore]]
* ...this takes very little time
*/
function get_user_scores (){
global $user_scores,$u sers,$num_users ,$con,$table,$q uery_limit;
$sql = "select userId, testId, score from ".$table." order by userId limit ".$query_li mit;
$rs = mysql_query($sq l) or die(mysql_error ()."<br>".$sql) ; //pointer to resultset from database
while ($row = mysql_fetch_ass oc($rs)) {
$testId= intval($row['testId']);
$score= intval($row['score']);
$userId = intval($row['userId']);
if(array_key_ex ists($userId,$u ser_scores)){
array_push($use r_scores[$userId],array($testId, $score));
} else {
$user_ratings[$userId] = array();
array_push($use r_scores[$userId],array($testId, $score));
}
}
$users = array_keys($use r_scores);
$num_users = count($users);
}
/**
* Builds the correlation array. For each user, compares other users who have at least one shared test.
* This builds an N choose 2 array with the form [index,user1,use r2,cummulative delta,num common tests,avg delta]
* ...very slow when users > 100
*/
function build_correlati ons(){
global $num_users,$use rs,$correlation _array,$correla tion_array_inde x;
for ($i=0; $i < $num_users; $i++){
$user1 = $users[$i];
for ($k = $i+1; $k < $num_users; $k++){
$user2 = $users[$k];
$user1_correlat ion = correlate_users ($user1,$user2) ;
if($user1_corre lation!=null){
$correlation_ar ray[$correlation_ar ray_index] = $user1_correlat ion;
$correlation_ar ray_index++;
}
}
}
}
/**
* Calculates the correlation between 2 users
*/
function correlate_users ($user1,$user2) {
global $correlation_ar ray,$user_score s;
$user1_scores= $user_scores[$user1];
$user2_scores= $user_scores[$user2];
$total_delta = 0;
$num_common_tes ts = 0;
foreach ($user1_scores as &$value1){
$test1 = $value1[0];
$user1_score = $value1[1];
foreach ($user2_scores as &$value2){
$test2 = $value2[0];
if ($test1 == $test2){ //shared test, we can calculate a correlation
$user2_score = $value2[1];
$delta = abs($user2_scor e - $user1_score);
$total_delta += $delta;
$num_common_tes ts++;
}
}
unset($value2);
}
unset($value1);
if($num_common_ tests > 0){ //add to correlations array
$avg_delta = $total_delta/$num_common_tes ts;
return array($user1,$u ser2,$total_del ta,$num_common_ tests,$avg_delt a);
}
return null;
}
/**
* Saves contents of correlation array to the database
*/
function save_correlatio ns(){
global $con,$correlati on_array;
for($i=0; $i < count($correlat ion_array); $i++){
$sql = "insert into correlations (user1, user2, total_delta, num_common_test s, avg_delta) values (". $correlation_ar ray[$i][0] .",". $correlation_ar ray[$i][1] .",". $correlation_ar ray[$i][2] .",". $correlation_ar ray[$i][3] .",". $correlation_ar ray[$i][4] .")";
$rs = mysql_query($sq l) or die(mysql_error ()."<br>".$sql) ;
}
print("Correlat ions saved<br>\n");
}
[/PHP]