%% last modified 190609 by manu clear all set(0, 'DefaultTextInterpreter', 'none'); %% List Files Correlation_CuttOff = 0.98; % % example Backbone %wtPKA-C human % FolderName = ; Max_Min_Distance = [0.04 1]; ChemicalShiftScaling = [0.2 1]; %% [ f1 f2] f1-> 15N/13C =0.2/0.3 and f2 f2 -> 1H = 1 ReferenceResidue = [262]; %% % ReferenceResidue = [311]; %% %% op.PlotStar = 0; ReferenceState = 1; %% ReferenceAsInConcise_Paper = 0; Assignment_Column = 1; w1_Column = 2; w2_Column = 3; Interactive_Type = 1; Calculate_ChemicalShiftScaling = 0; % Max_Min_Distance = [0.05 1]; ChemicalShiftScaling = [1 0]; % [0.2 1] %% in order f1 (15N = 0.2, 13C = ) and f2 (1H = 1) %% fnames = dir([FolderName '\*.list']); for FileNo = 1:length(fnames) FileName = fnames((FileNo)).name; TotalColumns = 6;%GetNum_Clmns([FolderName '\' FileName]); fid = fopen([FolderName '\' FileName],'rt'); indata = textscan(fid, [' %s' repmat(' %f', 1, TotalColumns-1)] , 'HeaderLines',2); % indata = textscan(fid, ['\t\t %s\t' repmat('%f\t\t', 1, TotalColumns-1)] , 'HeaderLines',2); %s %f 8.522 254 23.3 32.3 fclose(fid); s(FileNo).FileName = FileName; try [OrderNo, Field_MHz] = Get_OrderNo_Field_FromName_1(FileName); s(FileNo).OrderNo = OrderNo; s(FileNo).Field_MHz = Field_MHz; end % resno_Array = []; for k = 1:length(indata{1}) resno = ExtractNumFromString_1(indata{Assignment_Column}{k}); resno = resno(1); % resno s(FileNo).resno(k) = resno; % ******************************* if isempty(find( resno_Array == resno, 1 )) s(FileNo).x(resno) = indata{w2_Column}(k); s(FileNo).y(resno) = indata{w1_Column}(k); s(FileNo).resname{resno} = indata{Assignment_Column}{k}; resno_Array = [resno_Array resno]; else fprintf('repeated res no : %s ::: %d\n', FileName, resno); s_(FileNo).x(resno) = indata{w2_Column}(k); s_(FileNo).y(resno) = indata{w1_Column}(k); s_(FileNo).resname{resno} = indata{Assignment_Column}{k}; end end end %% to find the scaling factor if exist('Calculate_ChemicalShiftScaling') && (Calculate_ChemicalShiftScaling || ~exist('ChemicalShiftScaling', 'var')) allx = nonzeros([s.x]);ally = nonzeros([s.y]); width_x = max(allx) - min(allx); width_y = max(ally) - min(ally); ChemicalShiftScaling = [width_x/width_y 1]; fprintf('Calculated the chemical Shift Scaling : %1.2f %1.2f\n', ChemicalShiftScaling); end % in case of multiple probes in same residue, pick the one shows maximum variance if exist('s_', 'var') xx_ = reshape([s_.x], length(s_(1).x), length(s)); yy_ = reshape([s_.y], length(s_(1).y), length(s)); xx_(xx_ == 0) = nan;yy_(yy_ == 0) = nan; xxyy_cond = sum(xx_,2) + sum(yy_,2); xxyy_cond(isnan(xxyy_cond)) = 0; xxyy_cond((xxyy_cond~=0)) = 1; ccs_s_ = ChemicalShiftScaling(2)*xx_ + ChemicalShiftScaling(1)*yy_; std_s_ = std(ccs_s_'); xx = reshape([s.x], length(s(1).x), length(s)); yy = reshape([s.y], length(s(1).y), length(s)); ccs_s = ChemicalShiftScaling(2)*xx + ChemicalShiftScaling(1)*yy ; std_s = std(ccs_s'); for k = 1:length(xxyy_cond) if xxyy_cond(k) if std_s_(k) > std_s(k) %% now copy probe with maximum variance to s for k_f = 1:length(s) s(k_f).x(k) = s_(k_f).x(k); s(k_f).y(k) = s_(k_f).y(k); s(k_f).resname{k} = s_(k_f).resname{k}; end end end end end %% CommonResidues = s(1).resno; for k = 2:length(s) CommonResidues = intersect(CommonResidues, s(k).resno ); end nC = length(CommonResidues); %% Spectral Referencing if ReferenceAsInConcise_Paper for k = 1:length(s) clear temp1 ReferenceState = 1; dcs = linspace(-1,1,5000)*5; for k_cs = 1:length(dcs) tempx(k_cs) = nanmean(abs(s(ReferenceState).x - s(k).x + dcs(k_cs))); tempy(k_cs) = nanmean(abs(s(ReferenceState).y - s(k).y + dcs(k_cs))); end [~, mnxLoc]= min(tempx);[~, mnyLoc]= min(tempy); ddx = dcs(mnxLoc); ddy = dcs(mnyLoc); % plot(dcs, tempx); hold on; plot(dcs, tempy); s(k).x = -ddx + s(k).x; s(k).y = -ddy + s(k).y; end else % REFERENCING for residue for k = 1:length(s) s(k).x = (mean(s(ReferenceState).x(ReferenceResidue)) - mean(s(k).x(ReferenceResidue))) + s(k).x; s(k).y = (mean(s(ReferenceState).y(ReferenceResidue)) - mean(s(k).y(ReferenceResidue))) + s(k).y; end end %% Filter with Linewidth SelectedIds = []; for k_res=1:nC Current_resino = CommonResidues(k_res); xArray = [];yArray = [];dxArray = [];dyArray = []; for FileNo = 1:length(fnames) xArray = [xArray s(FileNo).x(Current_resino)]; yArray = [yArray s(FileNo).y(Current_resino)]; end distance_ = []; for k_d1 = 1:length(xArray) for k_d2 = (k_d1+1):length(xArray) distance_ = [distance_ ( ( ChemicalShiftScaling(2)*(xArray(k_d1)-xArray(k_d2)) )^2 + ( ChemicalShiftScaling(1)*(yArray(k_d1) - yArray(k_d2)) )^2 )^0.5]; end end %% in the case of grouped few and one far : excude %% more than 2 groups should be far enough to qualify. (just two groups far away always give high correlation) dist_cond = ((distance_) > Max_Min_Distance(1)); if (length(dist_cond(dist_cond))> (length(xArray))-1) && max(distance_) < Max_Min_Distance(2) SelectedIds = [SelectedIds Current_resino]; end % % % % if min(distance_) > Max_Min_Distance(1) && max(distance_) < Max_Min_Distance(2) % % % % SelectedIds = [SelectedIds Current_resino]; % % % % end end %% Non_Slected_Residues = setdiff(1:max(SelectedIds), SelectedIds); %% NObs = length(SelectedIds); % disp(SelectedIds') for k_states = 1:length(s) %% keep only selected ids vals. Non selected should be nan temp1 = nan(1,length(s(k_states).x)); temp1(SelectedIds) = s(k_states).x(SelectedIds); s(k_states).x = temp1; temp1 = nan(1,length(s(k_states).y)); temp1(SelectedIds) = s(k_states).y(SelectedIds); s(k_states).y = temp1; end %% STAR plot %% if op.PlotStar figure for k = 1:length(SelectedIds) x =s(1).x(SelectedIds(k)); y= s(1).y(SelectedIds(k)); for ks = 2:length(s) x = [x s(ks).x(SelectedIds(k)) ]; y = [y s(ks).y(SelectedIds(k)) ]; end clr = rand(1,3); plot(x, y,'*-','color',clr); hold on set(gca,'xdir','reverse','ydir','reverse') text(x(1),y(1), num2str( SelectedIds(k))); hold on [temp_pth, temp_name1,~] = fileparts(FolderName); [~,temp_name2,~] = fileparts(temp_pth); title([temp_name2 '__' temp_name1]); xlabel('1H chemical shift (ppm)'); ylabel('15N chemical shift (ppm)'); % xlim([0.548 0.959]) % ylim([20.43 25.20]) end end %% FOR CHESCA %% N = max([s.resno]); Dcs = nan(N, length(s)); for k = 1:length(s) Dcs(SelectedIds,k) = ChemicalShiftScaling(2)*s(k).x(SelectedIds) + ChemicalShiftScaling(1)*s(k).y(SelectedIds); end % Dcs = Dcs - repmat(Dcs(:,end), 1,size(Dcs,2)); % Dcs = Dcs(:,1:(end-1)); %% % for k1 = 1:size(Dcs,1) % for k2 = 1:size(Dcs,1) % tempCorr = corrcoef(Dcs(k1,:), Dcs(k2,:)); % % tempCorr = cov(Dcs(k1,:), Dcs(k2,:)); % CorrMat(k1, k2) = tempCorr(2); % end % end CorrMat = corrcoef(Dcs'); %% CorrMat = abs(CorrMat); %% all assigned residues must have diagonal elements %% for k = 1:nC CorrMat(CommonResidues(k),CommonResidues(k)) = 1; end %% chesca.CorrMat = (CorrMat); chesca.NObs = NObs; %% CorrMat(isnan(CorrMat)) = 0; chesca.CorrScore = (sum(CorrMat(:)) - trace(CorrMat) ) / (NObs*(NObs-1)); %% Writting CorrMat pwd1 = pwd; [~,FileName_temp,~] = fileparts(FolderName); OutFolder = [pwd filesep 'CorrMat_out' filesep]; if ~exist(OutFolder, 'dir'); mkdir(OutFolder); end OutFileName = [ datestr(now,'yymmdd') '_' FileName_temp ]; cd(OutFolder) pwdLoc = pwd; File_FullPath = [pwd filesep OutFileName]; % dlmwrite(File_FullPath, CorrMat) s(1).CorrMat = CorrMat; save(File_FullPath,'s') cd(pwd1) %% disp('CorrMat File is written @ ') disp([File_FullPath '.mat']) %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%% plot the CorrMat %% FigChesca = figure; op.MinCorrelation = Correlation_CuttOff; op.colors = 'white_purple'; % 'white_red' or 'blue_yellow_red''blue_cyan_green_yellow_red' % op.colors = 'blue_cyan_green_yellow_red'; % 'white_red' or 'blue_yellow_red' op.Figure = FigChesca; Plot_CorrMat_2(s(1).CorrMat, Interactive_Type, op ) [temp_pth, temp_name1,~] = fileparts(FolderName); [~,temp_name2,~] = fileparts(temp_pth); title([temp_name2 '__' temp_name1 '_' num2str(ChemicalShiftScaling) ]); dcm_obj = datacursormode(FigChesca); set(dcm_obj, 'SnapToDataVertex','off','Enable','on') cntr = 0; % fig1 = figure; figure(FigChesca); for k_9 = 1:length(s); point_lbl{k_9} = sprintf(' %d', k_9); end while 1 try figure(FigChesca) if Interactive_Type waitforbuttonpress c_info = getCursorInfo(dcm_obj); end if ~isempty(c_info) % Make selected line wider set(c_info.Target,'LineWidth',5); Dcs2plot = Dcs(c_info.Position,:); if isnan(Dcs2plot(1) ) disp('No correlation for this residue'); else switch Interactive_Type case 1 figure(FigChesca);subplot(3, 7, 7*cntr + [6 7]); case 2 figure(FigChesca);subplot(3, 10, 10*cntr + [7 8]); end x2plot = Dcs2plot(1,:)'; y2plot = Dcs2plot(2,:)'; % linear regression (https://www.mathworks.com/help/matlab/data_analysis/linear-regression.html) x2plot_new = [ones(length(x2plot),1) x2plot]; b1 = x2plot_new\y2plot; yCalc = x2plot_new*b1; figure(FigChesca); plt1 = plot(x2plot, yCalc); hold on plot(x2plot, y2plot, '*'); text(x2plot, y2plot, point_lbl, 'color', 'k' ); hold off title(sprintf( '%s vs %s', s(1).resname{ c_info.Position(1)}, s(1).resname{ c_info.Position(2)} )); % legend(plt1, sprintf( '%1.3f', CorrMat(c_info.Position(1), c_info.Position(2)))) xlim( (max(x2plot)-min(x2plot))*[-0.2 0.2] + [min(x2plot) max(x2plot)]); ylim( (max(y2plot)-min(y2plot))*[-0.3 0.3] + [min(y2plot) max(y2plot)]); if Interactive_Type == 2 plot_chemicalShift(FigChesca, cntr, s, c_info ); end cntr = mod(cntr +1, 3); end end catch ME break; end end % rethrow(ME) %% FUNCTIONS function [OrderNo, Field_MHz] = Get_OrderNo_Field_FromName_1(Name_string) % a ='12_850_APO_WT.list'; output is 12 and 850 pos_ = find(Name_string =='_'); pos_ = pos_-1; OrderNo = str2num( Name_string(1:pos_(1)) ); Field_MHz = str2num( Name_string((pos_(1)+2):pos_(2)) ); end function plot_chemicalShift(FigChesca, cntr, s, c_info ) %% figure(FigChesca); subplot(3, 10, 10*cntr + [9 10]); ax1 = gca; % current axes cla(ax1) for k_9 = 1:length(s); point_lbl{k_9} = sprintf('* %d', k_9); end % figure;% arrayfun(@cla,findall(fig1,'type','axes')) for k = 1:length(s) cs_x(:, k) = [ s(k).x(c_info.Position(1)); s(k).x(c_info.Position(2))] ; cs_y(:, k) = [ s(k).y(c_info.Position(1)); s(k).y(c_info.Position(2))] ; end % % plot(cs_x(1, :), cs_y(1, :), '-','Color','r'); line(cs_x(1, :), cs_y(1, :),'Color','r'); % pbaspect([1 0.2 1]); ax1.XColor = 'r'; ax1.YColor = 'r'; text(cs_x(1, :), cs_y(1, :), point_lbl ,'Color','r') xwidth = max( (max(cs_x') - min(cs_x')) ); ywidth = max( (max(cs_y') - min(cs_y')) ); xlim( mean(cs_x(1, :)) + [-xwidth xwidth]); ylim( mean(cs_y(1, :)) + [-ywidth ywidth]); ax1_pos = ax1.Position; % position of first axes ax2 = axes('Position',ax1_pos, 'XAxisLocation','top','YAxisLocation','right', 'Color','none' );%set(gca, 'XDir','reverse')'YDir','reverse' line(cs_x(2, :), cs_y(2, :),'Parent',ax2,'Color','k'); % pbaspect([1 0.2 1]); text(cs_x(2, :), cs_y(2, :), point_lbl ,'Color','k') xlim( mean(cs_x(2, :)) + [-xwidth xwidth]); ylim( mean(cs_y(2, :)) + [-ywidth ywidth]); end function numCols = GetNum_Clmns(FileName) fid = fopen(FileName,'rt'); fgets(fid); fgets(fid); tLines = fgets(fid); idx_temp = find(tLines==' ');numCols = length(find(diff(idx_temp) > 1)) ; fclose(fid); end function Plot_CorrMat_2(CM, Interactive_Type, varargin ) if ~isempty(varargin) op = varargin{1}; else op.colors = 'blue_cyan_green_yellow_red'; op.MinCorrelation = 0.95; end switch Interactive_Type case 1 subplot(3, 7, [(0+(1:5)) (7+(1:5)) (14+(1:5))]) case 2 subplot(3, 10, [(0+(1:6)) (10+(1:6)) (20+(1:6))]) end [~, CM_Sorted_i ]= sort(CM(:), 'ascend'); %% set(op.Figure,'color','white') Z = CM; N = length(Z); for k = 1:(N*N) ki = CM_Sorted_i(k); if abs(Z(ki)) > op.MinCorrelation color_meter = (Z(ki)- op.MinCorrelation)/(1- op.MinCorrelation); clr_rgb = gen_rgb_color_1(color_meter, op.colors); [k1, k2] = ind2sub(size(Z), ki); plot(k1,k2,'o', 'MarkerFaceColor', clr_rgb, 'MarkerEdgeColor', clr_rgb, 'MarkerSize', 2 ); hold on end end %% set(gca, 'xlim',[0 N] + N*[-0.025 0.1]/4) set(gca, 'ylim',[0 N] + N*[-0.025 0.1]/4) ytickangle(90) xlabel('Residue Number');ylabel('Residue Number'); % set(gca, 'position', [0.1300 0.1100 0.7750 0.745]) end function clr_rgb = gen_rgb_color_1(color_meter, clr_scheme) %% Available colorschemes % 1. blue_yellow_red % 2. blue_green_red % 3. white_red % 4. blue_cyan_green_yellow_red % 5. green_yellow_red %% switch clr_scheme case 'blue_yellow_red' if color_meter < 0.5 clr_rgb = [2*color_meter, 2*color_meter, 1-2*color_meter]; else clr_rgb = [1, 1-2*(color_meter-0.5), 0]; end case 'blue_green_red' if color_meter < 0.5 clr_rgb = [0, 2*color_meter, 1-2*color_meter]; else clr_rgb = [2*(color_meter-0.5), 1-2*(color_meter-0.5), 0]; end case 'blue_cyan_green_yellow_red' if color_meter < 0.25 clr_rgb = [0, 4*color_meter, 1]; elseif color_meter < 0.5 clr_rgb = [0, 1, 1-4*(color_meter - 0.25)]; elseif color_meter < 0.75 clr_rgb = [4*(color_meter - 0.5), 1, 0]; else clr_rgb = [1, 1-4*(color_meter - 0.75), 0]; end case 'white_red' clr_rgb = [1, 1-color_meter, 1-color_meter]; case 'green_yellow_red' if color_meter < 0.5 clr_rgb = [2*color_meter, 1, 0]; else clr_rgb = [1, 1-2*(color_meter-0.5), 0]; end case 'white_purple' clr_rgb = [1-color_meter*(1-00), 1-color_meter*(1-0), 1-color_meter*(1-1)]; otherwise clr_rgb = [0, 0, 1]; end end function n = ExtractNumFromString_1(str) tn= regexp(str,'\s+\.?\d*|-\d+\.?\d*|\.?\d+|-\.?\d+','match'); for k = 1:length(tn) tn_(k) = str2double(tn{k}); end n = tn_; end