Extract the href link items from an HTML source file using MATLAB.
Every line that contains a link is saved to a text file.
%%
% Copy every line of textsrc.txt that contains the start of an HTML anchor
% (the literal ' <a href="') into dsave2.txt, counting the matches in y.
%
% Tip: if the HTML source holds several links on one line, first put each
% "href" tag on its own line (e.g. find-and-replace with a manual line
% break in a word processor) so that each output line holds one link.
home
clc
filename = 'textsrc.txt';
literal = ' <a href="';
bbase = 'dsave2';

% Fail early with a readable message when a file cannot be opened.
[fid, errmsg] = fopen(filename, 'rt');
if fid == -1
    error('Cannot open input file "%s": %s', filename, errmsg);
end
[fid_sh, errmsg] = fopen([bbase '.txt'], 'w');
if fid_sh == -1
    fclose(fid);   % do not leak the input handle
    error('Cannot open output file "%s.txt": %s', bbase, errmsg);
end

y = 0;    % total number of occurrences of the literal
jj = 1;   % 1-based index of the next line to be read
while ~feof(fid)
    tline = fgetl(fid);
    if ~ischar(tline)
        % fgetl returns -1 (numeric) at end of file, e.g. for an empty file.
        break
    end
    % strfind always looks for the literal inside the line. The deprecated
    % findstr searched the shorter argument inside the longer one, so a
    % short line such as 'href' was wrongly reported as a match.
    matches = strfind(tline, literal);
    num = length(matches);
    if num > 0
        y = y + num;
        fprintf(fid_sh, '%s \n', tline);
    end
    jj = jj + 1;
end
fclose(fid);
fclose(fid_sh);
% The matching lines are now stored in dsave2.txt,
% which is processed further by refinestr.m
data:image/s3,"s3://crabby-images/63a86/63a8624a3d67282b512f7dcbba40edeb6b754e3c" alt="MATLAB for Engineers (2nd Edition)"
data:image/s3,"s3://crabby-images/dccf4/dccf4f0f3b4ffc433b1f0362b9dae0bc1e59d9d3" alt=""
data:image/s3,"s3://crabby-images/93305/933056e59d25a54bb585121241461dda3b4ab688" alt="MATLAB Primer, Eighth Edition"
data:image/s3,"s3://crabby-images/6373c/6373c7941a9303d8f3befe7264be5777142a906e" alt=""
data:image/s3,"s3://crabby-images/1a76a/1a76acbf5deb3d407d9f4218a564679a816a6779" alt="Digital Signal Processing Using MATLAB"
data:image/s3,"s3://crabby-images/d2ab1/d2ab1f47f8d7afc1047a8a17bbe8928f478070ed" alt=""
Every line that contains a link is saved to a text file.
%%
% Copy every line of textsrc.txt that contains the start of an HTML anchor
% (the literal ' <a href="') into dsave2.txt, counting the matches in y.
%
% Tip: if the HTML source holds several links on one line, first put each
% "href" tag on its own line (e.g. find-and-replace with a manual line
% break in a word processor) so that each output line holds one link.
home
clc
filename = 'textsrc.txt';
literal = ' <a href="';
bbase = 'dsave2';

% Fail early with a readable message when a file cannot be opened.
[fid, errmsg] = fopen(filename, 'rt');
if fid == -1
    error('Cannot open input file "%s": %s', filename, errmsg);
end
[fid_sh, errmsg] = fopen([bbase '.txt'], 'w');
if fid_sh == -1
    fclose(fid);   % do not leak the input handle
    error('Cannot open output file "%s.txt": %s', bbase, errmsg);
end

y = 0;    % total number of occurrences of the literal
jj = 1;   % 1-based index of the next line to be read
while ~feof(fid)
    tline = fgetl(fid);
    if ~ischar(tline)
        % fgetl returns -1 (numeric) at end of file, e.g. for an empty file.
        break
    end
    % strfind always looks for the literal inside the line. The deprecated
    % findstr searched the shorter argument inside the longer one, so a
    % short line such as 'href' was wrongly reported as a match.
    matches = strfind(tline, literal);
    num = length(matches);
    if num > 0
        y = y + num;
        fprintf(fid_sh, '%s \n', tline);
    end
    jj = jj + 1;
end
fclose(fid);
fclose(fid_sh);
% The matching lines are now stored in dsave2.txt,
% which is processed further by refinestr.m
No comments:
Post a Comment